package org.apache.lucene.analysis.miscellaneous;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collection;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
import org.elasticsearch.index.query.NotFilterParser;
import org.elasticsearch.threadpool.ThreadPool;

@Deprecated
/* loaded from: input_file:lib/lucene-analyzers-common-4.10.4.jar:org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.class */
public final class PatternAnalyzer extends Analyzer {
    /** Matches one or more non-word characters ({@code \W+}). */
    public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");

    /** Matches one or more whitespace characters ({@code \s+}). */
    public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    /**
     * Unmodifiable stop word set used by {@link #EXTENDED_ANALYZER}.
     *
     * <p>Every entry is a plain string literal so that this list does not depend on
     * constants owned by unrelated classes.
     *
     * <p>NOTE(review): {@code "once one"} and {@code "xother "} (trailing space) are kept
     * exactly as found. {@link #EXTENDED_ANALYZER} tokenizes on runs of letters, so neither
     * entry can ever equal a produced token; they look like typos for {@code "once"},
     * {@code "one"} and {@code "xother"}. Correcting them changes which terms the analyzer
     * drops, so confirm the impact on existing indexes before doing so.
     */
    private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, (Collection<?>) Arrays.asList(
            "a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit",
            "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst",
            "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around",
            "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before",
            "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but",
            "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either",
            "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere",
            "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has",
            "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
            "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in",
            "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly",
            "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover",
            "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless",
            "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
            "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others",
            "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather",
            "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since",
            "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such",
            "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there",
            "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though",
            "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under",
            "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were",
            "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas",
            "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon",
            "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who",
            "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with",
            "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you",
            "your", "yours", "yourself", "yourselves"), true));

    /**
     * Shared analyzer: splits on {@link #NON_WORD_PATTERN}, lower-cases tokens and removes
     * the words in {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
     */
    public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    /**
     * Shared analyzer: same configuration as {@link #DEFAULT_ANALYZER} except that it
     * removes the larger {@code EXTENDED_ENGLISH_STOP_WORDS} set.
     */
    public static final PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);

    /** Separator pattern; canonicalized by the constructor to one of the two constants when equivalent. */
    private final Pattern pattern;

    /** Whether produced tokens are lower-cased. */
    private final boolean toLowerCase;

    /** Stop words to drop, or {@code null} when no stop word removal is configured. */
    private final CharArraySet stopWords;

    /** Version handed to the {@code StopFilter} created in {@code createComponents}. */
    private final Version matchVersion;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:lib/lucene-analyzers-common-4.10.4.jar:org/apache/lucene/analysis/miscellaneous/PatternAnalyzer$FastStringReader.class */
    /**
     * A {@link StringReader} that also remembers the string it was built from, so the
     * text can be retrieved directly via {@link #getString()} without reading it back.
     */
    public static final class FastStringReader extends StringReader {
        /** The exact string instance passed to the constructor. */
        private final String text;

        /**
         * @param str the text to expose both through the reader API and {@link #getString()}
         */
        FastStringReader(String str) {
            super(str);
            this.text = str;
        }

        /**
         * @return the string this reader was constructed with
         */
        String getString() {
            return this.text;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:lib/lucene-analyzers-common-4.10.4.jar:org/apache/lucene/analysis/miscellaneous/PatternAnalyzer$FastStringTokenizer.class */
    /**
     * Tokenizer that scans the fully buffered input character by character instead of
     * running a regular expression.
     *
     * <p>The whole input is read into a string on {@link #reset()}. A token is a maximal
     * run of characters accepted by {@code isTokenChar}: letters when {@code isLetter} is
     * set, otherwise anything that is not whitespace. Stop words are dropped inside the
     * tokenizer itself.
     *
     * <p>NOTE(review): lower-casing uses the JVM default locale captured at class
     * initialization, so produced terms can differ between machines with different
     * default locales.
     */
    public static final class FastStringTokenizer extends Tokenizer {
        /** Locale used for lower-casing; the JVM default at class-load time. */
        private static final Locale locale = Locale.getDefault();

        private final CharTermAttribute termAtt;
        private final OffsetAttribute offsetAtt;

        /** {@code true}: tokens are runs of letters; {@code false}: runs of non-whitespace. */
        private final boolean isLetter;
        private final boolean toLowerCase;
        /** Stop words to skip, or {@code null} for none. */
        private final CharArraySet stopWords;

        /** Buffered input text; {@code null} until {@link #reset()} and again after {@link #close()}. */
        private String str;
        /** Index in {@link #str} at which the next scan starts. */
        private int pos;

        /**
         * @param reader       input handed to the {@code Tokenizer} superclass
         * @param z            {@code true} to tokenize on letters, {@code false} on non-whitespace
         * @param z2           {@code true} to lower-case every token
         * @param charArraySet stop words to drop, may be {@code null}
         */
        public FastStringTokenizer(Reader reader, boolean z, boolean z2, CharArraySet charArraySet) {
            super(reader);
            this.termAtt = (CharTermAttribute) addAttribute(CharTermAttribute.class);
            this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
            this.isLetter = z;
            this.toLowerCase = z2;
            this.stopWords = charArraySet;
        }

        /**
         * Advances to the next token that is not a stop word.
         *
         * @return {@code true} if a token was produced, {@code false} at end of input
         * @throws IllegalStateException if no text is buffered (see {@link #reset()})
         */
        @Override
        public boolean incrementToken() {
            if (this.str == null) {
                throw new IllegalStateException("Consumer did not call reset().");
            }
            clearAttributes();

            final String text = this.str;
            final int limit = text.length();
            final boolean lettersOnly = this.isLetter;
            int cursor = this.pos;
            int tokenStart = 0;
            String token;

            while (true) {
                // Skip the separator characters in front of the next candidate token.
                while (cursor < limit && !isTokenChar(text.charAt(cursor), lettersOnly)) {
                    cursor++;
                }
                if (cursor >= limit) {
                    // Input exhausted without finding another token.
                    token = null;
                    break;
                }
                tokenStart = cursor;
                while (cursor < limit && isTokenChar(text.charAt(cursor), lettersOnly)) {
                    cursor++;
                }
                token = text.substring(tokenStart, cursor);
                if (this.toLowerCase) {
                    token = token.toLowerCase(locale);
                }
                if (!isStopWord(token)) {
                    break;
                }
                // Stop word: keep scanning from the current cursor.
            }

            this.pos = cursor;
            if (token == null) {
                return false;
            }
            this.termAtt.setEmpty().append(token);
            this.offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(cursor));
            return true;
        }

        /** Sets both final offsets to the corrected length of the buffered text. */
        @Override
        public final void end() throws IOException {
            super.end();
            final int textLength = this.str.length();
            this.offsetAtt.setOffset(correctOffset(textLength), correctOffset(textLength));
        }

        /** Decides whether {@code ch} belongs to a token under the selected mode. */
        private boolean isTokenChar(char ch, boolean lettersOnly) {
            if (lettersOnly) {
                return Character.isLetter(ch);
            }
            return !Character.isWhitespace(ch);
        }

        /** Returns {@code true} when a stop set is configured and contains {@code candidate}. */
        private boolean isStopWord(String candidate) {
            if (this.stopWords == null) {
                return false;
            }
            return this.stopWords.contains((CharSequence) candidate);
        }

        /** Closes the underlying input and discards the buffered text. */
        @Override
        public void close() throws IOException {
            super.close();
            this.str = null;
        }

        /** Buffers the complete input as a string and rewinds the scan position. */
        @Override
        public void reset() throws IOException {
            super.reset();
            this.str = PatternAnalyzer.toString(this.input);
            this.pos = 0;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:lib/lucene-analyzers-common-4.10.4.jar:org/apache/lucene/analysis/miscellaneous/PatternAnalyzer$PatternTokenizer.class */
    /**
     * Tokenizer that treats every match of a regular expression as a separator and emits
     * the non-empty text between consecutive matches (and after the last match) as tokens.
     *
     * <p>The whole input is read into a string on {@link #reset()}.
     *
     * <p>NOTE(review): lower-casing uses the JVM default locale captured at class
     * initialization, so produced terms can differ between machines with different
     * default locales.
     */
    public static final class PatternTokenizer extends Tokenizer {
        /** Locale used for lower-casing; the JVM default at class-load time. */
        private static final Locale locale = Locale.getDefault();

        private final CharTermAttribute termAtt;
        private final OffsetAttribute offsetAtt;

        /** Separator pattern. */
        private final Pattern pattern;
        private final boolean toLowerCase;

        /** Buffered input text, assigned in {@link #reset()}. */
        private String str;
        /** Active matcher over {@link #str}; set to {@code null} once no further separator is found. */
        private Matcher matcher;
        /** End of the previous separator match, i.e. start of the next candidate token. */
        private int pos;
        /** {@code true} between {@link #reset()} and {@link #close()}. */
        private boolean initialized;

        /**
         * @param reader  input handed to the {@code Tokenizer} superclass
         * @param pattern separator pattern
         * @param z       {@code true} to lower-case every token
         */
        public PatternTokenizer(Reader reader, Pattern pattern, boolean z) {
            super(reader);
            // pos and initialized keep their default values (0 / false) until reset().
            this.termAtt = (CharTermAttribute) addAttribute(CharTermAttribute.class);
            this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
            this.pattern = pattern;
            this.matcher = pattern.matcher("");
            this.toLowerCase = z;
        }

        /**
         * Advances to the next non-empty stretch of text between separator matches.
         *
         * @return {@code true} if a token was produced, {@code false} at end of input
         * @throws IllegalStateException if {@link #reset()} has not been called
         */
        @Override
        public final boolean incrementToken() {
            if (!this.initialized) {
                throw new IllegalStateException("Consumer did not call reset().");
            }
            if (this.matcher == null) {
                // The trailing text was already handled by an earlier call.
                return false;
            }
            clearAttributes();

            while (true) {
                final int tokenStart = this.pos;
                final boolean separatorFound = this.matcher.find();
                final int tokenEnd;
                if (separatorFound) {
                    tokenEnd = this.matcher.start();
                    this.pos = this.matcher.end();
                } else {
                    // No more separators: the remainder of the text is the last candidate.
                    tokenEnd = this.str.length();
                    this.matcher = null;
                }

                if (tokenStart != tokenEnd) {
                    String token = this.str.substring(tokenStart, tokenEnd);
                    if (this.toLowerCase) {
                        token = token.toLowerCase(locale);
                    }
                    this.termAtt.setEmpty().append(token);
                    this.offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
                    return true;
                }
                // Empty stretch (adjacent separators or separator at the edge): try again
                // unless the input is exhausted.
                if (!separatorFound) {
                    return false;
                }
            }
        }

        /** Sets both final offsets to the corrected length of the buffered text. */
        @Override
        public final void end() throws IOException {
            super.end();
            final int finalOffset = correctOffset(this.str.length());
            this.offsetAtt.setOffset(finalOffset, finalOffset);
        }

        /** Closes the underlying input and marks the tokenizer as requiring a new {@link #reset()}. */
        @Override
        public void close() throws IOException {
            super.close();
            this.initialized = false;
        }

        /** Buffers the complete input as a string and creates a fresh matcher over it. */
        @Override
        public void reset() throws IOException {
            super.reset();
            this.str = PatternAnalyzer.toString(this.input);
            this.matcher = this.pattern.matcher(this.str);
            this.pos = 0;
            this.initialized = true;
        }
    }

    public PatternAnalyzer(Version version, Pattern pattern, boolean z, CharArraySet charArraySet) {
        if (pattern == null) {
            throw new IllegalArgumentException("pattern must not be null");
        }
        if (eqPattern(NON_WORD_PATTERN, pattern)) {
            pattern = NON_WORD_PATTERN;
        } else if (eqPattern(WHITESPACE_PATTERN, pattern)) {
            pattern = WHITESPACE_PATTERN;
        }
        if (charArraySet != null && charArraySet.size() == 0) {
            charArraySet = null;
        }
        this.pattern = pattern;
        this.toLowerCase = z;
        this.stopWords = charArraySet;
        this.matchVersion = version;
    }

    /**
     * Builds the token stream components for this analyzer's configuration.
     *
     * <p>For the two built-in patterns a {@code FastStringTokenizer} is used, which also
     * performs stop word removal itself. Any other pattern uses a {@code PatternTokenizer},
     * wrapped in a {@code StopFilter} when stop words are configured.
     *
     * @param str    not used by this method
     * @param reader input to tokenize; when {@code null}, {@code str2} is wrapped in a reader
     * @param str2   text to tokenize when {@code reader} is {@code null}
     * @return the components for the selected tokenizer
     */
    public Analyzer.TokenStreamComponents createComponents(String str, Reader reader, String str2) {
        final Reader source = (reader != null) ? reader : new FastStringReader(str2);

        // The constructor canonicalizes equivalent patterns, so identity checks suffice here.
        final boolean splitOnNonWord = this.pattern == NON_WORD_PATTERN;
        if (splitOnNonWord || this.pattern == WHITESPACE_PATTERN) {
            return new Analyzer.TokenStreamComponents(new FastStringTokenizer(source, splitOnNonWord, this.toLowerCase, this.stopWords));
        }

        final PatternTokenizer tokenizer = new PatternTokenizer(source, this.pattern, this.toLowerCase);
        if (this.stopWords == null) {
            return new Analyzer.TokenStreamComponents(tokenizer, tokenizer);
        }
        return new Analyzer.TokenStreamComponents(tokenizer, new StopFilter(this.matchVersion, tokenizer, this.stopWords));
    }

    /**
     * Builds the token stream components for {@code reader} by delegating to the
     * three-argument overload with no fallback text.
     *
     * @param str    passed through to the three-argument overload
     * @param reader input to tokenize
     * @return the components for the selected tokenizer
     */
    @Override
    public Analyzer.TokenStreamComponents createComponents(String str, Reader reader) {
        final String noFallbackText = null;
        return createComponents(str, reader, noFallbackText);
    }

    /**
     * Compares this analyzer with {@code obj}.
     *
     * <p>Two analyzers are equal when they agree on lower-casing, have patterns with the
     * same regex string and flags, and have equal stop word sets. The two shared constants
     * are never equal to each other, which is decided without comparing fields.
     *
     * @param obj the object to compare with, may be {@code null}
     * @return {@code true} if {@code obj} is an equivalent {@code PatternAnalyzer}
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof PatternAnalyzer)) {
            return false;
        }

        // Fast path: the two shared constants are known to differ.
        final boolean defaultVsExtended = this == DEFAULT_ANALYZER && obj == EXTENDED_ANALYZER;
        final boolean extendedVsDefault = this == EXTENDED_ANALYZER && obj == DEFAULT_ANALYZER;
        if (defaultVsExtended || extendedVsDefault) {
            return false;
        }

        final PatternAnalyzer other = (PatternAnalyzer) obj;
        if (this.toLowerCase != other.toLowerCase) {
            return false;
        }
        if (!eqPattern(this.pattern, other.pattern)) {
            return false;
        }
        return eq(this.stopWords, other.stopWords);
    }

    /**
     * Returns a hash code built from the pattern string, the pattern flags, the
     * lower-casing switch and the stop word set.
     *
     * <p>The two shared constants return fixed values without consulting their fields.
     * NOTE(review): whether those fixed values match what the field-based formula would
     * yield for an equal, separately constructed analyzer cannot be told from this file;
     * confirm before relying on equal analyzers hashing alike.
     *
     * @return the hash code of this analyzer
     */
    @Override
    public int hashCode() {
        if (this == DEFAULT_ANALYZER) {
            return -1218418418;
        }
        if (this == EXTENDED_ANALYZER) {
            return 1303507063;
        }

        int hash = 1;
        hash = 31 * hash + this.pattern.pattern().hashCode();
        hash = 31 * hash + this.pattern.flags();
        hash = 31 * hash + (this.toLowerCase ? 1231 : 1237);
        hash = 31 * hash + (this.stopWords != null ? this.stopWords.hashCode() : 0);
        return hash;
    }

    /**
     * Null-safe equality: {@code true} when both references are identical (including both
     * {@code null}) or when {@code obj} is non-null and reports itself equal to {@code obj2}.
     */
    private static boolean eq(Object obj, Object obj2) {
        if (obj == obj2) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        return obj.equals(obj2);
    }

    /**
     * Pattern equality: {@code true} when both references are identical, or when both
     * patterns have the same flags and the same regex string.
     */
    private static boolean eqPattern(Pattern pattern, Pattern pattern2) {
        if (pattern == pattern2) {
            return true;
        }
        final boolean sameFlags = pattern.flags() == pattern2.flags();
        return sameFlags && pattern.pattern().equals(pattern2.pattern());
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the complete remaining content of {@code reader} as a string.
     *
     * <p>A {@code FastStringReader} is answered directly from its backing string; it is
     * neither read nor closed. Any other reader is drained and then closed exactly once,
     * whether draining succeeds or fails.
     *
     * @param reader the input to drain
     * @return all characters read from {@code reader}
     * @throws IOException if reading from or closing {@code reader} fails
     */
    public static String toString(Reader reader) throws IOException {
        if (reader instanceof FastStringReader) {
            return ((FastStringReader) reader).getString();
        }
        try {
            char[] chunk = new char[256];
            char[] output = new char[256];
            int length = 0;
            int read;
            while ((read = reader.read(chunk)) >= 0) {
                if (length + read > output.length) {
                    // Grow to at least double the capacity, or more if this chunk needs it.
                    final char[] grown = new char[Math.max(output.length << 1, length + read)];
                    System.arraycopy(output, 0, grown, 0, length);
                    System.arraycopy(chunk, 0, grown, length, read);
                    // Reuse the outgrown output array as a larger read buffer for later reads.
                    chunk = output;
                    output = grown;
                } else {
                    System.arraycopy(chunk, 0, output, length, read);
                }
                length += read;
            }
            return new String(output, 0, length);
        } finally {
            // Single close on every path; avoids a second close() when close() itself throws.
            reader.close();
        }
    }
}
