package edu.emory.clir.clearnlp.tokenization;

import edu.emory.clir.clearnlp.dictionary.english.DTAbbreviation;
import edu.emory.clir.clearnlp.dictionary.english.DTHyphen;
import edu.emory.clir.clearnlp.dictionary.universal.DTCompound;
import edu.emory.clir.clearnlp.tokenization.english.ApostropheEnglishTokenizer;
import edu.emory.clir.clearnlp.util.StringUtils;
import edu.emory.clir.clearnlp.util.lang.TLanguage;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:edu/emory/clir/clearnlp/tokenization/EnglishTokenizer.class */
/**
 * Tokenizer for English text.
 *
 * <p>Supplies the English-specific hooks of {@link AbstractTokenizer} (abbreviation, hyphen,
 * apostrophe and compound handling, each delegated to a dictionary/helper object) and groups
 * the resulting tokens into sentences in {@link #segmentize(InputStream)}.
 */
public class EnglishTokenizer extends AbstractTokenizer {
    /** Opening symbols; index {@code k} pairs with {@code R_BRACKETS[k]}. Index 0 is the quotation mark. */
    private static final String[] L_BRACKETS = {"\"", "(", "{", "["};
    /** Closing symbols; index {@code k} pairs with {@code L_BRACKETS[k]}. Index 0 is the quotation mark. */
    private static final String[] R_BRACKETS = {"\"", ")", "}", "]"};

    private final ApostropheEnglishTokenizer d_apostrophe = new ApostropheEnglishTokenizer();
    private final DTAbbreviation d_abbreviation = new DTAbbreviation();
    private final DTCompound d_compound = new DTCompound(TLanguage.ENGLISH);
    private final DTHyphen d_hyphen = new DTHyphen();

    /**
     * English applies no adjustment here.
     *
     * @return always {@code 0}
     */
    @Override // edu.emory.clir.clearnlp.tokenization.AbstractTokenizer
    protected int adjustFirstNonSymbolGap(char[] cArr, int i, String str) {
        return 0;
    }

    /**
     * Returns {@code 1} when {@code cArr[i]} is a period and the lower-cased {@code str} is
     * reported by the abbreviation dictionary as an abbreviation ending with a period;
     * otherwise returns {@code 0}.
     */
    @Override // edu.emory.clir.clearnlp.tokenization.AbstractTokenizer
    protected int adjustLastSymbolSequenceGap(char[] cArr, int i, String str) {
        if (cArr[i] != '.') {
            return 0;
        }
        String lowerCased = StringUtils.toLowerCase(str);
        return this.d_abbreviation.isAbbreviationEndingWithPeriod(lowerCased) ? 1 : 0;
    }

    /** Delegates the decision to the hyphen dictionary's {@code preserveHyphen}. */
    @Override // edu.emory.clir.clearnlp.tokenization.AbstractTokenizer
    protected boolean preserveSymbolInBetween(char[] cArr, int i) {
        return this.d_hyphen.preserveHyphen(cArr, i);
    }

    /**
     * Tries the apostrophe tokenizer first and falls back to the compound dictionary only when
     * the first attempt returns {@code false}.
     *
     * @return {@code true} if either attempt returned {@code true}
     */
    @Override // edu.emory.clir.clearnlp.tokenization.AbstractTokenizer
    protected boolean tokenizeWordsMore(List<String> list, String str, String str2, char[] cArr) {
        if (tokenize(list, str, str2, cArr, this.d_apostrophe)) {
            return true;
        }
        return tokenize(list, str, str2, cArr, this.d_compound);
    }

    /**
     * Tokenizes the stream and groups the tokens into sentences.
     *
     * <p>A boundary is placed after a token for which {@code isFinalMarksOnly} returns
     * {@code true}. If the token right after it closes a quotation or bracket that is currently
     * open, the boundary is postponed so that the closing symbol(s) stay in the same sentence.
     * Tokens left over after the last boundary form a final sentence.
     *
     * @param inputStream the input handed to {@code tokenize(InputStream)}
     * @return the sentences in order; each one is a {@link List#subList(int, int)} view of the
     *         token list, not a copy
     */
    @Override // edu.emory.clir.clearnlp.tokenization.AbstractTokenizer
    public List<List<String>> segmentize(InputStream inputStream) {
        List<List<String>> sentences = new ArrayList<List<String>>();
        int[] openCounts = new int[R_BRACKETS.length];
        List<String> tokens = tokenize(inputStream);
        int size = tokens.size();
        boolean boundaryPending = false;
        int begin = 0;

        for (int index = 0; index < size; index++) {
            String token = tokens.get(index);
            countBrackets(token, openCounts);

            // Nothing to decide unless a boundary was postponed or this token is a final mark.
            if (!boundaryPending && !isFinalMarksOnly(token)) {
                continue;
            }

            int next = index + 1;
            if (next < size && isFollowedByBracket(tokens.get(next), openCounts)) {
                // The closing symbol belongs to this sentence; decide again after consuming it.
                boundaryPending = true;
            } else {
                sentences.add(tokens.subList(begin, next));
                begin = next;
                boundaryPending = false;
            }
        }

        if (begin < size) {
            sentences.add(tokens.subList(begin, size));
        }
        return sentences;
    }

    /**
     * Updates the open-symbol counters for one token.
     *
     * <p>A quotation mark toggles slot 0 between closed (0) and open (1), because the same
     * symbol both opens and closes. Opening brackets increment their slot. Closing brackets
     * decrement it, but never below zero: an unmatched closing bracket must not cancel out a
     * later, genuine opening bracket.
     *
     * @param token  the token to inspect
     * @param counts per-symbol open counts, indexed like {@code L_BRACKETS}/{@code R_BRACKETS}
     */
    private void countBrackets(String token, int[] counts) {
        if (token.equals(L_BRACKETS[0])) {
            counts[0] = (counts[0] == 0) ? 1 : 0;
            return;
        }
        for (int k = 1; k < counts.length; k++) {
            if (token.equals(L_BRACKETS[k])) {
                counts[k]++;
            } else if (token.equals(R_BRACKETS[k]) && counts[k] > 0) {
                counts[k]--;
            }
        }
    }

    /**
     * Tells whether {@code token} is a closing symbol whose counterpart is currently open.
     *
     * @param token  the token following a potential sentence boundary
     * @param counts per-symbol open counts, indexed like {@code R_BRACKETS}
     * @return {@code true} if {@code token} equals a closing symbol with a positive open count
     */
    private boolean isFollowedByBracket(String token, int[] counts) {
        for (int k = 0; k < R_BRACKETS.length; k++) {
            if (counts[k] > 0 && token.equals(R_BRACKETS[k])) {
                return true;
            }
        }
        return false;
    }
}
