package net.sf.okapi.lib.segmentation;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeMap;
import java.util.regex.Pattern;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sf/okapi/lib/segmentation/SRXSegmenter.class */
/**
 * {@link ISegmenter} implementation that holds a set of compiled segmentation rules, an optional
 * mask rule, a collection of boolean options, and the results (split positions and ranges) of the
 * last segmentation.
 *
 * <p>NOTE(review): this source was produced by a decompiler. The main algorithm,
 * {@link #computeSegments(TextContainer)}, could not be recovered and currently throws
 * {@link UnsupportedOperationException}; the helper methods below are what that algorithm relies on.
 */
public class SRXSegmenter implements ISegmenter {
    /** Text whose length stands in for an isolated code when isolated codes are treated as whitespace. */
    private static final String ISOLATED_CODE_REPLACEMENT_TEXT = " ";
    /** Number of characters an inline code occupies in coded text: the marker plus the character after it. */
    private static final int CODE_MARKER_LENGTH = 2;
    private boolean segmentSubFlows;
    private boolean cascade;
    private boolean includeStartCodes;
    private boolean includeEndCodes;
    private boolean includeIsolatedCodes;
    private LocaleId currentLanguageCode;
    private boolean oneSegmentIncludesAll;
    private boolean trimLeadingWS;
    private boolean trimTrailingWS;
    private boolean useJavaRegex;
    private boolean trimCodes;
    private boolean treatIsolatedCodesAsWhitespace;
    private ArrayList<CompiledRule> rules;
    private Pattern maskRule;
    private TreeMap<Integer, Boolean> splits;
    private List<Integer> finalSplits;
    private ArrayList<Integer> starts;
    private ArrayList<Integer> ends;
    /** Sentence break iterator; only assigned by {@link #setLanguage(LocaleId)} with a non-null locale. */
    private BreakIterator icu4jBreakIterator;
    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    private boolean useIcu4JBreakRules = false;

    /** Creates a segmenter with every option set to the defaults applied by {@link #reset()}. */
    public SRXSegmenter() {
        reset();
    }

    /**
     * Restores the options to their defaults and discards the language, the rules, the mask rule
     * and the working split map.
     *
     * <p>The previously computed split positions and ranges, and the ICU4J break iterator created
     * by {@link #setLanguage(LocaleId)}, are not touched by this method.
     */
    public void reset() {
        this.currentLanguageCode = null;
        this.rules = new ArrayList<>();
        this.maskRule = null;
        this.splits = null;
        this.segmentSubFlows = true;
        this.cascade = false;
        this.includeStartCodes = false;
        this.includeEndCodes = true;
        this.includeIsolatedCodes = false;
        this.oneSegmentIncludesAll = false;
        this.trimLeadingWS = false;
        this.trimTrailingWS = false;
        this.useJavaRegex = true;
        this.trimCodes = false;
        this.treatIsolatedCodesAsWhitespace = false;
        this.useIcu4JBreakRules = false;
    }

    /**
     * Sets all the segmentation options at once.
     *
     * @param segmentSubFlows new value of the segment-sub-flows option
     * @param includeStartCodes new value of the include-start-codes option
     * @param includeEndCodes new value of the include-end-codes option
     * @param includeIsolatedCodes new value of the include-isolated-codes option
     * @param oneSegmentIncludesAll new value of the one-segment-includes-all option
     * @param trimLeadingWS new value of the trim-leading-whitespace option
     * @param trimTrailingWS new value of the trim-trailing-whitespace option
     * @param useJavaRegex new value of the use-Java-regex option; {@code false} is still stored
     *        but an error is logged
     * @param useIcu4JBreakRules new value of the use-ICU4J-break-rules option
     * @param treatIsolatedCodesAsWhitespace new value of the treat-isolated-codes-as-whitespace option
     */
    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes,
            boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS,
            boolean trimTrailingWS, boolean useJavaRegex, boolean useIcu4JBreakRules,
            boolean treatIsolatedCodesAsWhitespace) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
        this.useJavaRegex = useJavaRegex;
        this.useIcu4JBreakRules = useIcu4JBreakRules;
        this.treatIsolatedCodesAsWhitespace = treatIsolatedCodesAsWhitespace;
        if (!useJavaRegex) {
            this.LOGGER.error("Use of ICU regex has been removed.");
        }
    }

    /**
     * Sets the basic segmentation options, leaving the regex, ICU4J and isolated-code-as-whitespace
     * options as they are.
     *
     * @param segmentSubFlows new value of the segment-sub-flows option
     * @param includeStartCodes new value of the include-start-codes option
     * @param includeEndCodes new value of the include-end-codes option
     * @param includeIsolatedCodes new value of the include-isolated-codes option
     * @param oneSegmentIncludesAll new value of the one-segment-includes-all option
     * @param trimLeadingWS new value of the trim-leading-whitespace option
     * @param trimTrailingWS new value of the trim-trailing-whitespace option
     */
    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes,
            boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS,
            boolean trimTrailingWS) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
    }

    /** @return the current value of the one-segment-includes-all option */
    public boolean oneSegmentIncludesAll() {
        return this.oneSegmentIncludesAll;
    }

    /** @return the current value of the segment-sub-flows option */
    public boolean segmentSubFlows() {
        return this.segmentSubFlows;
    }

    /** @return the current value of the cascade option */
    public boolean cascade() {
        return this.cascade;
    }

    /** @return the current value of the trim-leading-whitespace option */
    public boolean trimLeadingWhitespaces() {
        return this.trimLeadingWS;
    }

    /** @return the current value of the trim-trailing-whitespace option */
    public boolean trimTrailingWhitespaces() {
        return this.trimTrailingWS;
    }

    /** @return the current value of the use-Java-regex option */
    public boolean useJavaRegex() {
        return this.useJavaRegex;
    }

    /** @return the current value of the treat-isolated-codes-as-whitespace option */
    public boolean treatIsolatedCodesAsWhitespace() {
        return this.treatIsolatedCodesAsWhitespace;
    }

    /**
     * Sets the use-Java-regex option. Passing {@code false} is still stored but logs a warning.
     *
     * @param useJavaRegex the new value of the option
     */
    public void setUseJavaRegex(boolean useJavaRegex) {
        this.useJavaRegex = useJavaRegex;
        if (!useJavaRegex) {
            this.LOGGER.warn("Use of ICU regex is deprecated and may be removed in the future.");
        }
    }

    /** @return the current value of the include-start-codes option */
    public boolean includeStartCodes() {
        return this.includeStartCodes;
    }

    /** @return the current value of the include-end-codes option */
    public boolean includeEndCodes() {
        return this.includeEndCodes;
    }

    /** @return the current value of the include-isolated-codes option */
    public boolean includeIsolatedCodes() {
        return this.includeIsolatedCodes;
    }

    /**
     * Wraps the given text in a new {@link TextContainer} and delegates to
     * {@link #computeSegments(TextContainer)}.
     *
     * @param text the text to segment
     * @return whatever {@link #computeSegments(TextContainer)} returns for the wrapped text
     */
    public int computeSegments(String text) {
        return computeSegments(new TextContainer(text));
    }

    /* JADX WARN: Code restructure failed: missing block: B:103:0x031b, code lost:
    
        if (r0.contains(r0) != false) goto L96;
     */
    /* JADX WARN: Code restructure failed: missing block: B:104:0x031e, code lost:
    
        r22 = r22 + net.sf.okapi.lib.segmentation.SRXSegmenter.CODE_MARKER_LENGTH;
     */
    /* JADX WARN: Code restructure failed: missing block: B:105:0x032a, code lost:
    
        if (r22 >= (r0.length() - 1)) goto L193;
     */
    /* JADX WARN: Code restructure failed: missing block: B:107:0x033c, code lost:
    
        if (r0.contains(net.sf.okapi.common.resource.TextFragment.Marker.asEnum(r0.charAt(r22))) != false) goto L195;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the segmentation algorithm, whose body was not recovered by the decompiler.
     *
     * @param container the container to segment (unused by this placeholder)
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public int computeSegments(TextContainer container) {
        /*
            Method dump skipped, instructions count: 1370
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: net.sf.okapi.lib.segmentation.SRXSegmenter.computeSegments(net.sf.okapi.common.resource.TextContainer):int");
    }

    /**
     * Builds one breaking rule per boundary reported by the ICU4J break iterator for the given text.
     *
     * <p>Each boundary other than the end of the text is first moved back over any whitespace that
     * precedes it, then turned into a rule whose "before" pattern matches exactly that many
     * characters from the start of the text.
     *
     * <p>The break iterator must have been created through {@link #setLanguage(LocaleId)} with a
     * non-null locale before this is called; otherwise a {@link NullPointerException} results.
     *
     * @param text the text to run the break iterator over
     * @return the generated rules, in boundary order (possibly empty)
     */
    private Collection<CompiledRule> getIcu4jBreakRules(String text) {
        List<CompiledRule> breakRules = new ArrayList<>();
        this.icu4jBreakIterator.setText(text);
        SRXDocument srxDocument = new SRXDocument();
        // -1 is the value the iterator returns once all boundaries have been reported.
        for (int boundary = this.icu4jBreakIterator.next(); boundary != -1; boundary = this.icu4jBreakIterator.next()) {
            if (boundary == text.length()) {
                continue;
            }
            int breakAt = boundary;
            while (breakAt > 0 && Character.isWhitespace(text.codePointAt(breakAt - 1))) {
                breakAt--;
            }
            // Plain concatenation on purpose: String.format("%d") localizes digits for the default
            // locale, which can produce a quantifier the regex engine cannot parse.
            String beforeBreak = "^(.|\\s){" + breakAt + "}";
            String regex = srxDocument.generateRuleRegex(new Rule(beforeBreak, "", true))
                    .replace(SRXDocument.ANYCODE, SRXDocument.INLINECODE_PATTERN);
            breakRules.add(new CompiledRule(regex, true));
        }
        return breakRules;
    }

    /**
     * Tells whether the given marker is an opening or a closing code marker.
     *
     * @param marker the marker to test
     * @return {@code true} for {@code OPENING} and {@code CLOSING}, {@code false} otherwise
     */
    private static boolean isPairedCode(TextFragment.Marker marker) {
        return marker == TextFragment.Marker.OPENING || marker == TextFragment.Marker.CLOSING;
    }

    /**
     * Shifts a position by the total length of a number of inline codes.
     *
     * @param position the position to shift
     * @param pairedCodes number of opening/closing codes; each counts for {@link #CODE_MARKER_LENGTH}
     * @param isolatedCodes number of isolated codes; each counts for the length of the whitespace
     *        replacement when isolated codes are treated as whitespace, and for
     *        {@link #CODE_MARKER_LENGTH} otherwise
     * @param increase {@code true} to add the total length, {@code false} to subtract it
     * @return the shifted position
     */
    private int calculatePosition(int position, int pairedCodes, int isolatedCodes, boolean increase) {
        int isolatedCodeLength = treatIsolatedCodesAsWhitespace()
                ? ISOLATED_CODE_REPLACEMENT_TEXT.length()
                : CODE_MARKER_LENGTH;
        int pairedLength = pairedCodes * CODE_MARKER_LENGTH;
        int isolatedLength = isolatedCodes * isolatedCodeLength;
        if (increase) {
            return position + pairedLength + isolatedLength;
        }
        return (position - pairedLength) - isolatedLength;
    }

    /** Shorthand for {@link #calculatePosition(int, int, int, boolean)} with {@code increase = true}. */
    private int calculateIncreasedPosition(int position, int pairedCodes, int isolatedCodes) {
        return calculatePosition(position, pairedCodes, isolatedCodes, true);
    }

    /** Shorthand for {@link #calculatePosition(int, int, int, boolean)} with {@code increase = false}. */
    private int calculateDecreasedPosition(int position, int pairedCodes, int isolatedCodes) {
        return calculatePosition(position, pairedCodes, isolatedCodes, false);
    }

    /**
     * Moves a position forward by the length of every code that precedes it.
     *
     * <p>The two lists are walked in parallel: entry {@code k} of {@code codePositions} decides
     * whether code {@code k} lies before {@code position}, and entry {@code k} of
     * {@code originalCodePositions} is the index in {@code codedText} where that code's marker
     * character is read. Walking stops at the first code position that is not below
     * {@code position}.
     *
     * @param codedText the coded text containing the marker characters
     * @param position the position to adjust
     * @param codePositions code positions compared against {@code position}; presumably the output
     *        of {@link #storeCodePositions(String)}
     * @param originalCodePositions indices of the marker characters in {@code codedText};
     *        presumably the output of {@link #storeOriginalCodePositions(String)}
     * @return the position increased by the lengths of the preceding codes
     */
    int recalcPos(String codedText, int position, List<Integer> codePositions, List<Integer> originalCodePositions) {
        int pairedCodes = 0;
        int isolatedCodes = 0;
        for (int k = 0; k < codePositions.size() && codePositions.get(k) < position; k++) {
            TextFragment.Marker marker = TextFragment.Marker.asEnum(codedText.charAt(originalCodePositions.get(k)));
            if (isPairedCode(marker)) {
                pairedCodes++;
            } else if (marker == TextFragment.Marker.ISOLATED) {
                isolatedCodes++;
            }
        }
        return calculateIncreasedPosition(position, pairedCodes, isolatedCodes);
    }

    /**
     * Moves a position backward by the length of every code that precedes it.
     *
     * @param codedText the coded text containing the marker characters
     * @param position the position to adjust
     * @param originalCodePositions indices of the marker characters in {@code codedText}, in
     *        ascending order (iteration stops at the first index that is not below {@code position})
     * @return the position decreased by the lengths of the preceding codes
     */
    int recalcPosBack(String codedText, int position, List<Integer> originalCodePositions) {
        int pairedCodes = 0;
        int isolatedCodes = 0;
        for (int codeIndex : originalCodePositions) {
            if (codeIndex >= position) {
                break;
            }
            TextFragment.Marker marker = TextFragment.Marker.asEnum(codedText.charAt(codeIndex));
            if (isPairedCode(marker)) {
                pairedCodes++;
            } else if (marker == TextFragment.Marker.ISOLATED) {
                isolatedCodes++;
            }
        }
        return calculateDecreasedPosition(position, pairedCodes, isolatedCodes);
    }

    /**
     * Lists, for every inline code in the coded text, the index of its marker character reduced by
     * the total length of the codes found before it.
     *
     * @param codedText the coded text to scan
     * @return the adjusted code positions, in order of appearance
     */
    List<Integer> storeCodePositions(String codedText) {
        List<Integer> positions = new ArrayList<>();
        int pairedCodes = 0;
        int isolatedCodes = 0;
        for (int index = 0; index < codedText.length(); index++) {
            TextFragment.Marker marker = TextFragment.Marker.asEnum(codedText.charAt(index));
            boolean paired = isPairedCode(marker);
            if (!paired && marker != TextFragment.Marker.ISOLATED) {
                continue;
            }
            positions.add(calculateDecreasedPosition(index, pairedCodes, isolatedCodes));
            if (paired) {
                pairedCodes++;
            } else {
                isolatedCodes++;
            }
            // Skip the character that follows the marker: it belongs to the code.
            index++;
        }
        return positions;
    }

    /**
     * Lists the index of every marker character found in the coded text.
     *
     * @param codedText the coded text to scan
     * @return the indices of the marker characters, in ascending order
     */
    List<Integer> storeOriginalCodePositions(String codedText) {
        List<Integer> positions = new ArrayList<>();
        for (int index = 0; index < codedText.length(); index++) {
            char c = codedText.charAt(index);
            // 0xE101..0xE103: presumably the opening, closing and isolated marker characters of
            // TextFragment - TODO confirm against TextFragment's constants.
            if (c == 0xE101 || c == 0xE102 || c == 0xE103) {
                positions.add(index);
                // Skip the character that follows the marker: it belongs to the code.
                index++;
            }
        }
        return positions;
    }

    /**
     * Not implemented.
     *
     * @param textContainer ignored
     * @return always {@code null}
     */
    public Range getNextSegmentRange(TextContainer textContainer) {
        return null;
    }

    /**
     * Gets the final split positions, creating an empty list first if none exists yet.
     *
     * @return a read-only view of the split positions (never {@code null})
     */
    public List<Integer> getSplitPositions() {
        if (this.finalSplits == null) {
            this.finalSplits = new ArrayList<>();
        }
        return Collections.unmodifiableList(this.finalSplits);
    }

    /**
     * Gets the segment ranges built from the stored start and end positions.
     *
     * @return a read-only list with one range per stored start position, or {@code null} when no
     *         start positions have been stored
     */
    public List<Range> getRanges() {
        if (this.starts == null) {
            return null;
        }
        int count = this.starts.size();
        List<Range> ranges = new ArrayList<>(count);
        for (int k = 0; k < count; k++) {
            ranges.add(new Range(this.starts.get(k).intValue(), this.ends.get(k).intValue()));
        }
        return Collections.unmodifiableList(ranges);
    }

    /** @return the locale last passed to {@link #setLanguage(LocaleId)}, or {@code null} */
    public LocaleId getLanguage() {
        return this.currentLanguageCode;
    }

    /**
     * Sets the current language and, when it is not {@code null}, creates the matching ICU4J
     * sentence break iterator. A {@code null} locale leaves any existing break iterator in place.
     *
     * @param localeId the new language; may be {@code null}
     */
    public void setLanguage(LocaleId localeId) {
        if (localeId != null) {
            this.icu4jBreakIterator = RuleBasedBreakIterator.getSentenceInstance(ULocale.createCanonical(localeId.toBCP47()));
        }
        this.currentLanguageCode = localeId;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Sets the cascade option.
     *
     * @param cascade the new value of the option
     */
    public void setCascade(boolean cascade) {
        this.cascade = cascade;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Appends a compiled rule to the list of rules.
     *
     * @param compiledRule the rule to add
     */
    public void addRule(CompiledRule compiledRule) {
        this.rules.add(compiledRule);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Sets the mask rule.
     *
     * @param pattern the regular expression to compile (with Unicode character classes enabled),
     *        or {@code null} or an empty string to remove the mask rule
     */
    public void setMaskRule(String pattern) {
        if (pattern == null || pattern.isEmpty()) {
            this.maskRule = null;
        } else {
            this.maskRule = Pattern.compile(pattern, Pattern.UNICODE_CHARACTER_CLASS);
        }
    }

    /** @param segmentSubFlows the new value of the segment-sub-flows option */
    public void setSegmentSubFlows(boolean segmentSubFlows) {
        this.segmentSubFlows = segmentSubFlows;
    }

    /** @param includeStartCodes the new value of the include-start-codes option */
    public void setIncludeStartCodes(boolean includeStartCodes) {
        this.includeStartCodes = includeStartCodes;
    }

    /** @param includeEndCodes the new value of the include-end-codes option */
    public void setIncludeEndCodes(boolean includeEndCodes) {
        this.includeEndCodes = includeEndCodes;
    }

    /** @param includeIsolatedCodes the new value of the include-isolated-codes option */
    public void setIncludeIsolatedCodes(boolean includeIsolatedCodes) {
        this.includeIsolatedCodes = includeIsolatedCodes;
    }

    /** @param oneSegmentIncludesAll the new value of the one-segment-includes-all option */
    public void setOneSegmentIncludesAll(boolean oneSegmentIncludesAll) {
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
    }

    /** @param trimLeadingWS the new value of the trim-leading-whitespace option */
    public void setTrimLeadingWS(boolean trimLeadingWS) {
        this.trimLeadingWS = trimLeadingWS;
    }

    /** @param trimTrailingWS the new value of the trim-trailing-whitespace option */
    public void setTrimTrailingWS(boolean trimTrailingWS) {
        this.trimTrailingWS = trimTrailingWS;
    }

    /** @param trimCodes the new value of the trim-codes option */
    public void setTrimCodes(boolean trimCodes) {
        this.trimCodes = trimCodes;
    }

    /** @param treatIsolatedCodesAsWhitespace the new value of the treat-isolated-codes-as-whitespace option */
    public void setTreatIsolatedCodesAsWhitespace(boolean treatIsolatedCodesAsWhitespace) {
        this.treatIsolatedCodesAsWhitespace = treatIsolatedCodesAsWhitespace;
    }
}
