package cc.factorie.app.nlp.segment;

import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Iterable;
import scala.collection.IterableLike;
import scala.collection.Iterator;
import scala.collection.Set;
import scala.collection.Set$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.StringBuilder;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.util.matching.Regex;

/* compiled from: PunktSentenceSegmenter.scala */
/* loaded from: input_file:cc/factorie/app/nlp/segment/PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer.class */
/**
 * Sentence tokenizer of the Punkt segmenter (decompiled from Scala).
 *
 * <p>Candidate boundaries are located with the language variables' period-context regex; each
 * candidate is word-tokenized, annotated in two passes, and accepted as a slice boundary when an
 * annotated token (other than the last one) satisfies the sentence-break predicate.
 *
 * <p>If a training text is supplied to the constructor, the parameters are replaced at construction
 * time by the result of {@link #train(String, boolean)}.
 *
 * <p>NOTE(review): raw Scala collection types and {@code BoxesRunTime} calls are kept on purpose;
 * they mirror what the Scala compiler emitted and what the generated closure classes expect.
 */
public class PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer extends PunktSentenceSegmenter$Punkt$PunktBase {
    /** Optional training text handed to the constructor. */
    private final Option<String> trainText;
    /** Flag forwarded to the trainer when training from {@link #trainText}. */
    private final boolean verbose;
    /** Token strings for which {@link #orthoHeuristic} immediately answers {@code Some(false)}. */
    private final Set<String> PUNCTUATION;

    /**
     * Compiler-generated accessor that exposes the superclass {@code languageVars()} (presumably
     * for use by the generated closure classes).
     */
    public /* synthetic */ PunktSentenceSegmenter$Punkt$PunktLanguageVars cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$super$languageVars() {
        return super.languageVars();
    }

    /** @return the optional training text given at construction */
    public Option<String> trainText() {
        return this.trainText;
    }

    /** @return the verbose flag given at construction */
    public boolean verbose() {
        return this.verbose;
    }

    /** @return the set of punctuation strings {@code ; : , . ! ?} */
    public Set<String> PUNCTUATION() {
        return this.PUNCTUATION;
    }

    /**
     * Runs a {@code PunktTrainer} over {@code str}, seeded with this tokenizer's language variables
     * and current parameters, and returns the trainer's parameters.
     *
     * @param str text to train on
     * @param z   verbose flag passed to the trainer
     * @return the parameters held by the trainer after construction
     */
    public PunktSentenceSegmenter$Punkt$PunktParameters train(String str, boolean z) {
        PunktSentenceSegmenter$Punkt$PunktTrainer trainer =
                new PunktSentenceSegmenter$Punkt$PunktTrainer(new Some<String>(str), z, super.languageVars(), params());
        return trainer.params();
    }

    /** Scala default for the second argument of {@link #train(String, boolean)}. */
    public boolean train$default$2() {
        return false;
    }

    /**
     * Splits {@code str} into sentence strings by mapping every slice from
     * {@link #slicesFromText(String)} through the generated slice-to-string closure.
     *
     * @param str text to segment
     * @param z   when {@code true}, the result is post-processed by {@link #realignBoundaries}
     * @return the sentence strings
     */
    public ArrayBuffer<String> sentencesFromText(String str, boolean z) {
        ArrayBuffer<String> sentences = (ArrayBuffer) slicesFromText(str).map(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$4(this, str), ArrayBuffer$.MODULE$.canBuildFrom());
        return z ? realignBoundaries(sentences) : sentences;
    }

    /** Scala default for the second argument of {@link #sentencesFromText(String, boolean)}. */
    public boolean sentencesFromText$default$2() {
        return false;
    }

    /**
     * Annotates the tokens in place: first pass (inherited {@code annotateFirstPass}), then
     * {@link #annotateSecondPass}.
     *
     * @param iterable tokens to annotate
     * @return the same {@code iterable} instance
     */
    public Iterable<PunktSentenceSegmenter$Punkt$PunktToken> annotateTokens(Iterable<PunktSentenceSegmenter$Punkt$PunktToken> iterable) {
        annotateFirstPass(iterable);
        annotateSecondPass(iterable);
        return iterable;
    }

    /**
     * Builds sentence strings from annotated tokens. The per-token work is done by the generated
     * closure, which receives the text, the output buffer, a position counter, a {@code \s*} regex
     * and a running-sentence holder; whatever remains in the holder afterwards is appended if it is
     * not the empty string.
     *
     * @param str         the text the tokens came from
     * @param arrayBuffer annotated tokens
     * @return the collected sentences
     */
    public ArrayBuffer<String> buildSentenceList(String str, ArrayBuffer<PunktSentenceSegmenter$Punkt$PunktToken> arrayBuffer) {
        ArrayBuffer<String> sentences = new ArrayBuffer<>();
        IntRef position = IntRef.create(0);
        Regex whitespace = new StringOps(Predef$.MODULE$.augmentString("\\s*")).r();
        ObjectRef pending = ObjectRef.create("");
        arrayBuffer.foreach(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$buildSentenceList$1(this, str, sentences, position, whitespace, pending));
        String leftover = (String) pending.elem;
        // Null-safe "differs from the empty string": a null holder counts as different, which is
        // what the decompiled null-branch was encoding (it was emitted as an invalid String/int
        // comparison).
        if (!"".equals(leftover)) {
            sentences.$plus$eq(leftover);
        }
        return sentences;
    }

    /**
     * Applies {@link #secondPassAnnotation} (via the generated closures) to the token pairs
     * produced by {@code iteratePairs} that pass the generated filter.
     *
     * @param iterable tokens already processed by the first pass
     */
    public void annotateSecondPass(Iterable<PunktSentenceSegmenter$Punkt$PunktToken> iterable) {
        PunktSentenceSegmenter$Punkt$.MODULE$.iteratePairs(iterable).withFilter(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$annotateSecondPass$1(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$annotateSecondPass$2(this));
    }

    /**
     * Revises the {@code sentenceBreak} / {@code abbr} flags of a period-final token using the
     * token that follows it. Tokens that are not period-final are left untouched.
     *
     * @param punktSentenceSegmenter$Punkt$PunktToken  the token whose flags may be updated
     * @param punktSentenceSegmenter$Punkt$PunktToken2 the following token (read only here)
     */
    public void secondPassAnnotation(PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken, PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken2) {
        final PunktSentenceSegmenter$Punkt$PunktToken current = punktSentenceSegmenter$Punkt$PunktToken;
        final PunktSentenceSegmenter$Punkt$PunktToken next = punktSentenceSegmenter$Punkt$PunktToken2;
        if (!current.periodFinal()) {
            return;
        }
        String type = current.typeNoPeriod();
        String nextType = next.typeNoSentPeriod();
        boolean tokenIsInitial = current.isInitial();

        // Known collocation (type, nextType): not a break, treat the token as an abbreviation.
        if (params().collocations().contains(new Tuple2(type, nextType))) {
            current.sentenceBreak_$eq(false);
            current.abbr_$eq(true);
            return;
        }

        // Abbreviation or ellipsis that is not an initial: a positive orthographic verdict, or an
        // upper-case known sentence starter, makes it a break.
        if ((current.abbr() || current.ellipsis()) && !tokenIsInitial) {
            Option<Object> verdict = orthoHeuristic(next);
            if (verdict.isDefined() && BoxesRunTime.unboxToBoolean(verdict.get())) {
                current.sentenceBreak_$eq(true);
                return;
            }
            if (next.firstUpper() && params().sentenceStarters().contains(nextType)) {
                current.sentenceBreak_$eq(true);
                return;
            }
        }

        // Only initials and tokens of type "##number##" are examined further. The comparison is
        // null-safe: a null type is treated as "not the number type".
        if (!tokenIsInitial && !"##number##".equals(type)) {
            return;
        }

        Option<Object> verdict = orthoHeuristic(next);
        if (verdict.isDefined() && !BoxesRunTime.unboxToBoolean(verdict.get())) {
            // Definite "no break" verdict.
            current.sentenceBreak_$eq(false);
            current.abbr_$eq(true);
            return;
        }
        // Remaining case handled below: undefined verdict, token is an initial, next token starts
        // upper-case and its orthographic context lacks the ORTHO_LC flag. Anything else: no change.
        if (verdict.isDefined() || !tokenIsInitial || !next.firstUpper() || PunktSentenceSegmenter$Punkt$.MODULE$.hasFlag(BoxesRunTime.unboxToInt(params().orthoContext().apply(nextType)), PunktSentenceSegmenter$Punkt$.MODULE$.ORTHO_LC())) {
            return;
        }
        current.sentenceBreak_$eq(false);
        current.abbr_$eq(true);
    }

    /**
     * Three-valued orthographic test on a token, based on its capitalisation and the flags stored
     * for its type in {@code params().orthoContext()}.
     *
     * <ul>
     *   <li>{@code Some(false)} if the token string is in {@link #PUNCTUATION()};</li>
     *   <li>{@code Some(true)} if it starts upper-case and its context has ORTHO_LC but not
     *       ORTHO_MID_UC;</li>
     *   <li>{@code Some(false)} if it starts lower-case and its context has ORTHO_UC or lacks
     *       ORTHO_BEG_LC;</li>
     *   <li>{@code None} otherwise.</li>
     * </ul>
     *
     * @param punktSentenceSegmenter$Punkt$PunktToken the token to test
     * @return a boxed Boolean wrapped in {@code Some}, or {@code None} when undecided
     */
    public Option<Object> orthoHeuristic(PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken) {
        final PunktSentenceSegmenter$Punkt$PunktToken tok = punktSentenceSegmenter$Punkt$PunktToken;
        final PunktSentenceSegmenter$Punkt$ punkt = PunktSentenceSegmenter$Punkt$.MODULE$;

        if (PUNCTUATION().contains(tok.token())) {
            return new Some<Object>(BoxesRunTime.boxToBoolean(false));
        }
        int context = BoxesRunTime.unboxToInt(params().orthoContext().apply(tok.typeNoSentPeriod()));
        if (tok.firstUpper() && punkt.hasFlag(context, punkt.ORTHO_LC()) && !punkt.hasFlag(context, punkt.ORTHO_MID_UC())) {
            return new Some<Object>(BoxesRunTime.boxToBoolean(true));
        }
        if (tok.firstLower() && (punkt.hasFlag(context, punkt.ORTHO_UC()) || !punkt.hasFlag(context, punkt.ORTHO_BEG_LC()))) {
            return new Some<Object>(BoxesRunTime.boxToBoolean(false));
        }
        // None$ is an Option<Nothing$> on the Java side; go through the raw type to return it.
        return (Option) None$.MODULE$;
    }

    /**
     * Word-tokenizes and annotates {@code str}, then searches all tokens except the last for one
     * accepted by the generated predicate (presumably "is a sentence break" - confirm against the
     * closure class).
     *
     * @param str text around a candidate boundary
     * @return the first matching token, if any
     */
    public Option<PunktSentenceSegmenter$Punkt$PunktToken> textContainsSentenceBreak(String str) {
        return ((IterableLike) annotateTokens(tokenizeWords(str)).dropRight(1)).find(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$textContainsSentenceBreak$1(this));
    }

    /**
     * Computes sentence slices of {@code str} as {@code (start, end, type)} triples.
     *
     * <p>Every match of the period-context regex is checked with
     * {@link #textContainsSentenceBreak(String)} on the concatenation of its groups 0 and 1. On
     * success a slice from the previous start to the match end is emitted, typed {@code AS} when
     * the breaking token is flagged as an abbreviation and {@code S} otherwise. A final slice up to
     * {@code str.length()} is always appended.
     *
     * @param str text to segment
     * @return the slices in text order
     */
    public ArrayBuffer<Tuple3<Object, Object, TokenType>> slicesFromText(String str) {
        int sliceStart = 0;
        ArrayBuffer<Tuple3<Object, Object, TokenType>> slices = new ArrayBuffer<>();
        Iterator candidates = super.languageVars().periodContextRegex().findAllIn(str).matchData();
        while (candidates.hasNext()) {
            Regex.Match candidate = (Regex.Match) candidates.next();
            String context = candidate.group(0) + candidate.group(1);
            Option<PunktSentenceSegmenter$Punkt$PunktToken> breakToken = textContainsSentenceBreak(context);
            if (breakToken.isDefined()) {
                boolean afterAbbreviation = ((PunktSentenceSegmenter$Punkt$PunktToken) breakToken.get()).abbr();
                slices.$plus$eq(new Tuple3(BoxesRunTime.boxToInteger(sliceStart), BoxesRunTime.boxToInteger(candidate.end()), afterAbbreviation ? AS$.MODULE$ : S$.MODULE$));
                // The next slice starts at group 2 when the match has more than two group names,
                // otherwise right after the match.
                sliceStart = candidate.groupNames().length() > 2 ? candidate.start(2) : candidate.end();
            }
        }
        slices.$plus$eq(new Tuple3(BoxesRunTime.boxToInteger(sliceStart), BoxesRunTime.boxToInteger(str.length()), S$.MODULE$));
        return slices;
    }

    /**
     * Produces a new sentence buffer by running the generated realignment closures over the
     * consecutive pairs of {@code arrayBuffer}. The closures share a counter (initially 0) and the
     * output buffer; the exact realignment rule lives in those closure classes.
     *
     * @param arrayBuffer sentences to realign
     * @return the realigned sentences
     */
    public ArrayBuffer<String> realignBoundaries(ArrayBuffer<String> arrayBuffer) {
        IntRef counter = IntRef.create(0);
        ArrayBuffer<String> realigned = new ArrayBuffer<>();
        PunktSentenceSegmenter$Punkt$.MODULE$.iteratePairs(arrayBuffer).withFilter(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$realignBoundaries$1(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer$$anonfun$realignBoundaries$2(this, counter, realigned));
        return realigned;
    }

    /**
     * Alias for {@link #sentencesFromText(String, boolean)}.
     *
     * @param str text to segment
     * @param z   whether to realign boundaries
     * @return the sentence strings
     */
    public ArrayBuffer<String> tokenize(String str, boolean z) {
        return sentencesFromText(str, z);
    }

    /** Scala default for the second argument of {@link #tokenize(String, boolean)}. */
    public boolean tokenize$default$2() {
        return false;
    }

    /**
     * Alias for {@link #slicesFromText(String)}.
     *
     * @param str text to segment
     * @return {@code (start, end, type)} slices
     */
    public ArrayBuffer<Tuple3<Object, Object, TokenType>> spanTokenize(String str) {
        return slicesFromText(str);
    }

    /**
     * Creates the tokenizer and, when {@code option} holds a training text, replaces the parameters
     * with the result of training on it.
     *
     * @param option optional training text; a {@code null} reference fails with a
     *               {@link NullPointerException}
     * @param z      verbose flag, stored and forwarded to training
     * @param punktSentenceSegmenter$Punkt$PunktLanguageVars language variables passed to the base class
     * @param punktSentenceSegmenter$Punkt$PunktParameters   initial parameters passed to the base class
     */
    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
    public PunktSentenceSegmenter$Punkt$PunktSentenceTokenizer(Option<String> option, boolean z, PunktSentenceSegmenter$Punkt$PunktLanguageVars punktSentenceSegmenter$Punkt$PunktLanguageVars, PunktSentenceSegmenter$Punkt$PunktParameters punktSentenceSegmenter$Punkt$PunktParameters) {
        super(punktSentenceSegmenter$Punkt$PunktLanguageVars, punktSentenceSegmenter$Punkt$PunktParameters);
        this.trainText = option;
        this.verbose = z;
        this.PUNCTUATION = Set$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{";", ":", ",", ".", "!", "?"}));
        // Train only when a text was supplied (Some); None leaves the given parameters in place.
        if (option.isDefined()) {
            super.params_$eq(train(option.get(), z));
        }
    }
}
