package cc.factorie.app.nlp.load;

import cc.factorie.app.nlp.Document;
import cc.factorie.app.nlp.Sentence;
import cc.factorie.app.nlp.coref.Mention;
import cc.factorie.app.nlp.segment.DeterministicSentenceSegmenter$;
import cc.factorie.app.nlp.segment.DeterministicTokenizer$;
import cc.factorie.variable.SpanVar;
import java.io.File;
import scala.Array$;
import scala.Predef$;
import scala.Predef$DummyImplicit$;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.NonLocalReturnControl;
import scala.util.matching.Regex;
import scala.xml.Elem;
import scala.xml.NodeSeq;
import scala.xml.NodeSeq$;
import scala.xml.XML$;

/* compiled from: LoadACE.scala */
/* loaded from: input_file:cc/factorie/app/nlp/load/LoadACE$.class */
/**
 * Singleton backing the Scala object {@code LoadACE}: builds {@link Document}s from
 * ACE source text files and attaches mentions read from the matching APF XML files.
 *
 * <p>Methods whose names start with {@code cc$factorie$app$nlp$load$LoadACE$$} are
 * name-mangled members; they are public here, presumably so the generated closure
 * classes ({@code LoadACE$$anonfun$...}) can call them.
 */
public final class LoadACE$ {
    /** The single instance of this class; assigned exactly once in the static initializer. */
    public static final LoadACE$ MODULE$;

    /** Pattern for angle-bracket tags made only of letters, {@code =}, {@code _}, {@code "}, {@code /} and spaces. */
    private final Regex matchTag;

    static {
        // A blank static final assigned here is valid Java; assigning it from the
        // constructor (as the decompiler rendered it) is not.
        MODULE$ = new LoadACE$();
    }

    /** Returns the tag-matching pattern used by {@link #makeDoc(String)}. */
    private Regex matchTag() {
        return this.matchTag;
    }

    /**
     * Reads the file at {@code str}, rewrites every tag match, and builds a tokenized,
     * sentence-segmented document from the result.
     *
     * <p>The document is named {@code str} and gets an {@code ACEFileIdentifier} attribute
     * holding {@code str} with its last four characters replaced by {@code ".apf.xml"}.
     *
     * @param str path of the source text file
     * @return the processed document
     */
    private Document makeDoc(String str) {
        BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        String contents;
        try {
            contents = source.mkString();
        } finally {
            // Release the file handle even when reading fails.
            source.close();
        }
        // The replacement text for each tag comes from LoadACE$$anonfun$1, which is not visible here.
        Document name = new Document(matchTag().replaceAllIn(contents, new LoadACE$$anonfun$1())).setName(str);
        name.attr().$plus$eq(new ACEFileIdentifier(new StringBuilder().append((String) new StringOps(Predef$.MODULE$.augmentString(str)).dropRight(4)).append(".apf.xml").toString()));
        DeterministicTokenizer$.MODULE$.process(name);
        DeterministicSentenceSegmenter$.MODULE$.process(name);
        int end = ((SpanVar) name.asSection().sentences().last()).end();
        if (end != name.asSection().length() - 1) {
            // The last sentence stops short of the section's final index: create one more
            // sentence over the remainder. The instance is not kept, so the constructor
            // presumably registers the sentence with the section (verify in Sentence).
            new Sentence(name.asSection(), end + 1, (name.asSection().length() - 1) - end);
        }
        return name;
    }

    /**
     * Looks up a token index for a character offset in {@code document}.
     *
     * <p>The per-token closure may perform a Scala non-local return carrying the result;
     * if it never does, {@code -1} is returned.
     *
     * @param i character offset; must satisfy {@code 0 <= i <= document.string().length()}
     * @param document document whose tokens are scanned
     * @return the value returned by the closure, or {@code -1} when it returns nothing
     */
    public int cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex(int i, Document document) {
        // Unique key so that only the non-local return raised for this call is intercepted.
        Object obj = new Object();
        try {
            Predef$.MODULE$.require(i >= 0 && i <= document.string().length());
            document.tokens().foreach(new LoadACE$$anonfun$cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex$1(i, new IntRef(0), obj));
            return -1;
        } catch (NonLocalReturnControl e) {
            if (e.key() == obj) {
                return e.value$mcI$sp();
            }
            // Belongs to some other enclosing call; let it propagate.
            throw e;
        }
    }

    /**
     * Converts the {@code extent/charseq} character range of {@code nodeSeq} into a token span.
     *
     * @param nodeSeq node whose {@code extent/charseq} child carries {@code START} and {@code END} attributes
     * @param document document used to map character offsets to token indices
     * @return pair of (start token index, token count)
     */
    public Tuple2<Object, Object> cc$factorie$app$nlp$load$LoadACE$$getTokenIdxAndLength(NodeSeq nodeSeq, Document document) {
        int i = new StringOps(Predef$.MODULE$.augmentString(cc$factorie$app$nlp$load$LoadACE$$getAttr(nodeSeq.$bslash("extent").$bslash("charseq"), "START"))).toInt();
        // The lookup for the end of the span uses END + 1 as its character offset.
        int i2 = new StringOps(Predef$.MODULE$.augmentString(cc$factorie$app$nlp$load$LoadACE$$getAttr(nodeSeq.$bslash("extent").$bslash("charseq"), "END"))).toInt() + 1;
        int cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex = cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex(i, document);
        return new Tuple2.mcII.sp(cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex, (cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex(i2, document) - cc$factorie$app$nlp$load$LoadACE$$tokenIndexAtCharIndex) + 1);
    }

    /**
     * Returns the text of attribute {@code str} on the first node of {@code nodeSeq}.
     *
     * @param nodeSeq node sequence; its element at index 0 is read
     * @param str attribute name
     * @return the attribute text, or the literal {@code "None"} when the resolved value is {@code null}
     */
    public String cc$factorie$app$nlp$load$LoadACE$$getAttr(NodeSeq nodeSeq, String str) {
        // The fallback for a missing attribute is supplied by LoadACE$$anonfun$2 (not visible here).
        Seq seq = (Seq) nodeSeq.apply(0).attribute(str).getOrElse(new LoadACE$$anonfun$2());
        return seq == null ? "None" : NodeSeq$.MODULE$.seqToNodeSeq(seq).text();
    }

    /**
     * Applies the mention-building closure to every {@code entity} descendant of {@code nodeSeq}.
     *
     * @param nodeSeq parsed APF XML
     * @param document document whose target coref (from {@code getTargetCoref()}) is handed to the closure
     */
    public void addMentionsFromApf(NodeSeq nodeSeq, Document document) {
        nodeSeq.$bslash$bslash("entity").foreach(new LoadACE$$anonfun$addMentionsFromApf$1(document, document.getTargetCoref()));
    }

    /**
     * Finds the first mention in the document's target coref accepted by the predicate built from {@code str}.
     *
     * @param str value passed to the lookup predicate
     * @param document document whose target coref mentions are searched
     * @return the matching mention; {@code get()} is called on the search result without a
     *     presence check, so a miss is expected to throw
     */
    private Mention lookupEntityMention(String str, Document document) {
        return (Mention) document.targetCoref().mentions().find(new LoadACE$$anonfun$lookupEntityMention$1(str)).get();
    }

    /**
     * Parses the file at {@code str} as XML after discarding its first two lines.
     *
     * @param str path of the XML file
     * @return the parsed root element
     */
    private NodeSeq loadXML(String str) {
        BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        try {
            return XML$.MODULE$.loadString(source.getLines().drop(2).mkString("\n"));
        } finally {
            // Release the file handle even when reading or parsing fails.
            source.close();
        }
    }

    /**
     * Loads a document for an APF file, deriving the source text path by replacing the
     * last eight characters of {@code str} with {@code ".sgm"}.
     *
     * @param str path of the APF XML file
     * @return the document built from the source text with mentions from the APF file added
     */
    public Document fromApf(String str) {
        return fromApf(str, makeDoc(new StringBuilder().append((String) new StringOps(Predef$.MODULE$.augmentString(str)).dropRight(8)).append(".sgm").toString()));
    }

    /**
     * Adds the mentions described by the APF file at {@code str} to an existing document.
     *
     * @param str path of the APF XML file
     * @param document document to annotate
     * @return the same {@code document} instance
     */
    public Document fromApf(String str, Document document) {
        addMentionsFromApf(loadXML(str), document);
        return document;
    }

    /**
     * Lists the files in directory {@code str}, keeps those accepted by the filter closure,
     * takes at most {@code i} of them, and maps each through the loading closure.
     *
     * @param str directory path
     * @param i maximum number of files to process
     * @return the mapped results, in listing order
     */
    public Seq<Document> fromDirectory(String str, int i) {
        return (Seq) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(new File(str).listFiles()).filter(new LoadACE$$anonfun$fromDirectory$1())).take(i)).map(new LoadACE$$anonfun$fromDirectory$2(), Array$.MODULE$.fallbackCanBuildFrom(Predef$DummyImplicit$.MODULE$.dummyImplicit()));
    }

    /** Default for the second parameter of {@link #fromDirectory(String, int)}: no limit in practice. */
    public int fromDirectory$default$2() {
        return Integer.MAX_VALUE;
    }

    /**
     * Loads every document from the directory named by the first argument, prints the
     * document count, then applies the per-document closure to each.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the directory path
     */
    public void main(String[] strArr) {
        Seq<Document> fromDirectory = fromDirectory(strArr[0], fromDirectory$default$2());
        Predef$.MODULE$.println(new StringBuilder().append("docs: ").append(BoxesRunTime.boxToInteger(fromDirectory.size())).toString());
        fromDirectory.foreach(new LoadACE$$anonfun$main$1());
    }

    /** Builds the singleton state; invoked only from the static initializer. */
    private LoadACE$() {
        this.matchTag = new StringOps(Predef$.MODULE$.augmentString("<[A-Za-z=_\"/ ]*>")).r();
    }
}
