/*
 * Decompiled with CFR 0.152.
 */
package io.annot8.components.opennlp.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.bounds.Bounds;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.BadConfigurationException;
import io.annot8.api.settings.NoSettings;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.components.base.processors.AbstractTextProcessor;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;

@ComponentName(value="OpenNLP Tokenizer")
@ComponentDescription(value="Tokenizes words and sentences using OpenNLP tokenization models")
public class Tokenizer
extends AbstractProcessorDescriptor<Processor, NoSettings> {
    protected Processor createComponent(Context context, NoSettings settings) {
        return new Processor();
    }

    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder().withProcessesContent(Text.class).withCreatesAnnotations("grammar/sentence", SpanBounds.class).withCreatesAnnotations("grammar/wordToken", SpanBounds.class).build();
    }

    public static class Processor
    extends AbstractTextProcessor {
        private SentenceDetectorME sentenceDetector;
        private TokenizerME wordTokenizer;

        public Processor() {
            TokenizerModel wordTokenModel;
            SentenceModel sentenceModel;
            try {
                sentenceModel = new SentenceModel(((Object)((Object)this)).getClass().getResourceAsStream("en-sent.bin"));
            }
            catch (IOException e) {
                throw new BadConfigurationException("Unable to load sentence model");
            }
            try {
                wordTokenModel = new TokenizerModel(((Object)((Object)this)).getClass().getResourceAsStream("en-token.bin"));
            }
            catch (IOException e) {
                throw new BadConfigurationException("Unable to load word tokenizer model");
            }
            this.sentenceDetector = new SentenceDetectorME(sentenceModel);
            this.wordTokenizer = new TokenizerME(wordTokenModel);
        }

        protected void process(Text content) {
            String textContent = (String)content.getData();
            for (Span sentence : this.sentenceDetector.sentPosDetect(textContent)) {
                ((Annotation.Builder)content.getAnnotations().create().withType("grammar/sentence")).withBounds((Bounds)new SpanBounds(sentence.getStart(), sentence.getEnd())).save();
                for (Span token : this.wordTokenizer.tokenizePos(textContent.substring(sentence.getStart(), sentence.getEnd()))) {
                    ((Annotation.Builder)content.getAnnotations().create().withType("grammar/wordToken")).withBounds((Bounds)new SpanBounds(sentence.getStart() + token.getStart(), sentence.getStart() + token.getEnd())).save();
                }
            }
        }

        public void close() {
            this.sentenceDetector = null;
            this.wordTokenizer = null;
        }
    }
}

