/*
 * Decompiled with CFR 0.152.
 */
package io.annot8.components.opennlp.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.bounds.Bounds;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.BadConfigurationException;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.components.base.processors.AbstractTextProcessor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;

@ComponentName(value="OpenNLP Tokens")
@ComponentDescription(value="Annotate tokens identified by OpenNLP's token detector")
@SettingsClass(value=Settings.class)
public class Tokens
extends AbstractProcessorDescriptor<Processor, Settings> {
    protected Processor createComponent(Context context, Settings settings) {
        InputStream model;
        if (settings.getModel() == null) {
            model = Tokens.class.getResourceAsStream("en-token.bin");
        } else {
            try {
                model = new FileInputStream(settings.getModel());
            }
            catch (IOException e) {
                throw new BadConfigurationException("Could not read Token model");
            }
        }
        return new Processor(model);
    }

    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder().withProcessesContent(Text.class).withCreatesAnnotations("grammar/wordToken", SpanBounds.class).build();
    }

    public static class Settings
    implements io.annot8.api.settings.Settings {
        private File model;

        public boolean validate() {
            return true;
        }

        @Description(value="OpenNLP Token Model (or null to use default)")
        public File getModel() {
            return this.model;
        }

        public void setModel(File model) {
            this.model = model;
        }
    }

    public static class Processor
    extends AbstractTextProcessor {
        private TokenizerME detector;

        public Processor(InputStream model) {
            try {
                this.detector = new TokenizerME(new TokenizerModel(model));
            }
            catch (IOException ioe) {
                throw new BadConfigurationException("Cannot read Token model", (Throwable)ioe);
            }
        }

        protected void process(Text content) {
            List sentences = content.getAnnotations().getByType("grammar/sentence").collect(Collectors.toList());
            if (sentences.isEmpty()) {
                this.createTokens(content, this.detector.tokenizePos((String)content.getData()), 0);
            } else {
                for (Annotation sentence : sentences) {
                    Optional sb = sentence.getBounds(SpanBounds.class);
                    if (sb.isEmpty()) continue;
                    content.getText(sentence).ifPresent(s -> this.createTokens(content, this.detector.tokenizePos(s), ((SpanBounds)sb.get()).getBegin()));
                }
            }
        }

        private void createTokens(Text content, Span[] spans, int offset) {
            for (Span s : spans) {
                ((Annotation.Builder)((Annotation.Builder)content.getAnnotations().create().withBounds((Bounds)new SpanBounds(s.getStart() + offset, s.getEnd() + offset)).withType("grammar/wordToken")).withProperty("probability", (Object)s.getProb())).save();
            }
        }

        public void close() {
            this.detector = null;
        }
    }
}

