/*
 * Decompiled with CFR 0.152.
 */
package io.annot8.components.opennlp.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.annotations.Group;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.BadConfigurationException;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.common.data.utils.SortUtils;
import io.annot8.components.base.processors.AbstractTextProcessor;
import io.annot8.components.opennlp.processors.POS;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.Span;

@ComponentName(value="OpenNLP Phrase Chunks")
@ComponentDescription(value="Annotate phrase chunks identified by OpenNLP's chunker")
@SettingsClass(value=Settings.class)
public class PhraseChunks
extends AbstractProcessorDescriptor<Processor, Settings> {
    protected Processor createComponent(Context context, Settings settings) {
        InputStream model;
        if (settings.getModel() == null) {
            model = POS.class.getResourceAsStream("en-chunker.bin");
        } else {
            try {
                model = new FileInputStream(settings.getModel());
            }
            catch (IOException e) {
                throw new BadConfigurationException("Could not read Chunker model");
            }
        }
        return new Processor(model);
    }

    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder().withProcessesContent(Text.class).withProcessesAnnotations("grammar/sentence", SpanBounds.class).withProcessesAnnotations("grammar/wordToken", SpanBounds.class).withCreatesGroups("group/grammar/phrase").build();
    }

    public static class Settings
    implements io.annot8.api.settings.Settings {
        private File model;

        public boolean validate() {
            return true;
        }

        @Description(value="OpenNLP Phrase Chunk Model (or null to use default)")
        public File getModel() {
            return this.model;
        }

        public void setModel(File model) {
            this.model = model;
        }
    }

    public static class Processor
    extends AbstractTextProcessor {
        private ChunkerME phraseChunker;
        private final Set<String> prepositions = Set.of("about", "above", "across", "against", "amid", "around", "at", "atop", "behind", "below", "beneath", "beside", "between", "beyond", "by", "for", "from", "down", "in", "including", "inside", "into", "mid", "near", "of", "off", "on", "onto", "opposite", "out", "outside", "over", "round", "through", "throughout", "to", "under", "underneath", "with", "within", "without");

        public Processor(InputStream model) {
            try {
                this.phraseChunker = new ChunkerME(new ChunkerModel(model));
            }
            catch (IOException ioe) {
                throw new BadConfigurationException("Cannot read Chunker model", (Throwable)ioe);
            }
        }

        protected void process(Text content) {
            content.getAnnotations().getByBoundsAndType(SpanBounds.class, "grammar/sentence").forEach(s -> {
                Span[] spans;
                SpanBounds sentenceBounds = (SpanBounds)s.getBounds();
                ArrayList tokens = new ArrayList();
                content.getBetween(sentenceBounds.getBegin(), sentenceBounds.getEnd()).filter(a -> "grammar/wordToken".equals(a.getType())).filter(a -> a.getBounds() instanceof SpanBounds).sorted(SortUtils.SORT_BY_SPANBOUNDS).forEach(tokens::add);
                String[] words = new String[tokens.size()];
                String[] pos = new String[tokens.size()];
                int i = 0;
                for (Annotation a2 : tokens) {
                    String word = content.getText(a2).orElse("");
                    String tag = a2.getProperties().get("pos", String.class).orElse("UNK");
                    words[i] = word;
                    pos[i] = tag;
                    ++i;
                }
                for (Span span : spans = this.phraseChunker.chunkAsSpans(words, pos)) {
                    List constituentWords = content.getBetween(((SpanBounds)((Annotation)tokens.get(span.getStart())).getBounds(SpanBounds.class).get()).getBegin(), ((SpanBounds)((Annotation)tokens.get(span.getEnd())).getBounds(SpanBounds.class).get()).getEnd()).filter(a -> "grammar/wordToken".equals(a.getType())).collect(Collectors.toList());
                    int headWordId = constituentWords.size() - 1;
                    for (int a3 = constituentWords.size() - 2; a3 > 1; --a3) {
                        String cwPos = ((Annotation)constituentWords.get(a3)).getProperties().get("pos", String.class).orElse("UNK");
                        String cwText = content.getText((Annotation)constituentWords.get(a3)).orElse("");
                        if (!("IN".equals(cwPos) || ",".equals(cwPos) || this.prepositions.contains(cwText))) {
                            headWordId = a3;
                            break;
                        }
                        headWordId = a3 - 1;
                    }
                    Group.Builder builder = (Group.Builder)((Group.Builder)((Group.Builder)content.getItem().getGroups().create().withType("group/grammar/phrase")).withProperty("probability", (Object)span.getProb())).withProperty("subtype", (Object)span.getType());
                    for (int a4 = 0; a4 < constituentWords.size(); ++a4) {
                        builder = a4 == headWordId ? builder.withAnnotation("grammar/head", (Annotation)constituentWords.get(a4)) : builder.withAnnotation("grammar/constituent", (Annotation)constituentWords.get(a4));
                    }
                    builder.save();
                }
            });
        }
    }
}

