package de.julielab.jcore.consumer.entityevaluator;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
import de.julielab.jcore.utility.index.Comparators;
import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
import de.julielab.jcore.utility.index.TermGenerators;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.class */
public class EntityEvaluatorConsumer extends JCasAnnotator_ImplBase {
    /** Name of the predefined output column for the document ID. */
    public static final String DOCUMENT_ID_COLUMN = "DocumentId";
    /** Name of the predefined output column for the ID of the sentence associated with an entity. */
    public static final String SENTENCE_ID_COLUMN = "SentenceId";
    /** Name of the predefined output column for the entity offsets. */
    public static final String OFFSETS_COLUMN = "Offsets";
    /** Configuration parameter name: ordered list of the columns to write. */
    public static final String PARAM_OUTPUT_COLUMNS = "OutputColumns";
    /** Configuration parameter name: custom column definitions. */
    public static final String PARAM_COLUMN_DEFINITIONS = "ColumnDefinitions";
    /** Configuration parameter name: prefix used when resolving annotation type names. */
    public static final String PARAM_TYPE_PREFIX = "TypePrefix";
    /** Configuration parameter name: additional entity types to create output for. */
    public static final String PARAM_ENTITY_TYPES = "EntityTypes";
    /** Configuration parameter name: feature value filter expressions. */
    public static final String PARAM_FEATURE_FILTERS = "FeatureFilters";
    /** Configuration parameter name: offset mode, the name of an {@link OffsetMode} constant. */
    public static final String PARAM_OFFSET_MODE = "OffsetMode";
    /** Configuration parameter name: offset scope, the name of an {@link OffsetScope} constant. */
    public static final String PARAM_OFFSET_SCOPE = "OffsetScope";
    /** Configuration parameter name: path of the file the records are written to. */
    public static final String PARAM_OUTPUT_FILE = "OutputFile";
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(EntityEvaluatorConsumer.class);

    // Raw parameter value; initialize() converts it into the ordered set 'outputColumnNames'.
    @ConfigurationParameter(name = PARAM_OUTPUT_COLUMNS, description = "A list of column names that are either defined with the parameter ColumnDefinitions or one of 'DocumentId', 'SentenceId' or 'Offsets'. This list determines the set and the order of columns that are written into the output file in a tab-separated manner.")
    private String[] outputColumnNamesArray;

    // Each entry is turned into a Column object on the first call of process().
    @ConfigurationParameter(name = PARAM_COLUMN_DEFINITIONS, description = "Custom definitions of output columns. Predefined columns are 'DocumentId', 'SentenceId' and 'Offsets'. The first two may be overwritten by a custom definition using their exact name. A column definition consists of the name of the column, the type of the annotation from which the values for this column should be derived, and a feature path pointing to the value. A single column definition may refer to multiple, different annotation types with their own feature path. Annotation types that should use the same feature path are separated by a comma. The sets of annotation types where each set shared one feature path are separated by a semicolon. Example: 'entityid:Chemical,Gene=/registryNumber;Disease=/specificType'. In this example, the column named 'entityid' will list the IDs of annotations of types 'Chemical', 'Gene' and 'Disease'. For the first two, the feature 'registryNumber' will be employed, for the latter the feature 'specificType'. The annotation type names will be resolved against the 'TypePrefix' parameter, if specified. The built-in feature path functions 'coveredText()' and 'typeName()' are available. For example, 'type:Gene=/:typeName()' (note the colon preceding the built-in function) will output the fully qualified name of the Gene type.")
    private String[] columnDefinitionDescriptions;

    // May be null; the names are resolved with findType() on the first call of process().
    @ConfigurationParameter(name = PARAM_ENTITY_TYPES, mandatory = false, description = "Optional. A list of entity types for which an output should be created. If all desired types are already mentioned in the 'ColumnDefinitions' parameter, this parameter can be left empty.")
    private String[] entityTypeStrings;

    // initialize() falls back to OffsetMode.CharacterSpan when the parameter is not set.
    @ConfigurationParameter(name = PARAM_OFFSET_MODE, mandatory = false, description = "Optional. Determines the kind of offset printed out by the component for each entity. Supported are CharacterSpan and NonWsCharacters. The first uses the common UIMA character span offsets. The second counts only the non-whitespace characters for the offsets. This last format is used, for example, by the BioCreative 2 Gene Mention task data. Default is CharacterSpan.")
    private OffsetMode offsetMode;

    // NOTE: when the parameter is not set, initialize() chooses Sentence if 'SentenceId' is one of the
    // output columns and Document otherwise, so the effective default is not always Document.
    @ConfigurationParameter(name = PARAM_OFFSET_SCOPE, mandatory = false, description = "Optional. 'Document' or 'Sentence'. Defaults to Document.")
    private OffsetScope offsetScope;

    // Passed to findType(), Column and FeatureValueFilter for resolving annotation type names.
    @ConfigurationParameter(name = PARAM_TYPE_PREFIX, mandatory = false, description = "Optional. If an annotation type name given in one of the 'ColumnDefinitions' or 'EntityTypes' can not be found, it is searched with this prefix. Thus, for JCoRe the prefix 'de.julielab.jcore.types' will cover all annotation types and make the other parameter values briefer.")
    private String typePrefix;

    // initialize() substitutes an empty array when the parameter is not set, so this is never null afterwards.
    @ConfigurationParameter(name = PARAM_FEATURE_FILTERS, mandatory = false, description = "Optional. Only lets those entities contribute to the output file that fulfill the given feature value(s). The syntax is <type>:<feature path>=<value>. The '<type>:' prefix is optional. If omitted, the filters will be applied to all entities given in the EntityTypes parameter. An arbitrary number of filter expressions may be specified. In such cases, it is important to understand the boolean structure after which the expressions are evaluated in order to omit an annotation or take it into account for the output. The filter expressions are first grouped by feature path. Within such a group, the filter values form a disjunction. Thus, if any filter in a group is satisfied, the whole group is satisfied. The different groups form a conjunction. Thus, if any group is not satisfied, the whole conjunction is unsatisfied and the respective annotation will be omitted from output.")
    private String[] featureFilterDefinitions;

    // An existing file at this path is deleted in initialize(). The columns actually written are the
    // ones listed in the OutputColumns parameter, joined by tab characters.
    @ConfigurationParameter(name = PARAM_OUTPUT_FILE, description = "Output file to which all entity information is written in the format\ndocId EGID begin end confidence\nWhere the fields are separated by tab stops. If the file name ends with .gz, the output file will automatically be gzipped.")
    private String outputFilePath;
    /** Ordered, duplicate-free names of the columns to write; built from the OutputColumns parameter in initialize(). */
    private LinkedHashSet<String> outputColumnNames;
    /** All known columns by name; stays null until the first call of process(). */
    private LinkedHashMap<String, Column> columns;
    /** Feature value filters grouped by their feature path; built on the first call of process(). */
    private Map<String, List<FeatureValueFilter>> featureFilters;
    /** The file the records are written to. */
    private File outputFile;
    /** Writer to {@link #outputFile}; opened in initialize() and closed in collectionProcessComplete(). */
    private BufferedWriter bw;
    /** Names of the built-in columns (DocumentId, SentenceId, Offsets); filled in initialize(). */
    private final Set<String> predefinedColumnNames = new HashSet<>();
    /** The annotation types for which records are created; determined on the first call of process(). */
    private LinkedHashSet<Object> entityTypes = new LinkedHashSet<>();
    /** Records collected since the last write to the output file, one array of column values per entity. */
    private final List<String[]> entityRecords = new ArrayList<>();

    /* loaded from: input_file:de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer$OffsetMode.class */
    /**
     * The kind of offsets written for each entity, selected with the OffsetMode parameter.
     */
    public enum OffsetMode {
        /** The common UIMA character span offsets. */
        CharacterSpan,
        /** Offsets that count only the non-whitespace characters. */
        NonWsCharacters
    }

    /* loaded from: input_file:de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer$OffsetScope.class */
    /**
     * The scope of the written offsets, selected with the OffsetScope parameter.
     */
    public enum OffsetScope {
        /** The offsets column is created without a sentence index. */
        Document,
        /** The offsets column is created with the sentence index of the SentenceId column. */
        Sentence
    }

    /**
     * Creates a map from character positions to the number of whitespace characters that precede
     * them in <code>str</code>.
     * <p>
     * The map always contains the entry <code>0 -> 0</code>. Apart from that, an entry is only
     * created for positions within the string that directly follow a whitespace character, since the
     * count can only change there. The count for an arbitrary position can thus be looked up with
     * {@link NavigableMap#floorEntry(Object)}.
     * </p>
     *
     * @param str the text to count whitespace in; <code>null</code> is treated like the empty string
     * @return a sorted map from position to the number of whitespace characters before that position
     */
    public static NavigableMap<Integer, Integer> createNumWsMap(String str) {
        NavigableMap<Integer, Integer> numWsBefore = new TreeMap<>();
        numWsBefore.put(0, 0);
        if (str == null) {
            // A document without text has no whitespace to account for.
            return numWsBefore;
        }
        int wsCount = 0;
        boolean previousWasWs = false;
        for (int pos = 0; pos < str.length(); pos++) {
            if (previousWasWs) {
                // wsCount has not yet been updated for the character at pos itself.
                numWsBefore.put(pos, wsCount);
            }
            previousWasWs = Character.isWhitespace(str.charAt(pos));
            if (previousWasWs) {
                wsCount++;
            }
        }
        return numWsBefore;
    }

    /**
     * Resolves an annotation type name against the type system, optionally with the help of a type
     * name prefix.
     * <p>
     * A name that contains a dot is first tried as given and then with the prefix prepended. A name
     * without a dot is first tried with the prefix prepended and then as given. If no usable prefix
     * is available (<code>null</code> or empty), only the name itself is tried.
     * </p>
     *
     * @param str        the annotation type name, fully qualified or relative to the prefix
     * @param str2       the type name prefix without trailing dot; may be <code>null</code> or empty
     * @param typeSystem the type system to look the type up in
     * @return the first type found for one of the candidate names
     * @throws IllegalArgumentException if none of the candidate names denotes a type
     */
    public static Type findType(String str, String str2, TypeSystem typeSystem) {
        boolean hasPrefix = str2 != null && !str2.isEmpty();
        // The insertion order of this set is the lookup order; duplicates are ignored.
        Set<String> candidateNames = new LinkedHashSet<>();
        if (str.contains(".") || !hasPrefix) {
            candidateNames.add(str);
        }
        if (hasPrefix) {
            candidateNames.add(str2 + "." + str);
        }
        // Last resort for names without a dot: the name exactly as it was given.
        candidateNames.add(str);

        for (String candidateName : candidateNames) {
            Type type = typeSystem.getType(candidateName);
            if (type != null) {
                return type;
            }
        }
        throw new IllegalArgumentException("The annotation type " + str + " was not found in the type system. The following names have been tried without success: " + candidateNames);
    }

    /**
     * Creates the offsets column for the given CAS according to the configured offset mode and
     * scope and registers it under the name {@link #OFFSETS_COLUMN}.
     * <p>
     * For the sentence scope, the sentence index of the sentence ID column is required. That column
     * must therefore have been set up for the current CAS before this method is called.
     * </p>
     *
     * @param jCas the CAS that is currently processed
     * @throws IllegalArgumentException if the offset scope is unsupported or if the sentence scope
     *                                  is used without a sentence ID column
     */
    private void addOffsetsColumn(JCas jCas) {
        final OffsetsColumn offsetsColumn;
        if (this.offsetScope == OffsetScope.Document) {
            if (this.offsetMode == OffsetMode.NonWsCharacters) {
                // Non-whitespace offsets need the whitespace counts of the document text.
                offsetsColumn = new OffsetsColumn(createNumWsMap(jCas.getDocumentText()), this.offsetMode);
            } else {
                offsetsColumn = new OffsetsColumn(this.offsetMode);
            }
        } else if (this.offsetScope == OffsetScope.Sentence) {
            Column sentenceColumn = this.columns.get(SENTENCE_ID_COLUMN);
            // The sentence ID column is only created when it is part of the output columns. Fail with
            // a clear message instead of a NullPointerException or ClassCastException.
            if (!(sentenceColumn instanceof SentenceIdColumn)) {
                throw new IllegalArgumentException("The offset scope " + this.offsetScope + " requires the column \"" + SENTENCE_ID_COLUMN + "\" to be one of the output columns.");
            }
            offsetsColumn = new OffsetsColumn(((SentenceIdColumn) sentenceColumn).getSentenceIndex(), this.offsetMode);
        } else {
            throw new IllegalArgumentException("Unsupported offset scope " + this.offsetScope);
        }
        this.columns.put(OFFSETS_COLUMN, offsetsColumn);
    }

    /**
     * Registers the document ID column if it was requested for output. A custom definition of the
     * column is used when present; otherwise the <code>docId</code> feature of the {@link Header}
     * annotation serves as default. In both cases, the definition is wrapped into a
     * <code>DocumentIdColumn</code>.
     *
     * @param jCas the CAS whose type system is used to create the default definition
     * @throws CASException if the type system cannot be obtained from the CAS
     */
    private void addDocumentIdColumn(JCas jCas) throws CASException {
        if (!this.outputColumnNames.contains(DOCUMENT_ID_COLUMN)) {
            return;
        }
        Column definition = this.columns.get(DOCUMENT_ID_COLUMN);
        if (definition == null) {
            String defaultDefinition = "DocumentId:" + Header.class.getCanonicalName() + "=/docId";
            definition = new Column(defaultDefinition, null, jCas.getTypeSystem());
        }
        this.columns.put(DOCUMENT_ID_COLUMN, new DocumentIdColumn(definition));
    }

    /**
     * Registers the sentence ID column for the given CAS if it was requested for output. The
     * annotations of the column's type are put into an overlap-based index that is handed to the
     * created <code>SentenceIdColumn</code>, together with the document ID if a document ID column
     * exists.
     *
     * @param jCas the CAS that is currently processed
     * @throws CASException if the CAS cannot be accessed
     */
    private void addSentenceIdColumn(JCas jCas) throws CASException {
        if (!this.outputColumnNames.contains(SENTENCE_ID_COLUMN)) {
            return;
        }
        Column sentenceColumn = this.columns.get(SENTENCE_ID_COLUMN);
        if (sentenceColumn == null) {
            String defaultDefinition = "SentenceId:" + Sentence.class.getCanonicalName() + "=/id";
            sentenceColumn = new Column(defaultDefinition, null, jCas.getTypeSystem());
        }

        Column documentIdColumn = this.columns.get(DOCUMENT_ID_COLUMN);
        String documentId = documentIdColumn == null ? null : documentIdColumn.getValue(jCas.getDocumentAnnotationFs());

        Type sentenceType = sentenceColumn.getSingleType();
        JCoReTreeMapAnnotationIndex sentenceIndex = new JCoReTreeMapAnnotationIndex(
                Comparators.longOverlapComparator(),
                TermGenerators.longOffsetTermGenerator(),
                TermGenerators.longOffsetTermGenerator());
        sentenceIndex.index(jCas, sentenceType);

        this.columns.put(SENTENCE_ID_COLUMN, new SentenceIdColumn(documentId, sentenceColumn, sentenceIndex));
    }

    /**
     * Writes all collected entity records to the output file, one tab-separated line per record,
     * and empties the record buffer afterwards.
     * <p>
     * Writing is best-effort: a record that cannot be written is skipped. Failures are reported
     * once through the logger, together with the number of affected records.
     * </p>
     */
    protected void appendEntityRecordsToFile() {
        int failedRecords = 0;
        IOException firstFailure = null;
        for (String[] entityRecord : this.entityRecords) {
            try {
                this.bw.write(String.join("\t", entityRecord) + "\n");
            } catch (IOException e) {
                failedRecords++;
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            // The exception as last argument is logged with its stack trace by SLF4J.
            log.error("Could not write {} of {} entity records to file {}. These records are lost.", failedRecords, this.entityRecords.size(), this.outputFilePath, firstFailure);
        }
        this.entityRecords.clear();
    }

    /**
     * Checks that a column with the given name is known.
     *
     * @param str the name of the column
     * @throws IllegalArgumentException if no column is registered under the name
     */
    private void assertColumnDefined(String str) {
        boolean defined = this.columns.get(str) != null;
        if (defined) {
            return;
        }
        throw new IllegalArgumentException("The column \"" + str + "\" was set for output but was not defined.");
    }

    /**
     * Writes the entity records collected so far to the output file when a batch is complete.
     *
     * @throws AnalysisEngineProcessException if the superclass implementation fails
     */
    public void batchProcessComplete() throws AnalysisEngineProcessException {
        super.batchProcessComplete();
        final int pendingRecords = this.entityRecords.size();
        final String fileName = this.outputFile.getName();
        log.debug("Batch completed. Writing {} entity records to file {}.", pendingRecords, fileName);
        appendEntityRecordsToFile();
    }

    /**
     * Writes the remaining entity records to the output file and closes the writer when the whole
     * collection is complete.
     *
     * @throws AnalysisEngineProcessException if the superclass implementation fails or the writer
     *                                        cannot be closed
     */
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        final int pendingRecords = this.entityRecords.size();
        final String fileName = this.outputFile.getName();
        log.info("Collection completed. Writing {} entity records to file {}.", pendingRecords, fileName);
        appendEntityRecordsToFile();
        try {
            this.bw.close();
        } catch (IOException closeFailure) {
            throw new AnalysisEngineProcessException(closeFailure);
        }
    }

    /**
     * Reads the configuration parameters, derives the effective offset mode and scope, prepares
     * the output file and opens the writer to it.
     * <p>
     * An already existing output file is deleted; missing parent directories are created. The
     * columns themselves are not created here but on the first call of <code>process</code>.
     * </p>
     *
     * @param uimaContext the context to read the configuration parameters from
     * @throws ResourceInitializationException if the superclass initialization fails or the writer
     *                                         to the output file cannot be opened
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        this.outputColumnNamesArray = (String[]) uimaContext.getConfigParameterValue(PARAM_OUTPUT_COLUMNS);
        this.columnDefinitionDescriptions = (String[]) uimaContext.getConfigParameterValue(PARAM_COLUMN_DEFINITIONS);
        this.typePrefix = (String) uimaContext.getConfigParameterValue(PARAM_TYPE_PREFIX);
        Object configuredFilters = uimaContext.getConfigParameterValue(PARAM_FEATURE_FILTERS);
        this.featureFilterDefinitions = configuredFilters == null ? new String[0] : (String[]) configuredFilters;
        this.outputFilePath = (String) uimaContext.getConfigParameterValue(PARAM_OUTPUT_FILE);
        this.entityTypeStrings = (String[]) uimaContext.getConfigParameterValue(PARAM_ENTITY_TYPES);
        String configuredOffsetMode = (String) uimaContext.getConfigParameterValue(PARAM_OFFSET_MODE);
        String configuredOffsetScope = (String) uimaContext.getConfigParameterValue(PARAM_OFFSET_SCOPE);

        this.outputColumnNames = Stream.of(this.outputColumnNamesArray).collect(Collectors.toCollection(LinkedHashSet::new));

        if (configuredOffsetMode != null) {
            this.offsetMode = OffsetMode.valueOf(configuredOffsetMode);
        } else {
            this.offsetMode = OffsetMode.CharacterSpan;
        }
        if (configuredOffsetScope != null) {
            this.offsetScope = OffsetScope.valueOf(configuredOffsetScope);
        } else if (this.outputColumnNames.contains(SENTENCE_ID_COLUMN)) {
            // Without an explicit scope, a requested sentence ID column implies sentence-relative offsets.
            this.offsetScope = OffsetScope.Sentence;
        } else {
            this.offsetScope = OffsetScope.Document;
        }

        this.outputFile = new File(this.outputFilePath);
        if (this.outputFile.exists()) {
            log.warn("File \"{}\" is overridden.", this.outputFile.getAbsolutePath());
            this.outputFile.delete();
        }
        File parentDirectory = this.outputFile.getParentFile();
        if (parentDirectory != null && !parentDirectory.exists()) {
            parentDirectory.mkdirs();
        }
        try {
            this.bw = FileUtilities.getWriterToFile(this.outputFile);
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }

        this.predefinedColumnNames.add(DOCUMENT_ID_COLUMN);
        this.predefinedColumnNames.add(SENTENCE_ID_COLUMN);
        this.predefinedColumnNames.add(OFFSETS_COLUMN);

        log.info("{}: {}", PARAM_OUTPUT_COLUMNS, this.outputColumnNames);
        log.info("{}: {}", PARAM_COLUMN_DEFINITIONS, this.columnDefinitionDescriptions);
        log.info("{}: {}", PARAM_FEATURE_FILTERS, this.featureFilterDefinitions);
        log.info("{}: {}", PARAM_ENTITY_TYPES, this.entityTypeStrings);
        log.info("{}: {}", PARAM_TYPE_PREFIX, this.typePrefix);
        log.info("{}: {}", PARAM_OUTPUT_FILE, this.outputFilePath);
        log.info("{}: {}", PARAM_OFFSET_MODE, this.offsetMode);
    }

    /**
     * Collects one record for each entity annotation in the CAS that passes the feature filters.
     * <p>
     * On the first call, the columns, the entity types and the feature filters are created from
     * the configuration. The sentence ID and offsets columns are set up anew for each CAS. The
     * records are buffered and written to the output file on batch or collection completion.
     * All columns are reset at the end of each call.
     * </p>
     *
     * @param jCas the CAS to collect entity records from
     * @throws AnalysisEngineProcessException if the CAS cannot be accessed or the entity
     *                                        annotations cannot be iterated
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        try {
            if (this.columns == null) {
                initializeColumnsAndFilters(jCas);
            }
            addSentenceIdColumn(jCas);
            addOffsetsColumn(jCas);
            JCoReAnnotationIndexMerger entityIterator = new JCoReAnnotationIndexMerger(this.entityTypes, true, (AnnotationFS) null, jCas);
            while (entityIterator.incrementAnnotation()) {
                TOP annotation = entityIterator.getAnnotation();
                if (!passesFeatureFilters(annotation)) {
                    continue;
                }
                String[] entityRecord = new String[this.outputColumnNames.size()];
                int columnIndex = 0;
                for (String columnName : this.outputColumnNames) {
                    assertColumnDefined(columnName);
                    entityRecord[columnIndex++] = removeLineBreak(this.columns.get(columnName).getValue(annotation));
                }
                this.entityRecords.add(entityRecord);
            }
        } catch (CASException | ClassNotFoundException e) {
            // Do not continue silently: the output would be incomplete without anyone noticing.
            throw new AnalysisEngineProcessException(e);
        } finally {
            // The columns are null if the one-time setup has failed.
            if (this.columns != null) {
                for (Column column : this.columns.values()) {
                    column.reset();
                }
            }
        }
    }

    /**
     * One-time setup: creates the configured columns, determines the entity types to create
     * records for and builds the feature filters. If anything fails, the columns are unset again
     * so that the next call of <code>process</code> repeats the setup instead of working on a
     * half-initialized state.
     */
    private void initializeColumnsAndFilters(JCas jCas) throws CASException {
        TypeSystem typeSystem = jCas.getTypeSystem();
        this.columns = new LinkedHashMap<>();
        try {
            for (String columnDefinition : this.columnDefinitionDescriptions) {
                Column column = new Column(columnDefinition, this.typePrefix, typeSystem);
                this.columns.put(column.getName(), column);
            }

            // The types of all custom columns plus the explicitly given entity types.
            LinkedHashSet<Object> types = new LinkedHashSet<>();
            for (Column column : this.columns.values()) {
                if (!this.predefinedColumnNames.contains(column.getName())) {
                    types.addAll(column.getTypes());
                }
            }
            if (this.entityTypeStrings != null) {
                for (String entityTypeName : this.entityTypeStrings) {
                    types.add(findType(entityTypeName, this.typePrefix, typeSystem));
                }
            }
            this.entityTypes = types;
            if (types.isEmpty()) {
                throw new IllegalArgumentException("No entity names are given, neither by the EntityTypes parameter nor in the ColumnDefinitions parameter.");
            }
            removeSubsumedTypes(this.entityTypes, typeSystem);

            this.featureFilters = Stream.of(this.featureFilterDefinitions)
                    .map(filterDefinition -> new FeatureValueFilter(filterDefinition, this.typePrefix, typeSystem))
                    .collect(Collectors.groupingBy(filter -> filter.getPathValuePair().fp.getFeaturePath()));

            addDocumentIdColumn(jCas);
        } catch (RuntimeException | CASException e) {
            this.columns = null;
            throw e;
        }
    }

    /**
     * Evaluates the feature filters for an annotation. The filters of one feature path form a
     * disjunction, the groups of different feature paths form a conjunction. Without any filters,
     * every annotation passes.
     */
    private boolean passesFeatureFilters(TOP annotation) {
        for (List<FeatureValueFilter> filterGroup : this.featureFilters.values()) {
            boolean groupSatisfied = filterGroup.isEmpty();
            for (FeatureValueFilter filter : filterGroup) {
                if (!filter.contradictsFeatureFilter(annotation)) {
                    groupSatisfied = true;
                }
            }
            if (!groupSatisfied) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replaces each line feed character by a single space so that the value fits into one line of
     * the output file.
     *
     * @param str the value to clean; may be <code>null</code>
     * @return the value without line feeds or <code>null</code> if the value was <code>null</code>
     */
    private String removeLineBreak(String str) {
        return str == null ? null : str.replace('\n', ' ');
    }

    /**
     * Removes each type from the given set that is subsumed by another type of the set. All
     * elements of the set must be {@link Type} instances.
     *
     * @param linkedHashSet the types to reduce; modified in place
     * @param typeSystem    the type system that decides about subsumption
     */
    private void removeSubsumedTypes(LinkedHashSet<Object> linkedHashSet, TypeSystem typeSystem) {
        // Work on a snapshot: a type may still subsume others after it has been removed itself.
        List<Type> referenceTypes = new ArrayList<>(linkedHashSet.size());
        for (Object element : linkedHashSet) {
            referenceTypes.add((Type) element);
        }
        for (Type referenceType : referenceTypes) {
            linkedHashSet.removeIf(element -> {
                Type candidate = (Type) element;
                return !referenceType.equals(candidate) && typeSystem.subsumes(referenceType, candidate);
            });
        }
    }
}
