package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.elementlevel;

import de.uni_mannheim.informatik.dws.melt.matching_base.DataStore;
import de.uni_mannheim.informatik.dws.melt.matching_base.OaeiOptions;
import de.uni_mannheim.informatik.dws.melt.matching_jena.MatcherYAAAJena;
import de.uni_mannheim.informatik.dws.melt.matching_jena.TextExtractor;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.textExtractors.TextExtractorProperty;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Alignment;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/elementlevel/StopwordExtraction.class */
/**
 * Matcher step that derives stopwords from the textual values of ontology resources and
 * publishes them in the global {@link DataStore}. The input alignment is returned unchanged.
 *
 * <p>For every resource, all configured {@link TextExtractor}s are applied, the extracted
 * texts are tokenized, and token occurrences are counted. Tokens are then ordered by
 * descending count and selected as stopwords until one of the configured limits
 * ({@code topNStopwords} or {@code stopwordsPercentage}) is reached. A limit value of
 * {@code 0} disables that limit.
 */
public class StopwordExtraction extends MatcherYAAAJena {
    private static final Logger LOGGER = LoggerFactory.getLogger(StopwordExtraction.class);

    /** Extractors whose combined output forms the texts of one resource. */
    private final List<TextExtractor> valueExtractors;
    /** Splits one extracted text into tokens. */
    private final Function<String, Collection<String>> tokenizer;
    /** If {@code true}, a token is counted at most once per resource. */
    private final boolean countDistinctTermsPerResource;
    /** Maximum number of stopwords to return; {@code 0} means no limit. */
    private final int topNStopwords;
    /**
     * Frequency threshold (token count divided by the number of resources having text).
     * Selection stops at the first token whose frequency is less than or equal to this value;
     * {@code 0} disables the threshold.
     */
    private final double stopwordsPercentage;

    /**
     * Creates a fully configured stopword extraction.
     *
     * @param tokenizer function splitting an extracted text into tokens
     * @param countDistinctTermsPerResource if {@code true}, count each token at most once per resource
     * @param topNStopwords maximum number of stopwords ({@code 0} disables this limit)
     * @param stopwordsPercentage frequency threshold ({@code 0} disables this limit)
     * @param valueExtractors extractors providing the texts of a resource
     */
    public StopwordExtraction(Function<String, Collection<String>> tokenizer, boolean countDistinctTermsPerResource,
            int topNStopwords, double stopwordsPercentage, List<TextExtractor> valueExtractors) {
        this.valueExtractors = valueExtractors;
        this.tokenizer = tokenizer;
        this.countDistinctTermsPerResource = countDistinctTermsPerResource;
        this.topNStopwords = topNStopwords;
        this.stopwordsPercentage = stopwordsPercentage;
    }

    /**
     * Same as the list based constructor, but accepts the extractors as varargs.
     *
     * @param tokenizer function splitting an extracted text into tokens
     * @param countDistinctTermsPerResource if {@code true}, count each token at most once per resource
     * @param topNStopwords maximum number of stopwords ({@code 0} disables this limit)
     * @param stopwordsPercentage frequency threshold ({@code 0} disables this limit)
     * @param valueExtractors extractors providing the texts of a resource
     */
    public StopwordExtraction(Function<String, Collection<String>> tokenizer, boolean countDistinctTermsPerResource,
            int topNStopwords, double stopwordsPercentage, TextExtractor... valueExtractors) {
        this(tokenizer, countDistinctTermsPerResource, topNStopwords, stopwordsPercentage,
                Arrays.asList(valueExtractors));
    }

    /**
     * Selects the {@code topNStopwords} most frequent tokens, counting each token once per resource.
     *
     * @param tokenizer function splitting an extracted text into tokens
     * @param topNStopwords maximum number of stopwords ({@code 0} disables this limit)
     * @param properties properties wrapped into extractors via {@link TextExtractorProperty#wrapExtractor}
     */
    public StopwordExtraction(Function<String, Collection<String>> tokenizer, int topNStopwords, Property... properties) {
        this(tokenizer, true, topNStopwords, 0.0d, TextExtractorProperty.wrapExtractor(properties));
    }

    /**
     * Selects all tokens whose frequency is above {@code stopwordsPercentage}, counting each token
     * once per resource.
     *
     * @param tokenizer function splitting an extracted text into tokens
     * @param stopwordsPercentage frequency threshold ({@code 0} disables this limit)
     * @param properties properties wrapped into extractors via {@link TextExtractorProperty#wrapExtractor}
     */
    public StopwordExtraction(Function<String, Collection<String>> tokenizer, double stopwordsPercentage, Property... properties) {
        this(tokenizer, true, 0, stopwordsPercentage, TextExtractorProperty.wrapExtractor(properties));
    }

    /**
     * Extracts stopwords for each resource type that {@link OaeiOptions} reports as required and
     * stores them in the global {@link DataStore} under the keys
     * {@code stopwords_(source|target)_(classes|properties|instances)}.
     *
     * @param source the source ontology
     * @param target the target ontology
     * @param inputAlignment the incoming alignment, returned as is
     * @param properties additional matcher properties (not read by this method)
     * @return {@code inputAlignment}, unmodified
     * @throws Exception declared by the overridden method
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena.MatcherYAAAJena, de.uni_mannheim.informatik.dws.melt.matching_base.IMatcher
    public Alignment match(OntModel source, OntModel target, Alignment inputAlignment, Properties properties) throws Exception {
        if (OaeiOptions.isMatchingClassesRequired()) {
            storeExtractedStopwords(source.listClasses(), "stopwords_source_classes");
            storeExtractedStopwords(target.listClasses(), "stopwords_target_classes");
        }
        if (OaeiOptions.isMatchingDataPropertiesRequired() || OaeiOptions.isMatchingObjectPropertiesRequired()) {
            storeExtractedStopwords(source.listAllOntProperties(), "stopwords_source_properties");
            storeExtractedStopwords(target.listAllOntProperties(), "stopwords_target_properties");
        }
        if (OaeiOptions.isMatchingInstancesRequired()) {
            storeExtractedStopwords(source.listIndividuals(), "stopwords_source_instances");
            storeExtractedStopwords(target.listIndividuals(), "stopwords_target_instances");
        }
        return inputAlignment;
    }

    /**
     * Extracts stopwords from the given resources and puts them into the global {@link DataStore}.
     *
     * @param resources the resources to analyze
     * @param key the data store key under which the stopword set is stored
     */
    public void storeExtractedStopwords(Iterable<? extends Resource> resources, String key) {
        DataStore.getGlobal().put(key, extractStopwords(resources));
    }

    /**
     * Extracts stopwords from the given resources and puts them into the global {@link DataStore}.
     *
     * @param resources iterator over the resources to analyze (consumed by this call)
     * @param key the data store key under which the stopword set is stored
     */
    public void storeExtractedStopwords(Iterator<? extends Resource> resources, String key) {
        DataStore.getGlobal().put(key, extractStopwords(resources));
    }

    /**
     * Extracts stopwords from the given resources.
     *
     * @param resources the resources to analyze
     * @return the selected stopwords (possibly empty, never {@code null})
     */
    public Set<String> extractStopwords(Iterable<? extends Resource> resources) {
        return extractStopwords(resources.iterator());
    }

    /**
     * Extracts stopwords from the given resources.
     *
     * <p>Tokens are ranked by descending occurrence count. Selection stops as soon as
     * {@code topNStopwords} tokens were collected (if non-zero) or the frequency of the current
     * token (count divided by the number of resources with at least one extracted text) is less
     * than or equal to {@code stopwordsPercentage} (if non-zero).
     *
     * @param resources iterator over the resources to analyze (consumed by this call)
     * @return the selected stopwords (possibly empty, never {@code null})
     */
    public Set<String> extractStopwords(Iterator<? extends Resource> resources) {
        int resourcesWithText = 0;
        Map<String, Integer> tokenCounts = new HashMap<>();
        while (resources.hasNext()) {
            Resource resource = resources.next();
            Set<String> texts = new HashSet<>();
            for (TextExtractor extractor : this.valueExtractors) {
                texts.addAll(extractor.extract(resource));
            }
            if (!texts.isEmpty()) {
                resourcesWithText++;
            }
            for (String token : tokenize(texts)) {
                tokenCounts.merge(token, 1, Integer::sum);
            }
        }

        Set<String> stopwords = new HashSet<>();
        if (tokenCounts.isEmpty()) {
            return stopwords;
        }
        // A non-empty count map implies resourcesWithText > 0, so the division below is safe.
        List<Map.Entry<String, Integer>> sortedTokens = tokenCounts.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .collect(Collectors.toList());
        LOGGER.debug("Sorted tokens (#resources: {}): {}", resourcesWithText,
                sortedTokens.stream().limit(30L).collect(Collectors.toList()));
        for (Map.Entry<String, Integer> entry : sortedTokens) {
            if (this.topNStopwords != 0 && stopwords.size() >= this.topNStopwords) {
                break;
            }
            // Cast before dividing: int / int would truncate the frequency to 0 for every
            // token that does not occur in all resources.
            double frequency = (double) entry.getValue() / resourcesWithText;
            if (this.stopwordsPercentage != 0.0d && frequency <= this.stopwordsPercentage) {
                break;
            }
            stopwords.add(entry.getKey());
        }
        LOGGER.debug("Extracted stopwords: {}", stopwords);
        return stopwords;
    }

    /**
     * Tokenizes all non-empty texts of one resource and drops empty tokens. The result is
     * de-duplicated when {@code countDistinctTermsPerResource} is set.
     */
    private Collection<String> tokenize(Set<String> texts) {
        List<String> tokens = new ArrayList<>();
        for (String text : texts) {
            if (text.isEmpty()) {
                continue;
            }
            for (String token : this.tokenizer.apply(text)) {
                if (!token.isEmpty()) {
                    tokens.add(token);
                }
            }
        }
        return this.countDistinctTermsPerResource ? new HashSet<>(tokens) : tokens;
    }
}
