package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.multisource.dispatchers;

import de.uni_mannheim.informatik.dws.melt.matching_base.typetransformer.TypeTransformationException;
import de.uni_mannheim.informatik.dws.melt.matching_base.typetransformer.TypeTransformerRegistry;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.Counter;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.URIUtil;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.lang.StringUtils;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import smile.math.MathEx;
import smile.nlp.dictionary.EnglishPunctuations;
import smile.nlp.dictionary.EnglishStopWords;
import smile.nlp.normalizer.SimpleNormalizer;
import smile.nlp.stemmer.PorterStemmer;
import smile.nlp.tokenizer.SimpleSentenceSplitter;
import smile.nlp.tokenizer.SimpleTokenizer;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/multisource/dispatchers/MultiSourceDispatcherIncrementalMergeByClusterText.class */
/**
 * Dispatcher that describes every input ontology/knowledge graph by a TF-IDF vector built from its
 * textual content (string literals and URI fragments of subjects) and hands these vectors to the
 * cluster-based superclass through {@link #getClusterFeatures(List, Object)}.
 */
public class MultiSourceDispatcherIncrementalMergeByClusterText extends MultiSourceDispatcherIncrementalMergeByCluster {
    private static final Logger LOGGER = LoggerFactory.getLogger(MultiSourceDispatcherIncrementalMergeByClusterText.class);

    /** Characters inside a URI fragment that are replaced by a blank before tokenizing. */
    private static final Pattern URI_SEPARATOR = Pattern.compile("[-_~|]");

    /** Zero-width position in front of an upper-case letter followed by a lower-case one (camelCase boundary). */
    private static final Pattern CAMEL_CASE_SPLIT = Pattern.compile("(?<!^)(?<!\\s)(?=[A-Z][a-z])");

    /** Lower bound handed to {@code Counter.betweenFrequencyReturningElements} when selecting the vocabulary. */
    private final double mindf;

    /** Upper bound handed to {@code Counter.betweenFrequencyReturningElements} when selecting the vocabulary. */
    private final double maxdf;

    /**
     * Creates a dispatcher with an explicit vocabulary filter.
     *
     * @param obj            forwarded unchanged to the superclass constructor
     * @param clusterLinkage forwarded unchanged to the superclass constructor
     * @param d              minimum document frequency (stored as {@code mindf});
     *                       presumably a relative frequency in [0, 1] - verify against {@code Counter}
     * @param d2             maximum document frequency (stored as {@code maxdf});
     *                       presumably a relative frequency in [0, 1] - verify against {@code Counter}
     */
    public MultiSourceDispatcherIncrementalMergeByClusterText(Object obj, ClusterLinkage clusterLinkage, double d, double d2) {
        super(obj, clusterLinkage);
        this.mindf = d;
        this.maxdf = d2;
    }

    /**
     * Creates a dispatcher that keeps the whole vocabulary (mindf = 0.0, maxdf = 1.0).
     *
     * @param obj            forwarded unchanged to the superclass constructor
     * @param clusterLinkage forwarded unchanged to the superclass constructor
     */
    public MultiSourceDispatcherIncrementalMergeByClusterText(Object obj, ClusterLinkage clusterLinkage) {
        this(obj, clusterLinkage, 0.0d, 1.0d);
    }

    /**
     * Creates a dispatcher with {@link ClusterLinkage#SINGLE} that keeps the whole vocabulary.
     *
     * @param obj forwarded unchanged to the superclass constructor
     */
    public MultiSourceDispatcherIncrementalMergeByClusterText(Object obj) {
        this(obj, ClusterLinkage.SINGLE, 0.0d, 1.0d);
    }

    /**
     * Computes one TF-IDF vector per input model.
     *
     * <p>Each entry of {@code list} is converted to a Jena model and turned into a bag of words via
     * {@link #getBagOfWords(Model)}. The vocabulary is every distinct word, unless {@code mindf}/{@code maxdf}
     * differ from 0.0/1.0, in which case the words are filtered by document frequency (falling back to the
     * full vocabulary when the filter leaves nothing).
     *
     * @param list the models, each given as a set of alternative representations
     * @param obj  parameters, transformed to {@link Properties} for the type transformation
     * @return one row per model and one column per vocabulary word; an empty {@code double[0][0]} if any
     *         model is {@code null} or cannot be converted
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.multisource.dispatchers.MultiSourceDispatcherIncrementalMergeByCluster
    public double[][] getClusterFeatures(List<Set<Object>> list, Object obj) {
        Properties parameters = TypeTransformerRegistry.getTransformedPropertiesOrNewInstance(obj);
        // Counts in how many documents each word occurs (every document contributes its distinct words once).
        Counter<String> documentFrequency = new Counter<>();
        List<Counter<String>> documents = new ArrayList<>(list.size());
        for (Set<Object> representations : list) {
            try {
                Model model = (Model) TypeTransformerRegistry.getTransformedObjectMultipleRepresentations(representations, OntModel.class, parameters);
                if (model == null) {
                    LOGGER.warn("Initial model is null. Can't compute the similarities between the ontologies/knowledge graphs.");
                    return new double[0][0];
                }
                Counter<String> bagOfWords = getBagOfWords(model);
                documents.add(bagOfWords);
                documentFrequency.addAll(bagOfWords.getDistinctElements());
            } catch (TypeTransformationException e) {
                LOGGER.warn("Conversion to OntModel/Model did not work. Can't compute the similarities between the ontologies/knowledge graphs.", e);
                return new double[0][0];
            }
        }

        Set<String> vocabulary = (this.mindf == 0.0d && this.maxdf == 1.0d)
                ? documentFrequency.getDistinctElements()
                : documentFrequency.betweenFrequencyReturningElements(this.mindf, this.maxdf);
        if (vocabulary.isEmpty()) {
            // The frequency filter removed everything: fall back to the unfiltered vocabulary.
            vocabulary = documentFrequency.getDistinctElements();
        }

        String[] terms = vocabulary.toArray(new String[0]);
        int[] termDocumentFrequencies = Arrays.stream(terms)
                .mapToInt(term -> documentFrequency.getCount(term))
                .toArray();
        int documentCount = documents.size();

        return documents.stream()
                .map(document -> toTfIdfVector(document, terms, termDocumentFrequencies, documentCount))
                .toArray(double[][]::new);
    }

    /**
     * Builds the TF-IDF vector of one document: term frequency is the count divided by the largest count
     * in the document, inverse document frequency is {@code ln((1 + N) / (1 + df))}.
     * The result is passed through {@code MathEx.unitize} unless it is entirely zero.
     */
    private static double[] toTfIdfVector(Counter<String> document, String[] terms, int[] termDocumentFrequencies, int documentCount) {
        double[] termCounts = new double[terms.length];
        for (int i = 0; i < termCounts.length; i++) {
            termCounts[i] = document.getCount(terms[i]);
        }
        double maxCount = MathEx.max(termCounts);
        double[] weights = new double[termCounts.length];
        if (maxCount != 0.0d) {
            boolean hasNonZeroWeight = false;
            for (int i = 0; i < weights.length; i++) {
                weights[i] = (termCounts[i] / maxCount) * Math.log((1.0d + documentCount) / (1.0d + termDocumentFrequencies[i]));
                hasNonZeroWeight |= weights[i] != 0.0d;
            }
            // If every term occurs in every document all IDF factors are ln(1) = 0. Normalising such a
            // vector would divide by a zero norm (presumably yielding NaN), so it is left as zeros.
            if (hasNonZeroWeight) {
                MathEx.unitize(weights);
            }
        }
        return weights;
    }

    /**
     * Collects the words of a model: all string literals (normalised, split into sentences and tokens)
     * and the URI fragments of all subjects (split at camelCase boundaries and separator characters).
     * English stop words and punctuation are dropped; the remaining tokens are stemmed and lower-cased.
     *
     * @param model the model to read statements and subjects from
     * @return counter holding the stemmed, lower-cased words and how often each occurred
     */
    public Counter<String> getBagOfWords(Model model) {
        SimpleTokenizer tokenizer = new SimpleTokenizer(true);
        PorterStemmer stemmer = new PorterStemmer();
        Counter<String> bagOfWords = new Counter<>();

        StmtIterator statements = model.listStatements();
        while (statements.hasNext()) {
            RDFNode object = statements.nextStatement().getObject();
            if (!object.isLiteral()) {
                continue;
            }
            Literal literal = object.asLiteral();
            if (!isLiteralAString(literal)) {
                continue;
            }
            String normalized = SimpleNormalizer.getInstance().normalize(literal.getLexicalForm());
            Stream<String> tokens = Arrays.stream(SimpleSentenceSplitter.getInstance().split(normalized))
                    .flatMap(sentence -> Arrays.stream(tokenizer.split(sentence)));
            addStemmedTokens(tokens, stemmer, bagOfWords);
        }

        ResIterator subjects = model.listSubjects();
        while (subjects.hasNext()) {
            String uri = subjects.nextResource().getURI();
            if (uri == null) {
                // Blank nodes have no URI and therefore no fragment.
                continue;
            }
            String fragment = URIUtil.getUriFragment(uri);
            if (StringUtils.isBlank(fragment)) {
                continue;
            }
            addStemmedTokens(Arrays.stream(tokenizer.split(splitFragment(fragment))), stemmer, bagOfWords);
        }
        return bagOfWords;
    }

    /** Drops stop words and punctuation, stems and lower-cases the remaining tokens and adds them to {@code target}. */
    private static void addStemmedTokens(Stream<String> tokens, PorterStemmer stemmer, Counter<String> target) {
        target.addAll(tokens
                .filter(MultiSourceDispatcherIncrementalMergeByClusterText::isContentToken)
                .map(stemmer::stem)
                // Locale.ROOT keeps the result independent of the JVM default locale (e.g. Turkish dotless i).
                .map(token -> token.toLowerCase(Locale.ROOT))
                .iterator());
    }

    /** Returns {@code true} if the token is neither an English stop word nor a punctuation mark. */
    private static boolean isContentToken(String token) {
        return !EnglishStopWords.DEFAULT.contains(token.toLowerCase(Locale.ROOT))
                && !EnglishPunctuations.getInstance().contains(token);
    }

    /** Replaces camelCase boundaries and the characters {@code - _ ~ |} in a URI fragment by blanks. */
    private static String splitFragment(String str) {
        String camelCaseSeparated = CAMEL_CASE_SPLIT.matcher(str).replaceAll(" ");
        return URI_SEPARATOR.matcher(camelCaseSeparated).replaceAll(" ");
    }

    /**
     * Writes the trimmed lexical form of every non-empty string literal of the model to the file,
     * one literal per line. I/O failures are logged and not propagated.
     *
     * @param model the model whose literals are written
     * @param file  the target file (overwritten)
     */
    private void writeTextualRepresentationOfModel(Model model, File file) {
        // try-with-resources closes the writer even when a write fails half-way through.
        // NOTE(review): FileWriter encodes with the platform default charset - confirm this is intended.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            StmtIterator statements = model.listStatements();
            while (statements.hasNext()) {
                RDFNode object = statements.nextStatement().getObject();
                if (!object.isLiteral()) {
                    continue;
                }
                Literal literal = object.asLiteral();
                if (!isLiteralAString(literal)) {
                    continue;
                }
                String text = literal.getLexicalForm().trim();
                if (!text.isEmpty()) {
                    writer.write(text);
                    writer.newLine();
                }
            }
        } catch (IOException e) {
            LOGGER.error("Could not write the textual representation of a model.", e);
        }
    }

    /**
     * Decides whether a literal carries text: its datatype is {@code xsd:string} or {@code rdf:langString},
     * or it has a non-empty language tag.
     */
    private static boolean isLiteralAString(Literal literal) {
        String datatypeUri = literal.getDatatypeURI();
        if (datatypeUri != null
                && (datatypeUri.equals(XSDDatatype.XSDstring.getURI()) || datatypeUri.equals(RDF.dtLangString.getURI()))) {
            return true;
        }
        String language = literal.getLanguage();
        return language != null && !language.isEmpty();
    }
}
