package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.wiktionary;

import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.Language;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.persistence.PersistenceService;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.sparql.SparqlServices;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import org.apache.jena.atlas.lib.Chars;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QueryParseException;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;
import org.apache.jena.tdb.TDBFactory;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/external/wiktionary/WiktionaryKnowledgeSource.class */
public class WiktionaryKnowledgeSource extends SemanticWordRelationDictionary {
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) WiktionaryKnowledgeSource.class);
    private PersistenceService persistenceService;
    private ConcurrentMap<String, HashSet<String>> synonymyBuffer;
    private ConcurrentMap<String, HashSet<String>> hypernymyBuffer;
    private ConcurrentMap<String, Boolean> askBuffer;
    private ConcurrentMap<String, HashSet<String>> translationBuffer;
    private ConcurrentMap<String, HashSet<String>> translationOfBuffer;
    private Dataset tdbDataset;
    private static final String ENDPOINT_URL = "http://kaiko.getalp.org/sparql";
    private boolean isUseTdb;
    private boolean isDiskBufferEnabled;
    private WiktionaryLinker linker;

    public WiktionaryKnowledgeSource() {
        this(true);
    }

    public WiktionaryKnowledgeSource(boolean z) {
        this.isUseTdb = false;
        this.isUseTdb = false;
        this.isDiskBufferEnabled = z;
        initialize();
    }

    public WiktionaryKnowledgeSource(String str) {
        this.isUseTdb = false;
        if (str == null) {
            LOGGER.error("The provided tdbDirectoryPath for Wiktionary is null. - ABORTING PROGRAM");
            return;
        }
        File file = new File(str);
        if (!file.exists()) {
            LOGGER.error("tdbDirectoryPath does not exist. - ABORTING PROGRAM");
            return;
        }
        if (!file.isDirectory()) {
            LOGGER.error("tdbDirectoryPath is not a directory. - ABORTING PROGRAM");
            return;
        }
        this.isUseTdb = true;
        this.tdbDataset = TDBFactory.createDataset(str);
        this.tdbDataset.begin(ReadWrite.READ);
        this.isDiskBufferEnabled = true;
        initialize();
    }

    private void initialize() {
        if (this.isDiskBufferEnabled) {
            this.persistenceService = PersistenceService.getService();
            this.synonymyBuffer = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.WIKTIONARY_SYNONYMY_BUFFER);
            this.hypernymyBuffer = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.WIKTIONARY_HYPERNYMY_BUFFER);
            this.askBuffer = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.WIKTIONARY_ASK_BUFFER);
            this.translationBuffer = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_BUFFER);
            this.translationOfBuffer = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_OF_BUFFER);
        } else {
            this.synonymyBuffer = new ConcurrentHashMap();
            this.hypernymyBuffer = new ConcurrentHashMap();
            this.askBuffer = new ConcurrentHashMap();
            this.translationBuffer = new ConcurrentHashMap();
            this.translationOfBuffer = new ConcurrentHashMap();
        }
        this.linker = new WiktionaryLinker(this);
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary
    public void close() {
        commitAll();
        if (this.tdbDataset != null) {
            this.tdbDataset.end();
            this.tdbDataset.close();
        }
        LOGGER.info("DBnary TDB dataset closed.");
    }

    public boolean isInDictionary(String str) {
        return isInDictionary(str, Language.ENGLISH);
    }

    public boolean isInDictionary(String str, Language language) {
        if (str == null || language == null) {
            return false;
        }
        String encodeWord = encodeWord(str);
        String str2 = "in_dict_" + encodeWord + "_" + language.toSparqlChar2();
        if (this.askBuffer.containsKey(str2)) {
            return this.askBuffer.get(str2).booleanValue();
        }
        boolean z = false;
        try {
            Query create = QueryFactory.create("PREFIX lexvo: <http://lexvo.org/id/iso639-3/>\r\nPREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\r\nASK {  <http://kaiko.getalp.org/dbnary/" + language.toWiktionaryChar3() + "/" + encodeWord + "> ?p ?o . }");
            QueryExecution create2 = this.isUseTdb ? QueryExecutionFactory.create(create, this.tdbDataset) : QueryExecutionFactory.sparqlService(ENDPOINT_URL, create);
            z = SparqlServices.safeAsk(create2);
            create2.close();
        } catch (Exception e) {
            LOGGER.warn("An error occurred while trying to look up: '" + encodeWord + "'. Returning false.");
        }
        this.askBuffer.put(str2, Boolean.valueOf(z));
        commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_ASK_BUFFER);
        return z;
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary, de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SynonymCapability
    public boolean isStrongFormSynonymous(String str, String str2) {
        if (str == null || str2 == null) {
            return false;
        }
        Set<String> synonymsEncoded = getSynonymsEncoded(str);
        Set<String> synonymsEncoded2 = getSynonymsEncoded(str2);
        if (synonymsEncoded == null && synonymsEncoded2 == null) {
            return false;
        }
        if (synonymsEncoded == null) {
            synonymsEncoded = new HashSet();
        }
        if (synonymsEncoded2 == null) {
            synonymsEncoded2 = new HashSet();
        }
        synonymsEncoded.add(str);
        synonymsEncoded2.add(str2);
        synonymsEncoded.remove("");
        synonymsEncoded2.remove("");
        if (synonymsEncoded.contains(str2)) {
            return true;
        }
        return synonymsEncoded2.contains(str);
    }

    public Set<String> getSynonymsEncoded(String str) {
        Set<String> synonymsLexical = getSynonymsLexical(str);
        if (synonymsLexical == null) {
            return null;
        }
        return (Set) synonymsLexical.stream().map(WiktionaryKnowledgeSource::encodeWord).collect(Collectors.toSet());
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary, de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SynonymCapability
    public Set<String> getSynonymsLexical(String str) {
        if (str == null) {
            return null;
        }
        HashSet<String> synonyms = getSynonyms(str, Language.ENGLISH);
        if (synonyms.size() == 0) {
            return null;
        }
        return synonyms;
    }

    public HashSet<String> getSynonyms(String str, Language language) {
        String encodeWord = encodeWord(str);
        if (this.synonymyBuffer.containsKey(encodeWord + "_" + language.toWiktionaryChar3())) {
            return this.synonymyBuffer.get(encodeWord + "_" + language.toWiktionaryChar3());
        }
        HashSet<String> hashSet = new HashSet<>();
        try {
            Query create = QueryFactory.create("PREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\r\nPREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>\r\nselect distinct ?synonym WHERE {\r\n\r\n{select distinct ?synonym where {\r\n<http://kaiko.getalp.org/dbnary/" + language.toWiktionaryChar3() + "/" + encodeWord + "> <http://kaiko.getalp.org/dbnary#describes> ?descriptionConcepts .\r\n?descriptionConcepts dbnary:synonym ?synonym .\r\n}}\r\nUNION\r\n{\r\nselect distinct ?synonym where {\r\n<http://kaiko.getalp.org/dbnary/" + language.toWiktionaryChar3() + "/" + encodeWord + "> <http://kaiko.getalp.org/dbnary#describes> ?descriptionConcepts .\r\n?descriptionConcepts ontolex:sense ?sense .\r\n?sense dbnary:synonym ?synonym .\r\n}\r\n}\r\n}");
            QueryExecution create2 = this.isUseTdb ? QueryExecutionFactory.create(create, this.tdbDataset) : QueryExecutionFactory.sparqlService(ENDPOINT_URL, create);
            ResultSet execSelect = create2.execSelect();
            while (execSelect.hasNext()) {
                hashSet.add(getLemmaFromURI(execSelect.next().getResource("synonym").toString()));
            }
            create2.close();
        } catch (Exception e) {
            LOGGER.warn("Problem with query getSynonyms for word: '" + encodeWord + "'.");
        }
        this.synonymyBuffer.put(encodeWord + "_" + language.toWiktionaryChar3(), hashSet);
        commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_SYNONYMY_BUFFER);
        return hashSet;
    }

    private static String getLemmaFromURI(String str) {
        return str.substring(35, str.length()).replace("_", " ");
    }

    static String encodeWord(String str) {
        return str.trim().replace(Chars.S_PERCENT, "%25").replace(" ", "_").replace(".", "%2E").replace("^", "%5E").replace("<", "%3C").replace(">", "%3E");
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary
    public HashSet<String> getHypernyms(String str) {
        return getHypernyms(str, Language.ENGLISH);
    }

    public HashSet<String> getHypernyms(String str, Language language) {
        HashSet<String> hashSet = new HashSet<>();
        if (str == null) {
            return hashSet;
        }
        String encodeWord = encodeWord(str);
        String str2 = encodeWord + "_" + language.toSparqlChar2();
        if (this.hypernymyBuffer.containsKey(str2)) {
            return this.hypernymyBuffer.get(str2);
        }
        try {
            Query create = QueryFactory.create("PREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX dbnarylan: <http://kaiko.getalp.org/dbnary/eng/>\nSELECT distinct ?hypernym {\n{select ?hypernym where {\ndbnarylan:" + encodeWord + " dbnary:hypernym ?hypernym.\n?hypernym rdf:type dbnary:Page .}}\nUNION\n{select ?hypernym where {\n?hs dbnary:hyponym dbnarylan:" + encodeWord + " .\n?hypernym dbnary:describes ?hs .\n?hypernym rdf:type dbnary:Page .}}\nUNION\n{select ?hypernym where {\ndbnarylan:" + encodeWord + " dbnary:describes ?dc .\n?dc dbnary:hypernym ?hypernym.\n?hypernym rdf:type dbnary:Page .\n}}}");
            QueryExecution create2 = this.isUseTdb ? QueryExecutionFactory.create(create, this.tdbDataset) : QueryExecutionFactory.sparqlService(ENDPOINT_URL, create);
            ResultSet execSelect = create2.execSelect();
            while (execSelect.hasNext()) {
                hashSet.add(getLemmaFromURI(execSelect.next().getResource("hypernym").toString()));
            }
            create2.close();
        } catch (QueryParseException e) {
            LOGGER.warn("Failed to build getHypernyms query for concept '" + encodeWord + Chars.S_QUOTE1);
        }
        this.hypernymyBuffer.put(str2, hashSet);
        commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_HYPERNYMY_BUFFER);
        return hashSet;
    }

    public HashSet<String> getTranslation(String str, Language language, Language language2) {
        String str2 = str + "_" + language + "_" + language2;
        if (this.translationBuffer.containsKey(str2)) {
            return this.translationBuffer.get(str2);
        }
        HashSet<String> hashSet = new HashSet<>();
        String str3 = "PREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\nselect distinct ?tp where\n{\n<http://kaiko.getalp.org/dbnary/" + language.toWiktionaryChar3() + "/" + str + "> dbnary:describes ?le .\n?t dbnary:isTranslationOf ?le .\n?t dbnary:targetLanguage <http://lexvo.org/id/iso639-3/" + language2.toWiktionaryChar3() + "> .\n?t dbnary:writtenForm ?tp .\n}";
        try {
            QueryExecution create = this.isUseTdb ? QueryExecutionFactory.create(QueryFactory.create(str3), this.tdbDataset) : QueryExecutionFactory.sparqlService(ENDPOINT_URL, str3);
            ResultSet execSelect = create.execSelect();
            while (execSelect.hasNext()) {
                hashSet.add(execSelect.next().getLiteral("tp").getString());
            }
            create.close();
            this.translationBuffer.put(str2, hashSet);
            commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_BUFFER);
            return hashSet;
        } catch (Exception e) {
            LOGGER.error("Could not execute getTranslation query for concept " + str + " (" + language + " to " + language2 + ")");
            LOGGER.error("Problematic Query:\n" + str3);
            this.translationBuffer.put(str2, new HashSet<>());
            commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_BUFFER);
            return null;
        }
    }

    public HashSet<String> getTranslationOf(String str, Language language) {
        String str2 = str + "_" + language;
        if (this.translationOfBuffer.containsKey(str2)) {
            return this.translationOfBuffer.get(str2);
        }
        HashSet<String> hashSet = new HashSet<>();
        String str3 = language != Language.CHINESE ? "PREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\nselect distinct ?c where\n{\n?c dbnary:describes ?le .\n?t dbnary:isTranslationOf ?le .\n?t dbnary:targetLanguage <http://lexvo.org/id/iso639-3/" + language.toWiktionaryChar3() + "> .\n?t dbnary:writtenForm \"" + str + "\"@" + language.toWiktionaryLanguageTag() + " .\n}" : "PREFIX dbnary: <http://kaiko.getalp.org/dbnary#>\nselect distinct ?c where\n{\n{\nselect distinct ?c where\n{\n?c dbnary:describes ?le .\n?t dbnary:isTranslationOf ?le .\n?t dbnary:targetLanguage <http://lexvo.org/id/iso639-3/yue> .\n?t dbnary:writtenForm \"" + str + "\"@yue .\n}\n}\nUNION\n{\nselect ?c where\n{\n?c dbnary:describes ?le .\n?t dbnary:isTranslationOf ?le .\n?t dbnary:targetLanguage <http://lexvo.org/id/iso639-3/cmn> .\n?t dbnary:writtenForm \"" + str + "\"@cmn .\n}\n}\n}\n";
        try {
            QueryExecution create = this.isUseTdb ? QueryExecutionFactory.create(QueryFactory.create(str3), this.tdbDataset) : QueryExecutionFactory.sparqlService(ENDPOINT_URL, str3);
            ResultSet execSelect = create.execSelect();
            while (execSelect.hasNext()) {
                hashSet.add(execSelect.next().getResource(WikipediaTokenizer.CATEGORY).getURI());
            }
            create.close();
            this.translationOfBuffer.put(str2, hashSet);
            commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_OF_BUFFER);
            return hashSet;
        } catch (Exception e) {
            LOGGER.error("Could not execute getTranslationOf query for concept " + str + " (" + language + ")", (Throwable) e);
            LOGGER.error("Problematic Query:\n" + str3);
            this.translationOfBuffer.put(str2, new HashSet<>());
            commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_OF_BUFFER);
            return null;
        }
    }

    public boolean isTranslationDerived(String str, Language language, String str2, Language language2) {
        if (str == null || str2 == null || language == null || language2 == null) {
            return false;
        }
        HashSet<String> translationOf = getTranslationOf(str, language);
        HashSet<String> translationOf2 = getTranslationOf(str2, language2);
        if (translationOf == null || translationOf2 == null || translationOf.size() == 0 || translationOf2.size() == 0) {
            return false;
        }
        int size = translationOf.size() + translationOf2.size();
        HashSet hashSet = new HashSet();
        hashSet.addAll(translationOf);
        hashSet.addAll(translationOf2);
        return size != hashSet.size();
    }

    public boolean isTranslationLinked(String str, Language language, String str2, Language language2) {
        Iterator<String> it2 = getTranslation(str, language, language2).iterator();
        while (it2.hasNext()) {
            String normalizeForTranslations = normalizeForTranslations(it2.next());
            str2 = normalizeForTranslations(str2);
            if (normalizeForTranslations.equals(str2)) {
                return true;
            }
        }
        Iterator<String> it3 = getTranslation(str2, language2, language).iterator();
        while (it3.hasNext()) {
            String normalizeForTranslations2 = normalizeForTranslations(it3.next());
            str = normalizeForTranslations(str);
            if (normalizeForTranslations2.equals(str)) {
                return true;
            }
        }
        return false;
    }

    public boolean isTranslationNonLinked(String str, Language language, String str2, Language language2) {
        if (!language.toWiktionaryChar3().equals("eng") && !language2.toWiktionaryChar3().equals("eng")) {
            LOGGER.error("Currently only English translations are supported.");
            return false;
        }
        Iterator<String> it2 = getTranslation(str, language, language2).iterator();
        while (it2.hasNext()) {
            String normalizeForTranslations = normalizeForTranslations(it2.next());
            str2 = normalizeForTranslations(str2);
            if (normalizeForTranslations.equals(str2)) {
                return true;
            }
        }
        String linkToSingleConcept = this.linker.linkToSingleConcept(str2, language2);
        if (linkToSingleConcept == null) {
            return false;
        }
        Iterator<String> it3 = getTranslation(linkToSingleConcept, language2, language).iterator();
        while (it3.hasNext()) {
            String normalizeForTranslations2 = normalizeForTranslations(it3.next());
            str = normalizeForTranslations(str);
            if (normalizeForTranslations2.equals(str)) {
                return true;
            }
        }
        return false;
    }

    public HashSet<String> getNormalizedTranslations(String str, Language language, Language language2) {
        HashSet<String> hashSet = new HashSet<>();
        HashSet<String> translation = getTranslation(str, language, language2);
        if (translation == null) {
            return null;
        }
        Iterator<String> it2 = translation.iterator();
        while (it2.hasNext()) {
            hashSet.add(normalizeForTranslations(it2.next()));
        }
        return hashSet;
    }

    public boolean isUseTdb() {
        return this.isUseTdb;
    }

    public static HashSet<String> normalizeForTranslations(HashSet<String> hashSet) {
        HashSet<String> hashSet2 = new HashSet<>();
        Iterator<String> it2 = hashSet.iterator();
        while (it2.hasNext()) {
            hashSet2.add(normalizeForTranslations(it2.next()));
        }
        return hashSet2;
    }

    public static String normalizeForTranslations(String str) {
        return str.toLowerCase().trim().replace(" ", "_").replace("-", "_");
    }

    private void commit(PersistenceService.PreconfiguredPersistences preconfiguredPersistences) {
        if (preconfiguredPersistences == null || this.persistenceService == null) {
            return;
        }
        switch (preconfiguredPersistences) {
            case WIKTIONARY_SYNONYMY_BUFFER:
                this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_SYNONYMY_BUFFER);
                return;
            case WIKTIONARY_HYPERNYMY_BUFFER:
                this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_HYPERNYMY_BUFFER);
                return;
            case WIKTIONARY_ASK_BUFFER:
                this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_ASK_BUFFER);
                return;
            case WIKTIONARY_TRANSLATION_BUFFER:
                this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_BUFFER);
                return;
            case WIKTIONARY_TRANSLATION_OF_BUFFER:
                this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_TRANSLATION_OF_BUFFER);
                return;
            default:
                return;
        }
    }

    private void commitAll() {
        if (!this.isDiskBufferEnabled || this.persistenceService == null) {
            return;
        }
        this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_SYNONYMY_BUFFER);
        this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_HYPERNYMY_BUFFER);
        this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.WIKTIONARY_ASK_BUFFER);
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary, de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.ExternalResource
    public LabelToConceptLinker getLinker() {
        return this.linker;
    }

    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.SemanticWordRelationDictionary, de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.ExternalResource
    public String getName() {
        return "Wiktionary";
    }
}
