package edu.emory.clir.clearnlp.lexicon.dbpedia;

import com.google.gson.Gson;
import edu.emory.clir.clearnlp.collection.tree.PrefixNode;
import edu.emory.clir.clearnlp.collection.tree.PrefixTree;
import edu.emory.clir.clearnlp.collection.triple.ObjectIntIntTriple;
import edu.emory.clir.clearnlp.component.utils.NLPUtils;
import edu.emory.clir.clearnlp.ner.NERInfoSet;
import edu.emory.clir.clearnlp.ner.NERTag;
import edu.emory.clir.clearnlp.tokenization.AbstractTokenizer;
import edu.emory.clir.clearnlp.util.IOUtils;
import edu.emory.clir.clearnlp.util.Joiner;
import edu.emory.clir.clearnlp.util.StringUtils;
import edu.emory.clir.clearnlp.util.constant.StringConst;
import edu.emory.clir.clearnlp.util.lang.TLanguage;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/emory/clir/clearnlp/lexicon/dbpedia/PrefixTreeGenerator.class */
/**
 * Builds a {@link PrefixTree} that maps tokenized entity aliases to {@link NERInfoSet}s.
 *
 * <p>Each entry of the supplied {@link DBPediaInfoMap} contributes its aliases; the
 * categories attached to an alias are derived from the entry's DBPedia types after
 * collapsing them onto the set of target types given to the constructor.
 */
public class PrefixTreeGenerator implements DBPediaXML {
    /** Maps a DBPedia type to the target type it is collapsed onto. */
    private final Map<DBPediaType, DBPediaType> super_type_map;
    private final DBPediaTypeMap type_map;
    private final DBPediaInfoMap info_map;

    /**
     * @param dBPediaTypeMap type hierarchy used to resolve super types.
     * @param dBPediaInfoMap entries whose aliases are inserted into the tree.
     * @param set            target types that every other type is collapsed onto.
     */
    public PrefixTreeGenerator(DBPediaTypeMap dBPediaTypeMap, DBPediaInfoMap dBPediaInfoMap, Set<DBPediaType> set) {
        this.type_map = dBPediaTypeMap;
        this.info_map = dBPediaInfoMap;
        // type_map must be assigned before this call: getSuperTypeMap reads it.
        this.super_type_map = getSuperTypeMap(set);
    }

    /**
     * Builds the mapping from DBPedia types to target types.
     *
     * <p>Every target type maps to itself. Every key of the type map is then mapped to the
     * first target type (in the iteration order of {@code set}) for which
     * {@code type_map.isSuperType(type, target)} holds; types without such a target are
     * left unmapped.
     *
     * @param set the target types.
     * @return a new mutable map from type to target type.
     */
    public Map<DBPediaType, DBPediaType> getSuperTypeMap(Set<DBPediaType> set) {
        Map<DBPediaType, DBPediaType> superTypes = new HashMap<>();
        for (DBPediaType target : set) {
            superTypes.put(target, target);
        }
        for (DBPediaType type : this.type_map.keySet()) {
            // Bounded by hasNext(): the previous `while (true)` form never terminated
            // when no target type matched.
            Iterator<DBPediaType> targets = set.iterator();
            while (targets.hasNext()) {
                DBPediaType target = targets.next();
                if (this.type_map.isSuperType(type, target)) {
                    superTypes.put(type, target);
                    break;
                }
            }
        }
        return superTypes;
    }

    /**
     * Creates a prefix tree holding the aliases of every entry that has at least one
     * type mapped to a target type.
     *
     * @param abstractTokenizer tokenizer applied to each alias.
     * @param z                 if {@code true}, alias tokens are lower-cased before insertion.
     * @return the populated prefix tree.
     */
    public PrefixTree<String, NERInfoSet> getPrefixTree(AbstractTokenizer abstractTokenizer, boolean z) {
        PrefixTree<String, NERInfoSet> prefixTree = new PrefixTree<>();
        for (Map.Entry<String, DBPediaInfo> entry : this.info_map.entrySet()) {
            DBPediaInfo info = entry.getValue();
            NERInfoSet categories = getNERInfoSet(entry.getKey(), info.getTypes());
            if (categories != null) {
                addAliases(abstractTokenizer, prefixTree, info.getAliases(), categories, z);
            }
        }
        return prefixTree;
    }

    /**
     * Converts the given types into an info set of NER categories.
     *
     * @param str the entry key; currently unused.
     * @param set the DBPedia types of the entry.
     * @return the categories of all mapped target types, or {@code null} if none of the
     *         types is mapped.
     */
    private NERInfoSet getNERInfoSet(String str, Set<DBPediaType> set) {
        // Collapse onto target types first so each target contributes only once.
        Set<DBPediaType> targets = new HashSet<>();
        for (DBPediaType type : set) {
            DBPediaType target = this.super_type_map.get(type);
            if (target != null) {
                targets.add(target);
            }
        }
        if (targets.isEmpty()) {
            return null;
        }
        NERInfoSet categories = new NERInfoSet();
        for (DBPediaType target : targets) {
            categories.addCategory(NERTag.fromDBPediaType(target));
        }
        return categories;
    }

    /**
     * Inserts every non-empty trimmed alias into the tree and attaches the categories.
     *
     * <p>Each tree node receives its own {@link NERInfoSet}. Storing the caller's instance
     * directly would share it among all aliases of one entry, so categories merged into
     * one alias later on would silently leak into the entry's other aliases.
     */
    private void addAliases(AbstractTokenizer abstractTokenizer, PrefixTree<String, NERInfoSet> prefixTree, Set<String> set, NERInfoSet nERInfoSet, boolean z) {
        for (String alias : set) {
            String[] tokens = trimTokens(abstractTokenizer.tokenize(alias), z);
            if (tokens.length == 0) {
                continue;
            }
            PrefixNode<String, NERInfoSet> node = prefixTree.add(tokens, 0, tokens.length, token -> token.toString());
            if (node.hasValue()) {
                node.getValue().addCategories(nERInfoSet.getCategorySet());
            } else {
                NERInfoSet ownCategories = new NERInfoSet();
                ownCategories.addCategories(nERInfoSet.getCategorySet());
                node.setValue(ownCategories);
            }
        }
    }

    /**
     * Normalizes the tokens of an alias. The given list is modified in place.
     *
     * <ol>
     * <li>Trailing digit-only tokens are removed.</li>
     * <li>The first bracketed span is removed: it runs from the most recent
     *     {@code StringConst.LRB} token to the first {@code StringConst.RRB} token
     *     that follows an opening one.</li>
     * <li>If a single digit-only token remains, it is dropped as well.</li>
     * <li>Remaining tokens are simplified and, if {@code z} is set, lower-cased.</li>
     * </ol>
     *
     * @return the normalized tokens; empty if nothing is left.
     */
    private String[] trimTokens(List<String> list, boolean z) {
        for (int last = list.size() - 1; last >= 0 && StringUtils.containsDigitOnly(list.get(last)); last--) {
            list.remove(last);
        }

        int open = -1;
        int count = list.size();
        for (int i = 0; i < count; i++) {
            String token = list.get(i);
            if (token.equals(StringConst.LRB)) {
                open = i;
            } else if (token.equals(StringConst.RRB) && open >= 0) {
                list.subList(open, i + 1).clear();
                break; // only the first closed span is removed
            }
        }

        if (list.size() == 1 && StringUtils.containsDigitOnly(list.get(0))) {
            list.clear();
        }

        String[] tokens = new String[list.size()];
        for (int i = 0; i < tokens.length; i++) {
            String simplified = StringUtils.toSimplifiedForm(list.get(i));
            tokens[i] = z ? StringUtils.toLowerCase(simplified) : simplified;
        }
        return tokens;
    }

    /**
     * Builds a lower-cased prefix tree from two XZ-compressed JSON files, serializes it to
     * an XZ-compressed file, and prints the matches found in a sample sentence.
     *
     * @param strArr {@code [0]} type-map JSON (xz), {@code [1]} info-map JSON (xz),
     *               {@code [2]} output file for the serialized tree (xz).
     * @throws IllegalArgumentException if fewer than three arguments are given.
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 3) {
            throw new IllegalArgumentException("Usage: PrefixTreeGenerator <type-map.json.xz> <info-map.json.xz> <output.xz>");
        }
        String typeMapFile = strArr[0];
        String infoMapFile = strArr[1];
        String outputFile = strArr[2];
        boolean lowercase = true;
        Gson gson = new Gson();

        // Readers are closed as soon as each map is loaded; JSON is decoded as UTF-8
        // rather than with the platform default charset.
        DBPediaTypeMap typeMap;
        try (InputStreamReader reader = new InputStreamReader(IOUtils.createXZBufferedInputStream(typeMapFile), StandardCharsets.UTF_8)) {
            typeMap = gson.fromJson(reader, DBPediaTypeMap.class);
        }
        DBPediaInfoMap infoMap;
        try (InputStreamReader reader = new InputStreamReader(IOUtils.createXZBufferedInputStream(infoMapFile), StandardCharsets.UTF_8)) {
            infoMap = gson.fromJson(reader, DBPediaInfoMap.class);
        }

        PrefixTreeGenerator generator = new PrefixTreeGenerator(typeMap, infoMap, NERTag.DBPediaTypeSet);
        PrefixTree<String, NERInfoSet> prefixTree = generator.getPrefixTree(NLPUtils.getTokenizer(TLanguage.ENGLISH), lowercase);

        // try-with-resources closes the stream even if serialization fails.
        try (ObjectOutputStream out = new ObjectOutputStream(IOUtils.createXZBufferedOutputStream(outputFile))) {
            out.writeObject(prefixTree);
        }

        String sample = "John Emory Democratic Party London Bridge Emory University South Korea Rocky Mountains M16 New Years Eve The Catcher in the Rye Korean Ming Dynasty Euro";
        if (lowercase) {
            sample = StringUtils.toLowerCase(sample);
        }
        String[] words = sample.split(" ");
        for (ObjectIntIntTriple<NERInfoSet> match : prefixTree.getAll(words, 0, word -> word.toString(), true, true)) {
            System.out.println(match.o.joinTags("_") + " " + Joiner.join(words, " ", match.i1, match.i2 + 1));
        }
    }
}
