package org.apache.tika.parser.ner.regex;

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.io.IOUtils;
import org.apache.tika.parser.ner.NERecogniser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:resources/install/10/tika-parsers-1.24.jar:org/apache/tika/parser/ner/regex/RegexNERecogniser.class */
/**
 * A {@link NERecogniser} that finds named entities by applying regular expressions to text.
 *
 * <p>Patterns are read from a UTF-8 text configuration in which each meaningful line has the
 * form {@code ENTITY_TYPE=REGEX}. Blank lines and lines starting with {@code #} are ignored.
 * Lines without an {@code =} separator, or whose regex does not compile, are logged and
 * skipped without affecting the remaining lines. If the same entity type appears more than
 * once, the last pattern read replaces the earlier one.
 */
public class RegexNERecogniser implements NERecogniser {
    /** Name of the classpath resource (resolved relative to this class) read by the no-arg constructor. */
    public static final String NER_REGEX_FILE = "ner-regex.txt";
    private static final Logger LOG = LoggerFactory.getLogger(RegexNERecogniser.class);
    /** Entity type names for which a pattern was successfully loaded. */
    public Set<String> entityTypes;
    /** Compiled pattern for each entity type name. */
    public Map<String, Pattern> patterns;
    /** True when at least one pattern was loaded. */
    private boolean available;
    /** Lazily created shared instance returned by {@link #getInstance()}. */
    private static RegexNERecogniser INSTANCE;

    /**
     * Creates a recogniser configured from the {@link #NER_REGEX_FILE} classpath resource.
     */
    public RegexNERecogniser() {
        this(RegexNERecogniser.class.getResourceAsStream(NER_REGEX_FILE));
    }

    /**
     * Creates a recogniser configured from the given stream.
     *
     * <p>The stream is always closed before this constructor returns. Failures while reading
     * are logged rather than thrown; in that case the recogniser keeps whatever patterns
     * were loaded (possibly none) and {@link #isAvailable()} reflects that.
     *
     * @param inputStream UTF-8 configuration with one {@code ENTITY_TYPE=REGEX} entry per line;
     *                    may be {@code null}, which yields an unavailable recogniser
     */
    public RegexNERecogniser(InputStream inputStream) {
        this.entityTypes = new HashSet<>();
        this.patterns = new HashMap<>();
        this.available = false;
        if (inputStream == null) {
            // Typically the classpath resource is missing; report it explicitly instead of
            // relying on a NullPointerException from the reader.
            LOG.error("NER regex configuration stream is null; no patterns loaded");
        } else {
            try {
                for (String rawLine : IOUtils.readLines(inputStream, StandardCharsets.UTF_8)) {
                    addPattern(rawLine.trim());
                }
            } catch (Exception e) {
                // Best effort: a failed read must not prevent construction.
                LOG.error("Failed to read NER regex configuration: {}", e.getMessage(), e);
            } finally {
                // Close on every path, including when reading throws.
                IOUtils.closeQuietly(inputStream);
            }
        }
        this.available = !this.entityTypes.isEmpty();
    }

    /**
     * Parses one trimmed configuration line and registers its pattern, if it is valid.
     *
     * @param line a trimmed line from the configuration
     */
    private void addPattern(String line) {
        if (line.isEmpty() || line.startsWith("#")) {
            return;
        }
        int separator = line.indexOf('=');
        if (separator < 0) {
            LOG.error("Skipped : Invalid config : {} ", line);
            return;
        }
        String entityType = line.substring(0, separator).trim();
        String regex = line.substring(separator + 1).trim();
        try {
            Pattern compiled = Pattern.compile(regex);
            this.patterns.put(entityType, compiled);
            this.entityTypes.add(entityType);
        } catch (PatternSyntaxException e) {
            // Skip only the offending entry so later lines are still loaded.
            LOG.error("Skipped : Invalid regex for {} : {}", entityType, regex, e);
        }
    }

    /**
     * Returns the shared instance, creating it from the default resource on first use.
     *
     * @return the shared recogniser
     */
    public static synchronized RegexNERecogniser getInstance() {
        if (INSTANCE == null) {
            INSTANCE = new RegexNERecogniser();
        }
        return INSTANCE;
    }

    /**
     * @return {@code true} if at least one pattern was loaded
     */
    @Override // org.apache.tika.parser.ner.NERecogniser
    public boolean isAvailable() {
        return this.available;
    }

    /**
     * @return the entity type names for which patterns were loaded (the internal set, not a copy)
     */
    @Override // org.apache.tika.parser.ner.NERecogniser
    public Set<String> getEntityTypes() {
        return this.entityTypes;
    }

    /**
     * Collects every distinct substring of {@code str} matched by {@code pattern}.
     *
     * @param str     text to search
     * @param pattern pattern to apply
     * @return the set of matched substrings, or {@code null} when there is no match at all
     *         (kept for backward compatibility with existing callers)
     */
    public Set<String> findMatches(String str, Pattern pattern) {
        Set<String> matches = null;
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            if (matches == null) {
                // Allocate lazily so that "no match" is still reported as null.
                matches = new HashSet<>();
            }
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Applies every loaded pattern to the text.
     *
     * @param str text to search
     * @return a map from entity type to the distinct matches found for it; entity types with
     *         no match are omitted
     */
    @Override // org.apache.tika.parser.ner.NERecogniser
    public Map<String, Set<String>> recognise(String str) {
        Map<String, Set<String>> result = new HashMap<>();
        for (Map.Entry<String, Pattern> entry : this.patterns.entrySet()) {
            Set<String> matches = findMatches(str, entry.getValue());
            if (matches != null) {
                result.put(entry.getKey(), matches);
            }
        }
        return result;
    }
}
