package net.sf.okapi.steps.tokenization.engine;

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.steps.tokenization.common.AbstractLexer;
import net.sf.okapi.steps.tokenization.common.Lexem;
import net.sf.okapi.steps.tokenization.common.Lexems;
import net.sf.okapi.steps.tokenization.common.LexerRule;
import net.sf.okapi.steps.tokenization.common.LexerRules;
import net.sf.okapi.steps.tokenization.common.RegexRule;
import net.sf.okapi.steps.tokenization.common.RegexRules;
import net.sf.okapi.steps.tokenization.tokens.Tokens;

/* loaded from: input_file:net/sf/okapi/steps/tokenization/engine/TextScanner.class */
/**
 * Lexer that scans a text with a list of regular-expression rules.
 *
 * <p>{@link #lexer_init()} compiles the pattern of every configured rule once,
 * {@link #lexer_open(String, LocaleId, Tokens)} stores the text and language and queues all rules,
 * and {@link #lexer_next()} then applies the queued rules one at a time, handing out one
 * {@link Lexem} per match.
 */
public class TextScanner extends AbstractLexer {
    /** Lexems found by the most recently scanned rule that have not been handed out yet. */
    private LinkedList<Lexem> queue;
    /** Rules that still have to be applied to the current text. */
    private LinkedList<LexerRule> rulesQueue;
    /** Value reported by {@link #lexer_hasNext()}; cleared once the rules are exhausted. */
    private boolean hasNext;
    /** Rules obtained from {@code getRules()} during initialization. */
    private LexerRules rules;
    /** Compiled pattern per rule; the value is {@code null} for a rule without a pattern. */
    private LinkedHashMap<LexerRule, Pattern> patterns;
    /** Text being scanned, as given to {@link #lexer_open(String, LocaleId, Tokens)}. */
    private String text;
    /** Language of the current text, passed to {@code checkRule} for every rule. */
    private LocaleId language;

    /**
     * Reports the rules class used by this lexer.
     *
     * @return {@code RegexRules.class}
     */
    @Override
    protected Class<? extends LexerRules> lexer_getRulesClass() {
        return RegexRules.class;
    }

    /**
     * Tells whether scanning may still produce lexems.
     *
     * @return {@code true} from the moment a non-null text is opened until {@link #lexer_next()}
     *         finds that all rules are exhausted
     */
    @Override
    protected boolean lexer_hasNext() {
        return this.hasNext;
    }

    /**
     * Creates the internal queues and compiles the pattern of every rule returned by
     * {@code getRules()}. Rules whose pattern string is {@code null} are registered with a
     * {@code null} compiled pattern and are skipped later by {@link #lexer_next()}.
     */
    @Override
    protected void lexer_init() {
        this.queue = new LinkedList<>();
        this.rulesQueue = new LinkedList<>();
        this.patterns = new LinkedHashMap<>();
        this.rules = getRules();
        for (LexerRule rule : this.rules) {
            // Every rule is cast to RegexRule, the only rule type this lexer handles
            RegexRule regexRule = (RegexRule) rule;
            String regex = regexRule.getPattern();
            Pattern compiled = (regex == null) ? null : Pattern.compile(regex, regexRule.getRegexOptions());
            this.patterns.put(regexRule, compiled);
        }
    }

    /**
     * Returns the next lexem of the current text.
     *
     * <p>Lexems queued by the previously scanned rule are served first. Once they are used up,
     * the pending rules are applied one after another until one of them produces at least one
     * lexem. Rules rejected by {@code checkRule} for the current language, rules without a
     * compiled pattern, and rules that match nothing are skipped.
     *
     * @return the next lexem, or {@code null} when no pending rule yields another lexem; in that
     *         case {@link #lexer_hasNext()} returns {@code false} afterwards
     */
    @Override
    protected Lexem lexer_next() {
        if (!this.queue.isEmpty()) {
            return this.queue.poll();
        }
        while (!this.rulesQueue.isEmpty()) {
            RegexRule regexRule = (RegexRule) this.rulesQueue.poll();
            if (regexRule == null || !checkRule(regexRule, this.language)) {
                continue;
            }
            Pattern pattern = this.patterns.get(regexRule);
            if (pattern == null) {
                continue;
            }
            enqueueMatches(regexRule, pattern);
            // A rule without matches must not end the call with null while other rules are pending
            if (!this.queue.isEmpty()) {
                return this.queue.poll();
            }
        }
        this.hasNext = false;
        return null;
    }

    /**
     * Appends one lexem to the queue for every match of the given rule in the current text.
     *
     * @param regexRule rule supplying the lexem id and the index of the capturing group to report
     * @param pattern compiled pattern of the rule
     */
    private void enqueueMatches(RegexRule regexRule, Pattern pattern) {
        Matcher matcher = pattern.matcher(this.text);
        int regexGroup = regexRule.getRegexGroup();
        while (matcher.find()) {
            int start = matcher.start(regexGroup);
            int end = matcher.end(regexGroup);
            // Matcher reports -1 for a group that did not take part in the match
            if (start > -1 && end > -1) {
                this.queue.add(new Lexem(regexRule.getLexemId(), matcher.group(regexGroup), start, end));
            }
        }
    }

    /**
     * Prepares the lexer for scanning a new text: stores the text and language, discards lexems
     * and rules left over from the previous text, and queues all rules again.
     *
     * @param str text to scan; when {@code null} there is nothing to scan and
     *            {@link #lexer_hasNext()} reports {@code false} right away
     * @param localeId language of the text
     * @param tokens not used by this lexer
     */
    @Override
    protected void lexer_open(String str, LocaleId localeId, Tokens tokens) {
        this.text = str;
        this.language = localeId;
        this.queue.clear();
        this.rulesQueue.clear();
        if (str == null) {
            // A null text cannot be matched; leaving the rule queue empty ends scanning cleanly
            this.hasNext = false;
            return;
        }
        this.rulesQueue.addAll(this.rules);
        this.hasNext = true;
    }

    /**
     * Not implemented by this lexer.
     *
     * @param str ignored
     * @param localeId ignored
     * @param tokens ignored
     * @return always {@code null}
     */
    @Override
    public Lexems process(String str, LocaleId localeId, Tokens tokens) {
        return null;
    }
}
