package dragon.ml.seqmodel.feature;

import dragon.ml.seqmodel.data.BasicToken;
import dragon.ml.seqmodel.data.DataSequence;
import dragon.ml.seqmodel.data.Dataset;

/* loaded from: input_file:dragon/ml/seqmodel/feature/FeatureTypeWord.class */
/**
 * Feature type that emits one feature per (word, state) pair for a single-token position.
 *
 * <p>Words are looked up in a {@link FeatureDictionary}; unless the instance is case sensitive,
 * the token text is lower-cased before it is added to or looked up in the dictionary. Words
 * that are unknown, or whose dictionary count does not exceed {@link #RARE_THRESHOLD}, produce
 * no features.
 *
 * <p>The instance keeps per-scan cursor state ({@code token}, {@code tokenId},
 * {@code curState}) in fields that are overwritten by every call to
 * {@link #startScanFeaturesAt(DataSequence, int, int)}.
 */
public class FeatureTypeWord extends AbstractFeatureType {
    /** Words whose dictionary count is less than or equal to this value yield no features. */
    public static int RARE_THRESHOLD = 0;

    private final FeatureDictionary dict;
    /** File used by {@link #readTrainingResult()} / {@link #saveTrainingResult()}; null for a supplied dictionary. */
    private final String wordFile;
    private final int stateNum;
    private final boolean caseSensitive;

    // Cursor state of the scan currently in progress.
    private int curState;
    private String token;
    private int tokenId;

    /**
     * Creates a case-insensitive word feature type backed by its own dictionary.
     *
     * @param str path of the word dictionary file
     * @param i   number of states
     */
    public FeatureTypeWord(String str, int i) {
        this(str, i, false);
    }

    /**
     * Creates a word feature type backed by its own dictionary.
     *
     * @param str path of the word dictionary file
     * @param i   number of states
     * @param z   {@code true} to keep the original case of the words
     */
    public FeatureTypeWord(String str, int i, boolean z) {
        super(true);
        this.caseSensitive = z;
        this.stateNum = i;
        this.wordFile = str;
        this.dict = new FeatureDictionaryChar(i, 500);
        this.idPrefix = "W_";
    }

    /**
     * Creates a case-insensitive word feature type on top of an existing dictionary.
     *
     * @param featureDictionary dictionary to look words up in
     * @param i                 number of states
     */
    public FeatureTypeWord(FeatureDictionary featureDictionary, int i) {
        this(featureDictionary, i, false);
    }

    /**
     * Creates a word feature type on top of an existing dictionary. No dictionary file is
     * associated with the instance.
     *
     * @param featureDictionary dictionary to look words up in
     * @param i                 number of states
     * @param z                 {@code true} to keep the original case of the words
     */
    public FeatureTypeWord(FeatureDictionary featureDictionary, int i, boolean z) {
        super(false);
        this.caseSensitive = z;
        this.stateNum = i;
        this.wordFile = null;
        this.dict = featureDictionary;
        this.idPrefix = "W_";
    }

    /**
     * @return the dictionary this feature type looks words up in
     */
    public FeatureDictionary getWordDictionary() {
        return this.dict;
    }

    /**
     * Positions the cursor on the token at {@code i2} and prepares the first feature.
     *
     * @param dataSequence sequence being scanned
     * @param i            start position; must equal {@code i2}
     * @param i2           end position, i.e. the index of the token to generate features for
     * @return {@code true} if the scan was started; {@code false} if the positions differ,
     *         the word is unknown, or its count does not exceed {@link #RARE_THRESHOLD}
     */
    @Override // dragon.ml.seqmodel.feature.FeatureType
    public boolean startScanFeaturesAt(DataSequence dataSequence, int i, int i2) {
        this.curState = -1;
        if (i != i2) {
            System.out.println("The starting position and the ending position should be the same for word features");
            return false;
        }

        BasicToken current = dataSequence.getToken(i2);
        // Always refresh the token text: next() uses it to build the feature name, so it must
        // not be left over from a previous scan when the token already carries a cached index.
        this.token = normalize(current.getContent());
        this.tokenId = current.getIndex();
        if (this.tokenId < 0) {
            if (this.token == null) {
                // No text to look up; treat the word as unknown.
                return false;
            }
            this.tokenId = this.dict.getIndex(this.token);
        }

        boolean unknown = this.tokenId < 0;
        if (unknown || this.dict.getCount(this.tokenId) <= RARE_THRESHOLD) {
            return false;
        }
        getNextState();
        return true;
    }

    /**
     * @return {@code true} while the current state lies in {@code [0, stateNum)}
     */
    @Override // dragon.ml.seqmodel.feature.FeatureType
    public boolean hasNext() {
        return this.curState >= 0 && this.curState < this.stateNum;
    }

    /**
     * Builds the feature for the current word and state, then advances to the next state.
     * The feature is named {@code idPrefix + token}, has id
     * {@code tokenId * stateNum + curState} and value {@code 1.0}.
     *
     * @return the feature for the current (word, state) pair
     */
    @Override // dragon.ml.seqmodel.feature.FeatureType
    public Feature next() {
        int featureIndex = (this.tokenId * this.stateNum) + this.curState;
        FeatureIdentifier identifier = new FeatureIdentifier(this.idPrefix + this.token, featureIndex, this.curState);
        BasicFeature feature = new BasicFeature(identifier, this.curState, 1.0d);
        getNextState();
        return feature;
    }

    /**
     * Adds every token of every sequence in {@code dataset} to the dictionary together with
     * its label, and stores the index returned by the dictionary on the token.
     *
     * @param dataset training data to scan
     * @return always {@code true}
     */
    @Override // dragon.ml.seqmodel.feature.AbstractFeatureType, dragon.ml.seqmodel.feature.FeatureType
    public boolean train(Dataset dataset) {
        dataset.startScan();
        while (dataset.hasNext()) {
            DataSequence sequence = dataset.next();
            for (int pos = 0; pos < sequence.length(); pos++) {
                BasicToken current = sequence.getToken(pos);
                int wordIndex = this.dict.addFeature(normalize(current.getContent()), sequence.getLabel(pos));
                current.setIndex(wordIndex);
            }
        }
        // NOTE(review): presumably the dictionary's own "finish building" hook rather than
        // Object.finalize() semantics — confirm against FeatureDictionary.
        this.dict.finalize();
        return true;
    }

    /**
     * Loads the dictionary from the word file.
     *
     * @return the result reported by the dictionary
     */
    @Override // dragon.ml.seqmodel.feature.AbstractFeatureType, dragon.ml.seqmodel.feature.FeatureType
    public boolean readTrainingResult() {
        return this.dict.read(this.wordFile);
    }

    /**
     * Writes the dictionary to the word file.
     *
     * @return the result reported by the dictionary
     */
    @Override // dragon.ml.seqmodel.feature.AbstractFeatureType, dragon.ml.seqmodel.feature.FeatureType
    public boolean saveTrainingResult() {
        return this.dict.write(this.wordFile);
    }

    /**
     * Word features apply to single tokens only.
     *
     * @return always {@code false}
     */
    @Override // dragon.ml.seqmodel.feature.AbstractFeatureType, dragon.ml.seqmodel.feature.FeatureType
    public boolean supportSegment() {
        return false;
    }

    /**
     * Advances {@code curState}: when {@code needTraining()} is true the dictionary chooses the
     * next state for the current word, otherwise the state is simply incremented.
     */
    private void getNextState() {
        if (needTraining()) {
            this.curState = this.dict.getNextStateWithFeature(this.tokenId, this.curState);
        } else {
            this.curState++;
        }
    }

    /**
     * Applies the case policy of this feature type to a token's text.
     *
     * @param text raw token content, may be {@code null}
     * @return {@code text} unchanged when case sensitive or {@code null}, otherwise lower-cased
     */
    private String normalize(String text) {
        if (text == null || this.caseSensitive) {
            return text;
        }
        // NOTE(review): uses the default locale, as before; switching to Locale.ROOT would be
        // more predictable but could change matching against already persisted dictionaries.
        return text.toLowerCase();
    }
}
