package water.rapids;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import org.apache.commons.io.FileUtils;
import water.MRTask;
import water.fvec.C0DChunk;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.parser.BufferedString;
import water.rapids.Env;

/* compiled from: ASTStrOp.java */
/* loaded from: input_file:water/rapids/ASTCountSubstringsWords.class */
/**
 * Rapids primitive {@code num_valid_substrings}.
 *
 * <p>For every string or categorical column of the input frame, produces a column holding, per
 * row, the number of substrings of length two or more that appear in a word list. The word list
 * is read from a file (one entry per line) whose path is given as the second argument.
 */
class ASTCountSubstringsWords extends ASTPrim {
    /** @return the names of the user-visible arguments: the frame and the words-file path. */
    @Override
    public String[] args() {
        return new String[]{"ary", "words"};
    }

    /** @return the size of the AST array handed to {@link #apply}; slots 1 and 2 are read there. */
    @Override
    public int nargs() {
        return 3;
    }

    /** @return the name under which this primitive is invoked. */
    @Override
    public String str() {
        return "num_valid_substrings";
    }

    /**
     * Evaluates the primitive.
     *
     * @param env       evaluation environment used to execute the argument ASTs
     * @param stackHelp helper used to track the evaluated frame argument
     * @param astArr    AST array; slot 1 evaluates to the frame, slot 2 to the words-file path
     * @return a frame with one count column per input column
     * @throws IllegalArgumentException if any column is neither string nor categorical, or if
     *                                  the words file cannot be read
     */
    @Override
    public ValFrame apply(Env env, Env.StackHelp stackHelp, AST[] astArr) {
        Frame frame = stackHelp.track(astArr[1].exec(env)).getFrame();
        String wordsPath = astArr[2].exec(env).getStr();

        // Validate every column up front; report the type of the column that actually failed.
        for (Vec vec : frame.vecs()) {
            if (!vec.isCategorical() && !vec.isString()) {
                throw new IllegalArgumentException("num_valid_substrings() requires a string or categorical column. Received " + vec.get_type_str() + ". Please convert column to a string or categorical first.");
            }
        }

        HashSet<String> words = readWords(wordsPath);

        Vec[] counted = new Vec[frame.numCols()];
        int col = 0;
        for (Vec vec : frame.vecs()) {
            counted[col++] = vec.isCategorical()
                    ? countSubstringsWordsCategoricalCol(vec, words)
                    : countSubstringsWordsStringCol(vec, words);
        }
        return new ValFrame(new Frame(counted));
    }

    /**
     * Loads the word list, one entry per line, into a set.
     *
     * <p>Fails fast instead of continuing with a {@code null} set, which would otherwise only
     * surface later as a {@link NullPointerException} inside the counting tasks.
     *
     * @param wordsPath path of the words file
     * @return the distinct lines of the file
     * @throws IllegalArgumentException if the file cannot be read (the I/O error is the cause)
     */
    private HashSet<String> readWords(String wordsPath) {
        try {
            return new HashSet<>(FileUtils.readLines(new File(wordsPath)));
        } catch (IOException e) {
            throw new IllegalArgumentException("num_valid_substrings() could not read words file '" + wordsPath + "'", e);
        }
    }

    /**
     * Counts matching substrings for a categorical column. The count is computed once per domain
     * level in {@code setupLocal()} and then looked up per row by level index.
     *
     * @param vec     categorical column
     * @param hashSet word list
     * @return a new column of counts, with NA wherever the input row is NA
     */
    private Vec countSubstringsWordsCategoricalCol(Vec vec, final HashSet<String> hashSet) {
        return new MRTask() {
            // Per-level counts, indexed by categorical level; rebuilt in setupLocal().
            transient double[] catCounts;

            @Override
            public void setupLocal() {
                String[] domain = this._fr.anyVec().domain();
                this.catCounts = new double[domain.length];
                for (int level = 0; level < domain.length; level++) {
                    this.catCounts[level] = ASTCountSubstringsWords.this.calcCountSubstringsWords(domain[level], hashSet);
                }
            }

            @Override
            public void map(Chunk chunk, NewChunk newChunk) {
                newChunk.alloc_doubles(chunk._len);
                for (int row = 0; row < chunk._len; row++) {
                    if (chunk.isNA(row)) {
                        newChunk.addNA();
                    } else {
                        newChunk.addNum(this.catCounts[(int) chunk.atd(row)]);
                    }
                }
            }
        // NOTE(review): output type code 3 is presumably the numeric vec type - confirm against Vec.
        }.doAll(1, (byte) 3, new Frame(vec)).outputFrame().anyVec();
    }

    /**
     * Counts matching substrings for a string column, row by row.
     *
     * @param vec     string column
     * @param hashSet word list
     * @return a new column of counts, with NA wherever the input row is NA
     */
    private Vec countSubstringsWordsStringCol(Vec vec, final HashSet<String> hashSet) {
        return new MRTask() {
            @Override
            public void map(Chunk chunk, NewChunk newChunk) {
                // A C0DChunk is answered with NAs for every row without inspecting values.
                if (chunk instanceof C0DChunk) {
                    newChunk.addNAs(chunk.len());
                    return;
                }
                BufferedString scratch = new BufferedString();
                for (int row = 0; row < chunk._len; row++) {
                    if (chunk.isNA(row)) {
                        newChunk.addNA();
                    } else {
                        newChunk.addNum(ASTCountSubstringsWords.this.calcCountSubstringsWords(chunk.atStr(scratch, row).toString(), hashSet));
                    }
                }
            }
        // NOTE(review): output type code 3 is presumably the numeric vec type - confirm against Vec.
        }.doAll(new byte[]{3}, vec).outputFrame().anyVec();
    }

    /**
     * Counts the substrings of {@code str} with length of at least two that are contained in
     * {@code hashSet}. Every (start, end) position pair is checked separately, so a word that
     * occurs at several positions is counted once per occurrence.
     *
     * @param str     text to scan; strings shorter than two characters yield 0
     * @param hashSet word list
     * @return the number of matching substrings
     */
    public int calcCountSubstringsWords(String str, HashSet<String> hashSet) {
        int matches = 0;
        int length = str.length();
        for (int start = 0; start < length - 1; start++) {
            // end is exclusive and starts two past start, so single characters are never tested.
            for (int end = start + 2; end < length + 1; end++) {
                if (hashSet.contains(str.substring(start, end))) {
                    matches++;
                }
            }
        }
        return matches;
    }
}
