package edu.emory.clir.clearnlp.experiment;

import edu.emory.clir.clearnlp.collection.pair.DoubleIntPair;
import edu.emory.clir.clearnlp.collection.pair.ObjectIntPair;
import edu.emory.clir.clearnlp.tokenization.EnglishTokenizer;
import edu.emory.clir.clearnlp.util.IOUtils;
import edu.emory.clir.clearnlp.util.Splitter;
import edu.emory.clir.clearnlp.util.StringUtils;
import edu.emory.clir.clearnlp.util.constant.PatternConst;
import edu.emory.clir.clearnlp.vector.Term;
import edu.emory.clir.clearnlp.vector.VectorSpaceModel;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.StringJoiner;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/* loaded from: input_file:edu/emory/clir/clearnlp/experiment/SymbolStrip.class */
/**
 * Experiment driver that relates free-text documents to numeric category columns through
 * vector-space cosine similarity.
 *
 * <p>{@link #initVectors} reads a tab-separated training file, turns column 1 of every row into a
 * term vector and remembers, per category column listed in {@link #CATEGORIES}, the numeric value
 * attached to each document. {@link #measureCategories} then scores new documents against those
 * training vectors and writes one averaged CSV row per distinct key found in column 0.
 *
 * <p>Instances hold mutable state and are not designed for concurrent use.
 */
public class SymbolStrip {
    /** Zero-based column indices of the training file that are read as numeric category values. */
    final int[] CATEGORIES = {2, 3, 6, 8, 10, 12, 13, 18};

    /**
     * One list per entry of {@link #CATEGORIES}; each pair holds a category value ({@code d}) and
     * the index ({@code i}) of the training document it belongs to in {@link #train_vectors}.
     */
    private final List<List<DoubleIntPair>> category_list = IntStream.range(0, this.CATEGORIES.length)
            .<List<DoubleIntPair>>mapToObj(index -> new ArrayList<>())
            .collect(Collectors.toList());

    private final VectorSpaceModel vs_model = new VectorSpaceModel();

    /** Term vectors of the non-empty training documents, filled by {@link #initVectors}. */
    List<List<Term>> train_vectors;

    /**
     * Loads the training data: builds the term vectors and collects the category values.
     *
     * <p>The first line of the input is skipped. Every following line is lower-cased and split on
     * tabs; column 1 is tokenized and stripped of punctuation. Rows whose document ends up empty
     * are reported on standard error and ignored. The reader (and therefore {@code inputStream})
     * is closed when this method returns, whether normally or with an exception.
     *
     * <p>Category values collected by an earlier call are discarded first, because the document
     * indices they carry would otherwise point into the newly built vector list.
     *
     * @param inputStream tab-separated training data
     * @param i           integer forwarded unchanged to {@code VectorSpaceModel.toTFIDFs}
     *                    (presumably a frequency cutoff; confirm against that method)
     * @param biFunction  term-weighting function forwarded to {@code VectorSpaceModel.toTFIDFs}
     * @param z           when {@code true}, every category's values are min-max normalized
     * @throws Exception if reading fails, a row has fewer than two columns, or a non-empty
     *                   category cell is not a parsable number
     */
    public void initVectors(InputStream inputStream, int i, BiFunction<Term, Integer, Double> biFunction, boolean z) throws Exception {
        EnglishTokenizer tokenizer = new EnglishTokenizer();
        List<List<String>> documents = new ArrayList<>();

        // Drop values from a previous call; their document indices would be stale.
        for (List<DoubleIntPair> values : this.category_list) {
            values.clear();
        }

        try (BufferedReader reader = IOUtils.createBufferedReader(inputStream)) {
            reader.readLine(); // the first line is skipped, not parsed

            int rowIndex = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = PatternConst.TAB.split(StringUtils.toLowerCase(line));
                List<String> tokens = StringUtils.stripPunctuation(tokenizer.tokenize(fields[1]));

                if (tokens.isEmpty()) {
                    System.err.println("Empty document: " + rowIndex);
                } else {
                    int documentIndex = documents.size();
                    documents.add(tokens);
                    collectCategoryValues(fields, documentIndex);
                }
                rowIndex++;
            }
        }

        this.train_vectors = this.vs_model.toTFIDFs(documents, i, biFunction);

        if (z) {
            for (List<DoubleIntPair> values : this.category_list) {
                normalize(values);
            }
        }
    }

    /**
     * Records the non-empty category cells of one training row.
     *
     * <p>Scanning stops at the first category column that lies beyond the end of the row.
     */
    private void collectCategoryValues(String[] fields, int documentIndex) {
        for (int c = 0; c < this.CATEGORIES.length; c++) {
            int column = this.CATEGORIES[c];
            if (column >= fields.length) {
                break;
            }
            String cell = fields[column];
            if (!cell.isEmpty()) {
                this.category_list.get(c).add(new DoubleIntPair(Double.parseDouble(cell), documentIndex));
            }
        }
    }

    /**
     * Rescales the values of one category in place to the range [0, 1] (min-max normalization).
     *
     * <p>An empty list is left untouched. When all values are equal the range is zero and the
     * usual formula would yield {@code NaN}; in that case every value is set to {@code 0.0}.
     */
    private void normalize(List<DoubleIntPair> list) {
        if (list.isEmpty()) {
            return;
        }

        double max = list.get(0).d;
        double min = max;
        for (DoubleIntPair pair : list) {
            max = Math.max(max, pair.d);
            min = Math.min(min, pair.d);
        }

        double range = max - min;
        for (DoubleIntPair pair : list) {
            pair.d = (range == 0.0d) ? 0.0d : (pair.d - min) / range;
        }
    }

    /**
     * Scores every input document against each category and writes per-key averages as CSV.
     *
     * <p>Each input line is split on tabs: column 0 is the grouping key and column 2 is the
     * space-separated document text. Documents whose term vector is empty are not counted. The
     * output starts with a header row ({@code State} followed by the category column numbers) and
     * then contains one row per key in sorted order, holding the summed scores divided by the
     * number of counted documents. A key with no counted documents yields {@code NaN} cells.
     *
     * <p>Both the reader and the print stream wrapping the given streams are closed by this
     * method. If reading fails, {@code outputStream} has not been wrapped yet and is left to the
     * caller to close.
     *
     * @param inputStream  tab-separated documents to score
     * @param outputStream destination of the CSV report
     * @param i            integer forwarded unchanged to {@code VectorSpaceModel.getTFIDFs}
     * @param biFunction   term-weighting function forwarded to {@code VectorSpaceModel.getTFIDFs}
     * @throws Exception if reading or writing fails, or a line has fewer than three columns
     */
    public void measureCategories(InputStream inputStream, OutputStream outputStream, int i, BiFunction<Term, Integer, Double> biFunction) throws Exception {
        final int categoryCount = this.CATEGORIES.length;
        // Per key: summed score per category (o) and number of scored documents (i).
        HashMap<String, ObjectIntPair<double[]>> totalsByKey = new HashMap<>();

        try (BufferedReader reader = IOUtils.createBufferedReader(inputStream)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = Splitter.splitTabs(line);
                ObjectIntPair<double[]> totals = totalsByKey.computeIfAbsent(
                        fields[0], key -> new ObjectIntPair<>(new double[categoryCount], 0));
                List<Term> vector = this.vs_model.getTFIDFs(
                        StringUtils.stripPunctuation(Splitter.splitSpace(fields[2])), i, biFunction);

                if (vector.isEmpty()) {
                    continue;
                }
                for (int c = 0; c < categoryCount; c++) {
                    totals.o[c] += getScore(this.category_list.get(c), vector, i, biFunction);
                }
                totals.i++;
            }
        }

        List<String> keys = new ArrayList<>(totalsByKey.keySet());
        Collections.sort(keys);

        try (PrintStream out = IOUtils.createBufferedPrintStream(outputStream)) {
            StringJoiner header = new StringJoiner(",");
            header.add("State");
            for (int category : this.CATEGORIES) {
                header.add(Integer.toString(category));
            }
            out.println(header.toString());

            for (String key : keys) {
                ObjectIntPair<double[]> totals = totalsByKey.get(key);
                StringJoiner row = new StringJoiner(",");
                row.add(key);
                for (int c = 0; c < categoryCount; c++) {
                    row.add(Double.toString(totals.o[c] / totals.i));
                }
                out.println(row.toString());
            }
        }
    }

    /**
     * Returns the mean, over all training documents that carry a value for one category, of the
     * cosine similarity to {@code list2} multiplied by that document's category value.
     *
     * <p>For an empty {@code list} the result is {@code NaN} (0.0 divided by 0). The last two
     * parameters are not used by the computation.
     */
    private double getScore(List<DoubleIntPair> list, List<Term> list2, int i, BiFunction<Term, Integer, Double> biFunction) {
        double weightedSum = 0.0d;
        for (DoubleIntPair pair : list) {
            weightedSum += VectorSpaceModel.getCosineSimilarity(list2, this.train_vectors.get(pair.i)) * pair.d;
        }
        return weightedSum / list.size();
    }

    /**
     * Copies a two-column tab-separated file, inserting the zero-based input line number as a new
     * middle column and dropping rows whose second column consists only of punctuation, digits or
     * white space.
     *
     * <p>A dot is printed to standard output for every 10000th input line as a progress marker.
     * Both files are closed on return, which also flushes the buffered output.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     * @throws Exception if reading or writing fails, or a line has fewer than two columns
     */
    public void split(String str, String str2) throws Exception {
        try (PrintStream out = IOUtils.createBufferedPrintStream(str2);
             BufferedReader reader = IOUtils.createBufferedReader(str)) {
            int lineIndex = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (lineIndex % 10000 == 0) {
                    System.out.print(".");
                }
                String[] fields = Splitter.splitTabs(line.trim());
                if (!StringUtils.containsPunctuationOrDigitsOrWhiteSpacesOnly(fields[1])) {
                    out.println(fields[0] + "\t" + lineIndex + "\t" + fields[1]);
                }
                lineIndex++;
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments: {@code [0]} working directory, {@code [1]} weighting name ({@code "tf"} selects
     * {@code VectorSpaceModel.getTFIDF}, anything else {@code VectorSpaceModel.getWFIDF}),
     * {@code [2]} integer passed to both processing steps, {@code [3]} boolean enabling
     * normalization of the category values. The report is written to
     * {@code <dir>/<weighting>-<int>-<boolean>.csv}.
     */
    public static void main(String[] strArr) throws Exception {
        String directory = strArr[0];
        String weighting = strArr[1];
        int cutoff = Integer.parseInt(strArr[2]);
        boolean normalize = Boolean.parseBoolean(strArr[3]);

        String trainingPath = directory + "/mind_wandering_and_axiety.txt";
        String documentsPath = directory + "/tweetsByStateSplittedCleaned.csv.out";
        String reportPath = directory + "/" + weighting + "-" + cutoff + "-" + normalize + ".csv";

        BiFunction<Term, Integer, Double> weightFunction = weighting.equals("tf")
                ? (term, count) -> VectorSpaceModel.getTFIDF(term, count)
                : (term, count) -> VectorSpaceModel.getWFIDF(term, count);

        SymbolStrip symbolStrip = new SymbolStrip();

        try (InputStream trainingIn = IOUtils.createFileInputStream(trainingPath)) {
            symbolStrip.initVectors(trainingIn, cutoff, weightFunction, normalize);
        }

        // The enclosing try guarantees both files are released even if scoring fails half-way.
        try (InputStream documentsIn = IOUtils.createFileInputStream(documentsPath);
             OutputStream reportOut = IOUtils.createFileOutputStream(reportPath)) {
            symbolStrip.measureCategories(documentsIn, reportOut, cutoff, weightFunction);
        }
    }
}
