package edu.emory.clir.clearnlp.util;

import edu.emory.clir.clearnlp.collection.map.ObjectIntHashMap;
import edu.emory.clir.clearnlp.collection.ngram.Unigram;
import edu.emory.clir.clearnlp.collection.pair.ObjectIntPair;
import edu.emory.clir.clearnlp.util.constant.PatternConst;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/emory/clir/clearnlp/util/TFIDF.class */
/**
 * Counts, over a collection of text files, how often each whitespace-delimited
 * term is reported by {@code DSUtils.getBagOfWords}, and writes the resulting
 * table to a file.
 *
 * <p>NOTE(review): the counts are document frequencies only if
 * {@code getBagOfWords} yields each distinct term once per file (the method
 * name suggests this) — confirm against {@code DSUtils}.
 */
public class TFIDF {
    // Neither field is referenced by the code in this class; both are kept
    // unchanged (including package-private visibility) for existing users.
    Unigram<String> term_frequencies = new Unigram<>();
    Unigram<String> document_frequencies = new Unigram<>();

    /**
     * Accumulates term counts across the given files.
     *
     * <p>Each file is opened, tokenized with {@code PatternConst.WHITESPACES}
     * through {@code DSUtils.getBagOfWords}, and every returned term is added
     * once to the result map. Each file is closed before the next one is opened.
     *
     * @param list paths of the files to read
     * @return a map from term to the number of times it was added
     * @throws FileNotFoundException if one of the paths cannot be opened
     * @throws UncheckedIOException if closing one of the files fails
     */
    public static ObjectIntHashMap<String> getDocumentFrequencyCounts(List<String> list) throws FileNotFoundException {
        ObjectIntHashMap<String> counts = new ObjectIntHashMap<>();
        for (String path : list) {
            addTermsOfFile(path, counts);
        }
        return counts;
    }

    /** Adds every term reported for the file at {@code path} to {@code counts}, closing the file afterwards. */
    private static void addTermsOfFile(String path, ObjectIntHashMap<String> counts) throws FileNotFoundException {
        // try-with-resources releases the file handle even if tokenizing fails;
        // previously one descriptor was leaked per input file.
        try (FileInputStream in = new FileInputStream(path)) {
            for (String term : DSUtils.getBagOfWords(in, PatternConst.WHITESPACES)) {
                counts.add(term);
            }
        } catch (FileNotFoundException e) {
            // Preserve the declared contract: a missing file surfaces unchanged.
            throw e;
        } catch (IOException e) {
            // Only close() can raise a plain IOException here; the public
            // signature cannot be widened, so rethrow it unchecked.
            throw new UncheckedIOException("Failed to close " + path, e);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Collects the {@code .txt} files found under {@code strArr[0]}, counts
     * their terms, prints the number of files to standard output, and writes
     * one line per term to the file {@code strArr[1]} in the form
     * {@code term<TAB>count<TAB>count/numberOfFiles}.
     *
     * <p>NOTE(review): the meaning of the {@code false} flag passed to
     * {@code FileUtils.getFileList} and the exact ordering produced by
     * {@code DSUtils.sortReverseOrder} are defined outside this file.
     *
     * @param strArr {@code [inputDirectory, outputFile]}
     * @throws FileNotFoundException if an input file cannot be opened
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] strArr) throws FileNotFoundException {
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException("Usage: TFIDF <input directory> <output file>");
        }

        List<String> fileList = FileUtils.getFileList(strArr[0], ".txt", false);
        List<ObjectIntPair<String>> entries = getDocumentFrequencyCounts(fileList).toList();
        DSUtils.sortReverseOrder(entries);

        // Closing the stream flushes it; the original never closed it, so
        // buffered output could be lost when the JVM exited.
        try (PrintStream out = IOUtils.createBufferedPrintStream(strArr[1])) {
            int size = fileList.size();
            System.out.println(size);
            for (ObjectIntPair<String> entry : entries) {
                out.printf("%s\t%d\t%6.4f\n", entry.o, entry.i, MathUtils.divide(entry.i, size));
            }
        }
    }
}
