package edu.umd.cloud9.example.bigram;

import com.google.common.collect.Iterators;
import edu.umd.cloud9.io.SequenceFileUtils;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;
import tl.lin.data.pair.PairOfWritables;

/* loaded from: input_file:edu/umd/cloud9/example/bigram/AnalyzeBigramCount.class */
/**
 * Command-line tool that reads bigram counts from a directory of sequence files and prints
 * summary statistics: the number of distinct bigrams, the sum of all counts, the number of
 * bigrams whose count is exactly one, and the ten bigrams with the highest counts.
 *
 * <p>Usage: {@code AnalyzeBigramCount -input <path>}
 */
public class AnalyzeBigramCount {
    /** Name of the command-line option carrying the input path. */
    private static final String INPUT = "input";

    /** How many of the highest-count bigrams are printed. */
    private static final int TOP_K = 10;

    /**
     * Orders bigrams by descending count; bigrams with equal counts are ordered by the
     * ascending natural order of their {@link Text} key.
     */
    private static final Comparator<PairOfWritables<Text, IntWritable>> BY_COUNT_DESCENDING =
            new Comparator<PairOfWritables<Text, IntWritable>>() {
                @Override
                public int compare(PairOfWritables<Text, IntWritable> first,
                        PairOfWritables<Text, IntWritable> second) {
                    // Arguments are swapped so that larger counts sort first.
                    int byCount = second.getRightElement().compareTo(first.getRightElement());
                    if (byCount != 0) {
                        return byCount;
                    }
                    return first.getLeftElement().compareTo(second.getLeftElement());
                }
            };

    /**
     * Parses the command line, loads every (bigram, count) pair under the input path, and
     * prints the statistics to standard output.
     *
     * <p>Exits with status {@code -1} when the command line cannot be parsed or the
     * {@code -input} option is missing.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        Options options = new Options();
        // OptionBuilder accumulates its settings in static state until create() is called.
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("input path");
        options.addOption(OptionBuilder.create(INPUT));

        CommandLine commandLine = null;
        try {
            commandLine = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            System.exit(-1);
            return; // not reached at runtime; makes it explicit that commandLine is non-null below
        }

        if (!commandLine.hasOption(INPUT)) {
            System.out.println("args: " + Arrays.toString(strArr));
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.setWidth(120);
            helpFormatter.printHelp(AnalyzeBigramCount.class.getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.exit(-1);
            return; // not reached at runtime
        }

        String inputPath = commandLine.getOptionValue(INPUT);
        System.out.println("input path: " + inputPath);

        // NOTE(review): relies on readDirectory being generic in its key/value types so that
        // the element type is inferred from the declaration — confirm against SequenceFileUtils.
        List<PairOfWritables<Text, IntWritable>> bigrams =
                SequenceFileUtils.readDirectory(new Path(inputPath));
        Collections.sort(bigrams, BY_COUNT_DESCENDING);

        int singletons = 0;
        // Accumulated as long so the total cannot wrap around on a large corpus.
        long total = 0L;
        for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
            int count = bigram.getRightElement().get();
            total += count;
            if (count == 1) {
                singletons++;
            }
        }

        System.out.println("total number of unique bigrams: " + bigrams.size());
        System.out.println("total number of bigrams: " + total);
        System.out.println("number of bigrams that appear only once: " + singletons);
        System.out.println("\nten most frequent bigrams: ");

        // The list is already sorted by descending count, so the first TOP_K entries are the answer.
        Iterator<PairOfWritables<Text, IntWritable>> top =
                Iterators.limit(bigrams.iterator(), TOP_K);
        while (top.hasNext()) {
            PairOfWritables<Text, IntWritable> bigram = top.next();
            System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
        }
    }
}
