package edu.umd.cloud9.collection.wikipedia;

import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/CountWikipediaPages.class */
/**
 * Hadoop tool that classifies every page of a Wikipedia XML dump and reports
 * how many pages fall into each {@link PageTypes} category.
 *
 * <p>The job is map-only (zero reduce tasks) and uses {@link NullOutputFormat},
 * so it writes no output records; the results are exposed solely through the
 * job counters incremented by the mapper.
 *
 * <p>Command-line options:
 * <ul>
 *   <li>{@code -input <path>}: the XML dump file (required)</li>
 *   <li>{@code -wiki_language <code>}: two-letter language code (optional,
 *       defaults to {@code en})</li>
 * </ul>
 */
public class CountWikipediaPages extends Configured implements Tool {
    private static final Logger LOG = Logger.getLogger(CountWikipediaPages.class);
    private static final String INPUT_OPTION = "input";
    private static final String LANGUAGE_OPTION = "wiki_language";

    /** Language code used when {@code -wiki_language} is not supplied. */
    private static final String DEFAULT_LANGUAGE = "en";

    /**
     * Job configuration key that receives the language code.
     * NOTE(review): presumably read by WikipediaPageInputFormat; confirm there.
     */
    private static final String LANGUAGE_CONF_KEY = "wiki.language";

    /**
     * Mapper that emits no records; it only increments one counter per page
     * category for every {@link WikipediaPage} it receives.
     */
    private static class MyMapper extends Mapper<LongWritable, WikipediaPage, Text, IntWritable> {
        private MyMapper() {
        }

        /**
         * Counts a single page.
         *
         * <p>{@link PageTypes#TOTAL} is incremented for every page, followed by
         * exactly one of the mutually exclusive categories chosen by
         * {@link #classify(WikipediaPage)}. A page counted as
         * {@link PageTypes#ARTICLE} is additionally counted as
         * {@link PageTypes#STUB} when {@code page.isStub()} holds.
         *
         * @param key     input key supplied by the input format (unused)
         * @param page    the page to classify
         * @param context task context providing access to the counters
         */
        public void map(LongWritable key, WikipediaPage page, Mapper<LongWritable, WikipediaPage, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            context.getCounter(PageTypes.TOTAL).increment(1L);

            PageTypes category = classify(page);
            context.getCounter(category).increment(1L);

            // STUB is a sub-count of ARTICLE, not a separate exclusive category.
            if (category == PageTypes.ARTICLE && page.isStub()) {
                context.getCounter(PageTypes.STUB).increment(1L);
            }
        }

        /**
         * Picks the single exclusive category of a page. The checks are ordered:
         * redirect, disambiguation, empty, then article; anything that is none
         * of these is {@link PageTypes#OTHER}.
         */
        private static PageTypes classify(WikipediaPage page) {
            if (page.isRedirect()) {
                return PageTypes.REDIRECT;
            }
            if (page.isDisambiguation()) {
                return PageTypes.DISAMBIGUATION;
            }
            if (page.isEmpty()) {
                return PageTypes.EMPTY;
            }
            return page.isArticle() ? PageTypes.ARTICLE : PageTypes.OTHER;
        }

        // No hand-written map(Object, Object, Mapper.Context) bridge here: the
        // compiler generates that bridge for the typed map(...) above.
    }

    /** Counter names reported by the job, one per page category. */
    public enum PageTypes {
        /** Every page seen by the mapper. */
        TOTAL,
        /** Pages for which {@code isRedirect()} is true. */
        REDIRECT,
        /** Non-redirect pages for which {@code isDisambiguation()} is true. */
        DISAMBIGUATION,
        /** Pages for which {@code isEmpty()} is true and no earlier category matched. */
        EMPTY,
        /** Pages for which {@code isArticle()} is true and no earlier category matched. */
        ARTICLE,
        /** Subset of {@link #ARTICLE} pages for which {@code isStub()} is true. */
        STUB,
        /** Pages matching none of the other categories. */
        OTHER;

        /**
         * Returns all constants in declaration order.
         *
         * <p>Decompiler-introduced alias of {@link #values()}, retained only so
         * existing callers keep compiling. {@code values()} already returns a
         * fresh array on every call, so no further copy is needed.
         *
         * @return a new array containing every {@code PageTypes} constant
         */
        public static PageTypes[] valuesCustom() {
            return values();
        }
    }

    /**
     * Parses the command line, configures the counting job and runs it to
     * completion.
     *
     * @param strArr command-line arguments remaining after generic Hadoop
     *               option handling
     * @return {@code 0} if the job succeeded, {@code -1} if the arguments were
     *         invalid or incomplete, {@code 1} if the job ran but failed
     * @throws Exception if job setup or execution fails
     */
    public int run(String[] strArr) throws Exception {
        Options options = buildOptions();

        // Only parsing can raise ParseException, so keep the try block narrow.
        CommandLine cmdline;
        try {
            cmdline = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            return -1;
        }

        if (!cmdline.hasOption(INPUT_OPTION)) {
            new HelpFormatter().printHelp(getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        String language = DEFAULT_LANGUAGE;
        if (cmdline.hasOption(LANGUAGE_OPTION)) {
            language = cmdline.getOptionValue(LANGUAGE_OPTION);
            // Only the shape of the code is validated (exactly two characters).
            if (language == null || language.length() != 2) {
                System.err.println("Error: \"" + language + "\" unknown language!");
                return -1;
            }
        }

        String inputPath = cmdline.getOptionValue(INPUT_OPTION);

        LOG.info("Tool name: " + getClass().getName());
        LOG.info(" - XML dump file: " + inputPath);
        LOG.info(" - language: " + language);

        Job job = Job.getInstance(getConf());
        job.setJarByClass(CountWikipediaPages.class);
        job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]",
                INPUT_OPTION, inputPath, LANGUAGE_OPTION, language));

        // Map-only job: the counters are the result, no records are written.
        job.setNumReduceTasks(0);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        job.getConfiguration().set(LANGUAGE_CONF_KEY, language);

        job.setInputFormatClass(WikipediaPageInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setMapperClass(MyMapper.class);

        // Report job failure to the caller instead of unconditionally returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Builds the command-line option set understood by this tool.
     *
     * <p>{@code OptionBuilder} is driven through static calls: each
     * {@code create(...)} produces an option from the settings made since the
     * previous {@code create(...)}, so the call order within each group matters.
     */
    private static Options buildOptions() {
        Options options = new Options();

        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("XML dump file");
        options.addOption(OptionBuilder.create(INPUT_OPTION));

        OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("two-letter language code");
        options.addOption(OptionBuilder.create(LANGUAGE_OPTION));

        return options;
    }

    /**
     * Command-line entry point. Runs the tool through {@link ToolRunner} and
     * exits the JVM with the status returned by {@link #run(String[])}, so that
     * shells and schedulers can detect failures.
     *
     * @param strArr command-line arguments
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(new CountWikipediaPages(), strArr));
    }
}
