package edu.umd.cloud9.collection.wikipedia.graph;

import edu.umd.cloud9.collection.wikipedia.WikipediaPage;
import edu.umd.cloud9.io.pair.PairOfStringInt;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph.class */
public class ExtractWikipediaLinkGraph extends Configured implements Tool {
    // Logger used by task1/task2 to report the input and output of each job.
    private static final Logger LOG = Logger.getLogger(ExtractWikipediaLinkGraph.class);
    // Names of the command-line options (each takes one argument).
    // Input path handed to the edge-extraction job.
    private static final String INPUT_OPTION = "input";
    // Output path of the edge-extraction job; also the input of the adjacency-list job.
    private static final String EDGES_OUTPUT_OPTION = "edges_output";
    // Output path of the adjacency-list job.
    private static final String ADJ_OUTPUT_OPTION = "adjacency_list_output";
    // Number of reduce tasks used by both jobs.
    private static final String NUM_PARTITIONS_OPTION = "num_partitions";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$GraphInfo.class */
    public enum GraphInfo {
        /** Incremented once per reduce key in the adjacency-list job. */
        TOTAL_VERTICES,
        /** Incremented once per vertex that has at least one distinct non-self neighbour. */
        VERTICES_WITH_OUTLINKS,
        /** Incremented once per distinct non-self neighbour written to an adjacency list. */
        EDGES;

        /**
         * Returns a freshly allocated array holding every constant in declaration order.
         *
         * @return a new array the caller is free to modify
         */
        public static GraphInfo[] valuesCustom() {
            return values().clone();
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$MyMapper1.class */
    private static class MyMapper1 extends MapReduceBase implements Mapper<IntWritable, WikipediaPage, PairOfStringInt, Text> {
        private static Text text = new Text();
        private static PairOfStringInt pair = new PairOfStringInt();

        private MyMapper1() {
        }

        public void map(IntWritable intWritable, WikipediaPage wikipediaPage, OutputCollector<PairOfStringInt, Text> outputCollector, Reporter reporter) throws IOException {
            reporter.incrCounter(PageTypes.TOTAL, 1L);
            String title = wikipediaPage.getTitle();
            text.set(wikipediaPage.getDocid());
            pair.set(title, 0);
            outputCollector.collect(pair, text);
            String substring = title.substring(0, 1);
            if (substring.matches("[A-Z]")) {
                pair.set(title.replaceFirst(substring, substring.toLowerCase()), 0);
                outputCollector.collect(pair, text);
            }
            if (wikipediaPage.isRedirect()) {
                reporter.incrCounter(PageTypes.REDIRECT, 1L);
            } else if (wikipediaPage.isDisambiguation()) {
                reporter.incrCounter(PageTypes.DISAMBIGUATION, 1L);
            } else if (wikipediaPage.isEmpty()) {
                reporter.incrCounter(PageTypes.EMPTY, 1L);
            } else if (wikipediaPage.isArticle()) {
                reporter.incrCounter(PageTypes.ARTICLE, 1L);
                if (wikipediaPage.isStub()) {
                    reporter.incrCounter(PageTypes.STUB, 1L);
                }
            } else {
                reporter.incrCounter(PageTypes.NON_ARTICLE, 1L);
            }
            Iterator<String> it = wikipediaPage.extractLinkTargets().iterator();
            while (it.hasNext()) {
                pair.set(it.next(), 1);
                text.set(wikipediaPage.getDocid());
                outputCollector.collect(pair, text);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (WikipediaPage) obj2, (OutputCollector<PairOfStringInt, Text>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$MyMapper2.class */
    private static class MyMapper2 extends MapReduceBase implements Mapper<LongWritable, Text, IntWritable, IntWritable> {
        private static IntWritable keyOut = new IntWritable();
        private static IntWritable valOut = new IntWritable();

        private MyMapper2() {
        }

        public void map(LongWritable longWritable, Text text, OutputCollector<IntWritable, IntWritable> outputCollector, Reporter reporter) throws IOException {
            String[] split = text.toString().split("\\s+");
            keyOut.set(Integer.parseInt(split[0]));
            valOut.set(Integer.parseInt(split[1]));
            outputCollector.collect(keyOut, valOut);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((LongWritable) obj, (Text) obj2, (OutputCollector<IntWritable, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$MyPartitioner1.class */
    /**
     * Partitions records by the string half of the key only, so that all keys sharing a
     * left element go to the same partition regardless of their right element.
     */
    private static class MyPartitioner1 implements Partitioner<PairOfStringInt, Text> {
        private MyPartitioner1() {
        }

        /** No configuration is needed. */
        public void configure(JobConf jobConf) {
        }

        /**
         * @param pairOfStringInt key whose left element selects the partition
         * @param text value (not used)
         * @param i number of partitions
         * @return a partition index in {@code [0, i)}
         */
        public int getPartition(PairOfStringInt pairOfStringInt, Text text, int i) {
            // Clear the sign bit so the remainder is never negative.
            int nonNegativeHash = pairOfStringInt.getLeftElement().hashCode() & Integer.MAX_VALUE;
            return nonNegativeHash % i;
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$MyReducer1.class */
    private static class MyReducer1 extends MapReduceBase implements Reducer<PairOfStringInt, Text, IntWritable, IntWritable> {
        private static final IntWritable finalSrc = new IntWritable();
        private static final IntWritable finalDest = new IntWritable();
        private static String curArticle;
        private static int curDocid;

        private MyReducer1() {
        }

        public void reduce(PairOfStringInt pairOfStringInt, Iterator<Text> it, OutputCollector<IntWritable, IntWritable> outputCollector, Reporter reporter) throws IOException {
            if (pairOfStringInt.getRightElement() == 0) {
                curArticle = pairOfStringInt.getLeftElement();
                curDocid = Integer.parseInt(it.next().toString());
                finalSrc.set(curDocid);
                finalDest.set(curDocid);
                outputCollector.collect(finalSrc, finalDest);
                return;
            }
            if (pairOfStringInt.getLeftElement().equals(curArticle)) {
                while (it.hasNext()) {
                    finalSrc.set(Integer.parseInt(it.next().toString()));
                    finalDest.set(curDocid);
                    outputCollector.collect(finalSrc, finalDest);
                }
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PairOfStringInt) obj, (Iterator<Text>) it, (OutputCollector<IntWritable, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$MyReducer2.class */
    private static class MyReducer2 extends MapReduceBase implements Reducer<IntWritable, IntWritable, IntWritable, Text> {
        private static final Text text = new Text();

        private MyReducer2() {
        }

        public void reduce(IntWritable intWritable, Iterator<IntWritable> it, OutputCollector<IntWritable, Text> outputCollector, Reporter reporter) throws IOException {
            StringBuilder sb = new StringBuilder();
            HashSet hashSet = new HashSet();
            reporter.incrCounter(GraphInfo.TOTAL_VERTICES, 1L);
            while (it.hasNext()) {
                IntWritable next = it.next();
                if (next.get() != intWritable.get() && !hashSet.contains(Integer.valueOf(next.get()))) {
                    hashSet.add(Integer.valueOf(next.get()));
                    reporter.incrCounter(GraphInfo.EDGES, 1L);
                    sb.append(next.get());
                    sb.append("\t");
                }
            }
            if (hashSet.size() != 0) {
                reporter.incrCounter(GraphInfo.VERTICES_WITH_OUTLINKS, 1L);
            }
            text.set(sb.toString());
            outputCollector.collect(intWritable, text);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((IntWritable) obj, (Iterator<IntWritable>) it, (OutputCollector<IntWritable, Text>) outputCollector, reporter);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/graph/ExtractWikipediaLinkGraph$PageTypes.class */
    public enum PageTypes {
        /** Incremented for every page seen by the edge-extraction mapper. */
        TOTAL,
        REDIRECT,
        DISAMBIGUATION,
        EMPTY,
        ARTICLE,
        /** Incremented in addition to ARTICLE when the article is a stub. */
        STUB,
        /** Incremented for pages matching none of the other categories. */
        NON_ARTICLE;

        /**
         * Returns a freshly allocated array holding every constant in declaration order.
         *
         * @return a new array the caller is free to modify
         */
        public static PageTypes[] valuesCustom() {
            return values().clone();
        }
    }

    /**
     * Parses the command line and runs the edge-extraction job followed by the
     * adjacency-list job.
     *
     * <p>All four options ({@code input}, {@code edges_output},
     * {@code adjacency_list_output}, {@code num_partitions}) are required.
     *
     * @param strArr command-line arguments
     * @return 0 after both jobs have run; -1 if the arguments cannot be parsed, an option
     *         is missing (usage is printed), or {@code num_partitions} is not a positive integer
     * @throws Exception if either job fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        Options options = new Options();
        addOptionWithArg(options, INPUT_OPTION, "path", "input");
        addOptionWithArg(options, EDGES_OUTPUT_OPTION, "path", "output for edges");
        addOptionWithArg(options, ADJ_OUTPUT_OPTION, "path", "output for adjacency list");
        addOptionWithArg(options, NUM_PARTITIONS_OPTION, "num", "number of partitions");

        CommandLine cmdline;
        try {
            cmdline = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            return -1;
        }

        if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(EDGES_OUTPUT_OPTION)
                || !cmdline.hasOption(ADJ_OUTPUT_OPTION) || !cmdline.hasOption(NUM_PARTITIONS_OPTION)) {
            new HelpFormatter().printHelp(getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        // Reject a bad partition count up front instead of failing with a stack trace
        // or submitting a job with an unusable number of reducers.
        String numPartitionsValue = cmdline.getOptionValue(NUM_PARTITIONS_OPTION);
        int numPartitions;
        try {
            numPartitions = Integer.parseInt(numPartitionsValue);
        } catch (NumberFormatException e) {
            System.err.println("Error parsing command line: " + NUM_PARTITIONS_OPTION
                    + " must be an integer, got \"" + numPartitionsValue + "\"");
            return -1;
        }
        if (numPartitions < 1) {
            System.err.println("Error parsing command line: " + NUM_PARTITIONS_OPTION
                    + " must be at least 1, got " + numPartitions);
            return -1;
        }

        String edgesOutput = cmdline.getOptionValue(EDGES_OUTPUT_OPTION);
        task1(cmdline.getOptionValue(INPUT_OPTION), edgesOutput, numPartitions);
        // The second job reads what the first one wrote.
        task2(edgesOutput, cmdline.getOptionValue(ADJ_OUTPUT_OPTION), numPartitions);
        return 0;
    }

    /** Registers an option named {@code name} that takes exactly one argument. */
    private static void addOptionWithArg(Options options, String name, String argName, String description) {
        // OptionBuilder accumulates settings in static state until create() is called.
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription(description);
        options.addOption(OptionBuilder.create(name));
    }

    /**
     * Configures and runs the edge-extraction job (MyMapper1, MyPartitioner1, MyReducer1),
     * reading a sequence file and writing text.
     *
     * <p>Any existing output directory is deleted before the job starts.
     *
     * @param str input path
     * @param str2 output path
     * @param i number of reduce tasks
     * @throws IOException if the output cannot be deleted or the job fails
     */
    private void task1(String str, String str2, int i) throws IOException {
        LOG.info("Extracting edges...");
        LOG.info(" - input: " + str);
        LOG.info(" - output: " + str2);

        Path inputPath = new Path(str);
        Path outputPath = new Path(str2);

        JobConf jobConf = new JobConf(getConf(), ExtractWikipediaLinkGraph.class);
        jobConf.setJobName(String.format("ExtractWikipediaLinkGraph:Edges[input: %s, output: %s, num_partitions: %d]", str, str2, Integer.valueOf(i)));
        jobConf.setNumReduceTasks(i);

        SequenceFileInputFormat.addInputPath(jobConf, inputPath);
        TextOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        jobConf.setMapOutputKeyClass(PairOfStringInt.class);
        jobConf.setMapOutputValueClass(Text.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(IntWritable.class);

        jobConf.setMapperClass(MyMapper1.class);
        jobConf.setReducerClass(MyReducer1.class);
        jobConf.setPartitionerClass(MyPartitioner1.class);

        // Resolve the file system from the path itself: the default file system rejects
        // paths that live on a different one (e.g. a fully qualified URI).
        outputPath.getFileSystem(jobConf).delete(outputPath, true);
        JobClient.runJob(jobConf);
    }

    /**
     * Configures and runs the adjacency-list job (MyMapper2, MyReducer2), reading and
     * writing text.
     *
     * <p>Any existing output directory is deleted before the job starts.
     *
     * @param str input path
     * @param str2 output path
     * @param i number of reduce tasks
     * @throws IOException if the output cannot be deleted or the job fails
     */
    private void task2(String str, String str2, int i) throws IOException {
        LOG.info("Building adjacency lists...");
        LOG.info(" - input: " + str);
        LOG.info(" - output: " + str2);

        Path inputPath = new Path(str);
        Path outputPath = new Path(str2);

        JobConf jobConf = new JobConf(getConf(), ExtractWikipediaLinkGraph.class);
        jobConf.setJobName(String.format("ExtractWikipediaLinkGraph:AdjacencyList[input: %s, output: %s, num_partitions: %d]", str, str2, Integer.valueOf(i)));
        jobConf.setNumReduceTasks(i);

        TextInputFormat.addInputPath(jobConf, inputPath);
        TextOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(Text.class);

        jobConf.setMapperClass(MyMapper2.class);
        jobConf.setReducerClass(MyReducer2.class);

        // Resolve the file system from the path itself: the default file system rejects
        // paths that live on a different one (e.g. a fully qualified URI).
        outputPath.getFileSystem(jobConf).delete(outputPath, true);
        JobClient.runJob(jobConf);
    }

    /**
     * Command-line entry point: runs this tool through ToolRunner and exits the JVM with
     * the status it returns.
     *
     * @param strArr command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        int exitCode = ToolRunner.run(new ExtractWikipediaLinkGraph(), strArr);
        System.exit(exitCode);
    }
}
