package edu.umd.cloud9.collection.medline;

import edu.umd.cloud9.collection.DocnoMapping;
import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

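/**
 * Hadoop tool that builds the docno mapping for a MEDLINE collection.
 *
 * <p>It runs a single-reducer MapReduce job over the collection that pairs every citation docid
 * with a sequentially increasing number, then hands the job's {@code part-r-00000} output to
 * {@link MedlineDocnoMapping#writeMappingData} to produce the final mapping file.
 *
 * <p>Source recovered from: edu/umd/cloud9/collection/medline/MedlineDocnoMappingBuilder.class
 */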
public class MedlineDocnoMappingBuilder extends Configured implements Tool, DocnoMapping.Builder {
    private static final Logger LOG = Logger.getLogger(MedlineDocnoMappingBuilder.class);
    private static final Random random = new Random();

    /**
     * Hadoop counters reported by this job.
     *
     * <p>NOTE(review): the decompiler reports that this enum was originally {@code private} and
     * that its access modifier was widened during decompilation.
     */
    public enum Count {
        /** Incremented once for every {@code map()} call, i.e. once per citation read. */
        DOCS
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/medline/MedlineDocnoMappingBuilder$MyMapper.class */
    private static class MyMapper extends Mapper<LongWritable, MedlineCitation, IntWritable, IntWritable> {
        private static final IntWritable docid = new IntWritable();
        private static final IntWritable one = new IntWritable(1);

        private MyMapper() {
        }

        public void map(LongWritable longWritable, MedlineCitation medlineCitation, Mapper<LongWritable, MedlineCitation, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException {
            context.getCounter(Count.DOCS).increment(1L);
            docid.set(Integer.parseInt(medlineCitation.getDocid()));
            context.write(docid, one);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((LongWritable) obj, (MedlineCitation) obj2, (Mapper<LongWritable, MedlineCitation, IntWritable, IntWritable>.Context) context);
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/medline/MedlineDocnoMappingBuilder$MyReducer.class */
    private static class MyReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static final IntWritable cnt = new IntWritable(1);

        private MyReducer() {
        }

        public void reduce(IntWritable intWritable, Iterable<IntWritable> iterable, Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException {
            context.write(intWritable, cnt);
            cnt.set(cnt.get() + 1);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((IntWritable) obj, (Iterable<IntWritable>) iterable, (Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context) context);
        }
    }

    /**
     * Programmatic entry point required by {@code DocnoMapping.Builder}.
     *
     * <p>Stores the given configuration on this tool and delegates to {@link #run(String[])},
     * passing the two paths as {@code -collection=} and {@code -docnoMapping=} options.
     *
     * @param path passed to the tool as the {@code -collection} option
     * @param path2 passed to the tool as the {@code -docnoMapping} option
     * @param configuration configuration the job is run with
     * @return whatever {@link #run(String[])} returns
     * @throws IOException propagated from {@link #run(String[])}
     */
    @Override
    public int build(Path path, Path path2, Configuration configuration) throws IOException {
        super.setConf(configuration);

        final String collectionOption = "-collection=" + path.toString();
        final String mappingOption = "-docnoMapping=" + path2.toString();

        final String[] toolArgs = new String[2];
        toolArgs[0] = collectionOption;
        toolArgs[1] = mappingOption;
        return run(toolArgs);
    }

    /**
     * Runs the docno-numbering job and writes the final mapping file.
     *
     * <p>Steps: parse the options, run the MapReduce job into a scratch directory, convert the
     * job's {@code part-r-00000} output with {@link MedlineDocnoMapping#writeMappingData}, and
     * remove the scratch directory. The scratch directory is removed whether or not the job
     * succeeds; a failure to remove it is logged and does not fail the run.
     *
     * @param args tool options, as understood by {@code DocnoMapping.BuilderUtils.parseDefaultOptions}
     * @return {@code 0} on success, {@code -1} if the options could not be parsed
     * @throws IOException if setting up the job or clearing the scratch directory beforehand fails
     * @throws RuntimeException wrapping any failure raised while the job runs or while the
     *     mapping file is written, including the job itself reporting failure
     */
    @Override
    public int run(String[] args) throws IOException {
        DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
        if (options == null) {
            return -1;
        }

        // Scratch directory that receives the raw job output.
        Path tmpDir = new Path(
                "tmp-" + MedlineDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000));

        LOG.info("Tool: " + MedlineDocnoMappingBuilder.class.getCanonicalName());
        LOG.info(" - input path: " + options.collection);
        LOG.info(" - output file: " + options.docnoMapping);

        Job job = new Job(getConf(), MedlineDocnoMappingBuilder.class.getSimpleName());
        FileSystem fs = FileSystem.get(job.getConfiguration());
        job.setJarByClass(MedlineDocnoMappingBuilder.class);

        // A single reducer is required: one counter numbers every docid, and the result is
        // read back from exactly one part file below.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path(options.collection));
        FileOutputFormat.setOutputPath(job, tmpDir);
        FileOutputFormat.setCompressOutput(job, false);

        job.setInputFormatClass(MedlineCitationInputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Clear any leftover directory of the same name so the job can create its output.
        fs.delete(tmpDir, true);

        try {
            // waitForCompletion reports failure through its return value, not an exception.
            if (!job.waitForCompletion(true)) {
                throw new IOException("MapReduce job " + job.getJobName()
                        + " failed; no docno mapping was written to " + options.docnoMapping);
            }
            MedlineDocnoMapping.writeMappingData(
                    new Path(tmpDir, "part-r-00000"), new Path(options.docnoMapping), fs);
            return 0;
        } catch (InterruptedException e) {
            // Restore the interrupt flag before surfacing the failure.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            try {
                fs.delete(tmpDir, true);
            } catch (IOException e) {
                // Best-effort cleanup: never mask the primary outcome of the run.
                LOG.warn("Unable to delete temporary directory " + tmpDir, e);
            }
        }
    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} with a fresh
     * {@link Configuration}.
     *
     * <p>A non-zero result from the tool becomes the process exit status, so calling scripts can
     * detect failure. On success the method returns normally.
     *
     * @param args command-line arguments forwarded to the tool
     * @throws Exception any exception raised while running the tool
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new MedlineDocnoMappingBuilder(), args);
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }
}
