package edu.umd.cloud9.collection.aquaint2;

import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.io.FSLineReader;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/aquaint2/BuildAquaint2ForwardIndex.class */
/**
 * Hadoop tool that builds a forward index file for an AQUAINT-2 collection.
 *
 * <p>A map-reduce job emits one record per document (docno, input key, content
 * length in bytes). The job runs with a single reduce task, and its
 * {@code part-00000} output is then converted into a binary index file.
 *
 * <p>Usage: {@code [collection-path] [output-path] [index-file] [docno-mapping-file]}
 */
public class BuildAquaint2ForwardIndex extends Configured implements Tool {
    private static final Logger sLogger = Logger.getLogger(BuildAquaint2ForwardIndex.class);

    /** Configuration key whose value {@code "local"} selects the local-mode code paths. */
    private static final String JOB_TRACKER_KEY = "mapred.job.tracker";
    private static final String LOCAL_JOB_TRACKER = "local";

    /** Configuration key used to hand the docno mapping path to mappers in local mode. */
    private static final String DOCNO_MAPPING_FILE_KEY = "DocnoMappingFile";

    /** Job counters reported by the mapper. */
    public enum Count {
        DOCS
    }

    /**
     * Tells whether the given configuration selects local mode.
     *
     * <p>The comparison is null-safe: an unset job tracker key is treated as
     * "not local" instead of raising a {@link NullPointerException}.
     *
     * @param conf configuration to inspect
     * @return {@code true} iff the job tracker key is set to {@code "local"}
     */
    private static boolean isLocalMode(Configuration conf) {
        return LOCAL_JOB_TRACKER.equals(conf.get(JOB_TRACKER_KEY));
    }

    /**
     * Mapper that emits, for every document, its docno as the key and
     * {@code "<input key>\t<content length in bytes>"} as the value.
     */
    private static class MyMapper extends MapReduceBase implements Mapper<LongWritable, Aquaint2Document, IntWritable, Text> {
        // Reused output holders; kept per-instance so that mapper instances
        // living in the same JVM never share mutable state.
        private final IntWritable docnoKey = new IntWritable(1);
        private final Text offsetAndLength = new Text();
        private DocnoMapping mDocMapping;

        /**
         * Loads the docno mapping, either from the path stored in the job
         * configuration (local mode) or from the first distributed cache file.
         *
         * @param jobConf the job configuration
         * @throws RuntimeException if the mapping cannot be loaded; the
         *         underlying failure is attached as the cause
         */
        public void configure(JobConf jobConf) {
            try {
                this.mDocMapping = new Aquaint2DocnoMapping();
                if (isLocalMode(jobConf)) {
                    this.mDocMapping.loadMapping(new Path(jobConf.get(DOCNO_MAPPING_FILE_KEY)), FileSystem.get(jobConf));
                } else {
                    Path[] cachedFiles = DistributedCache.getLocalCacheFiles(jobConf);
                    if (cachedFiles == null || cachedFiles.length == 0) {
                        throw new IllegalStateException("No docno mapping file found in the distributed cache");
                    }
                    this.mDocMapping.loadMapping(cachedFiles[0], FileSystem.getLocal(jobConf));
                }
            } catch (Exception e) {
                // Keep the original failure as the cause so it shows up in the task logs.
                throw new RuntimeException("Error initializing DocnoMapping!", e);
            }
        }

        /**
         * Counts the document and emits its docno together with the input key
         * and the byte length of its content.
         *
         * @param longWritable input key of the document
         * @param aquaint2Document the document
         * @param outputCollector receives the (docno, "key\tlength") pair
         * @param reporter used to increment {@link Count#DOCS}
         * @throws IOException if the record cannot be collected
         */
        public void map(LongWritable longWritable, Aquaint2Document aquaint2Document, OutputCollector<IntWritable, Text> outputCollector, Reporter reporter) throws IOException {
            reporter.incrCounter(Count.DOCS, 1L);
            // Encode explicitly instead of relying on the platform default charset,
            // so the recorded length is the same on every JVM.
            // NOTE(review): assumes the collection is stored as UTF-8 on disk - confirm.
            int length = aquaint2Document.getContent().getBytes("UTF-8").length;
            docnoKey.set(this.mDocMapping.getDocno(aquaint2Document.getDocid()));
            offsetAndLength.set(longWritable + "\t" + length);
            outputCollector.collect(docnoKey, offsetAndLength);
        }
    }

    /**
     * Prints the command-line usage to standard output.
     *
     * @return {@code -1}, suitable for use as the tool's exit status
     */
    private static int printUsage() {
        System.out.println("usage: [collection-path] [output-path] [index-file] [docno-mapping-file]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Command-line entry point of the tool.
     *
     * @param args collection path, output path, index file, docno mapping file
     * @return {@code 0} on success, {@code -1} if the argument count is wrong
     * @throws Exception if the job or the index writing fails
     */
    public int run(String[] args) throws Exception {
        if (args.length != 4) {
            return printUsage();
        }
        return runTool(getConf(), args[0], args[1], args[2], args[3]);
    }

    /**
     * Runs the map-reduce job and writes the forward index file.
     *
     * @param configuration base Hadoop configuration
     * @param collectionPath input path of the collection
     * @param outputPath job output directory; deleted before the job runs
     * @param indexFile path of the index file to create (overwritten if present)
     * @param mappingFile path of the docno mapping file
     * @return {@code 0} on success
     * @throws RuntimeException if the number of index records differs from the
     *         number of documents counted by the job
     * @throws Exception if the job or any file operation fails
     */
    public int runTool(Configuration configuration, String collectionPath, String outputPath, String indexFile, String mappingFile) throws Exception {
        JobConf jobConf = new JobConf(configuration, BuildAquaint2ForwardIndex.class);
        FileSystem fileSystem = FileSystem.get(configuration);

        sLogger.info("Tool name: BuildAquaint2ForwardIndex");
        sLogger.info(" - collection path: " + collectionPath);
        sLogger.info(" - output path: " + outputPath);
        sLogger.info(" - index file: " + indexFile);
        sLogger.info(" - mapping file: " + mappingFile);

        jobConf.setJobName("BuildAquaint2ForwardIndex");
        jobConf.set("mapred.child.java.opts", "-Xmx1024m");
        // A single reduce task puts every record into part-00000, which is read back below.
        jobConf.setNumReduceTasks(1);

        // Must mirror MyMapper.configure: local mode passes the path through the
        // configuration, otherwise the file is shipped via the distributed cache.
        if (isLocalMode(jobConf)) {
            jobConf.set(DOCNO_MAPPING_FILE_KEY, mappingFile);
        } else {
            DistributedCache.addCacheFile(new URI(mappingFile), jobConf);
        }

        FileInputFormat.setInputPaths(jobConf, new Path[]{new Path(collectionPath)});
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);
        jobConf.setInputFormat(Aquaint2DocumentInputFormat.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapperClass(MyMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);

        // Remove stale output so the job can create the directory afresh.
        FileSystem.get(jobConf).delete(new Path(outputPath), true);

        int expectedDocs = (int) JobClient.runJob(jobConf).getCounters().findCounter(Count.DOCS).getCounter();
        sLogger.info("Writing " + expectedDocs + " doc offsets to " + indexFile);

        int writtenDocs = writeIndexFile(fileSystem, collectionPath, outputPath, indexFile, expectedDocs);
        sLogger.info(writtenDocs + " docs total. Done!");

        if (expectedDocs != writtenDocs) {
            throw new RuntimeException("Unexpected number of documents in building forward index!");
        }
        return 0;
    }

    /**
     * Converts the job's {@code part-00000} text output into the binary index file.
     *
     * <p>File layout: UTF string naming the forward index class, UTF string with
     * the collection path, int document count, then one (long, int) pair per
     * record taken from the second and third tab-separated fields of each line.
     *
     * <p>Declared to throw {@code Exception} like {@link #runTool}, because the
     * checked exceptions of {@code FSLineReader} are declared outside this file.
     *
     * @param fileSystem file system holding the job output and the index file
     * @param collectionPath collection path recorded in the index header
     * @param outputPath job output directory containing {@code part-00000}
     * @param indexFile path of the index file to create
     * @param docCount document count recorded in the index header
     * @return the number of records written
     * @throws IOException if a line has fewer than three tab-separated fields
     * @throws Exception if reading or writing fails
     */
    private static int writeIndexFile(FileSystem fileSystem, String collectionPath, String outputPath, String indexFile, int docCount) throws Exception {
        int written = 0;
        FSLineReader reader = new FSLineReader(outputPath + "/part-00000", fileSystem);
        try {
            FSDataOutputStream out = fileSystem.create(new Path(indexFile), true);
            try {
                out.writeUTF("edu.umd.cloud9.collection.aquaint2.Aquaint2ForwardIndex");
                out.writeUTF(collectionPath);
                out.writeInt(docCount);

                Text line = new Text();
                while (reader.readLine(line) > 0) {
                    String[] fields = line.toString().split("\\t");
                    if (fields.length < 3) {
                        throw new IOException("Malformed forward index record (expected 3 tab-separated fields): " + line);
                    }
                    out.writeLong(Long.parseLong(fields[1]));
                    out.writeInt(Integer.parseInt(fields[2]));
                    written++;
                    if (written % 100000 == 0) {
                        sLogger.info(written + " docs");
                    }
                }
            } finally {
                out.close();
            }
        } finally {
            reader.close();
        }
        return written;
    }

    /**
     * Launches the tool through {@link ToolRunner} and exits with its status.
     *
     * @param strArr command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new BuildAquaint2ForwardIndex(), strArr));
    }
}
