package edu.umd.hooka;

import edu.umd.hooka.CorpusInfo;
import edu.umd.hooka.alignment.IndexedFloatArray;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount_stripe.class */
public class PhraseExtractAndCount_stripe {

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount_stripe$PPCountCombiner.class */
    public static class PPCountCombiner extends MapReduceBase implements Reducer<Phrase, Phrase2CountMap, Phrase, Phrase2CountMap> {
        Phrase2CountMap sum = new Phrase2CountMap();

        public void reduce(Phrase phrase, Iterator<Phrase2CountMap> it, OutputCollector<Phrase, Phrase2CountMap> outputCollector, Reporter reporter) throws IOException {
            this.sum.clear();
            while (it.hasNext()) {
                this.sum.plusEquals(it.next());
            }
            outputCollector.collect(phrase, this.sum);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((Phrase) obj, (Iterator<Phrase2CountMap>) it, (OutputCollector<Phrase, Phrase2CountMap>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount_stripe$PPNormalizingReducer.class */
    public static class PPNormalizingReducer extends MapReduceBase implements Reducer<Phrase, Phrase2CountMap, PhrasePair, IndexedFloatArray> {
        Phrase2CountMap sum = new Phrase2CountMap();
        PhrasePair ko = new PhrasePair();
        IndexedFloatArray scores = new IndexedFloatArray(2);

        public void reduce(Phrase phrase, Iterator<Phrase2CountMap> it, OutputCollector<PhrasePair, IndexedFloatArray> outputCollector, Reporter reporter) throws IOException {
            this.sum.clear();
            int i = 0;
            while (it.hasNext()) {
                i++;
                if (i % 1000 == 0) {
                    reporter.progress();
                }
                this.sum.plusEquals(it.next());
            }
            this.sum.normalize();
            boolean z = phrase.getLanguage() == 0;
            if (z) {
                this.ko.setE(phrase);
            } else {
                this.ko.setF(phrase);
            }
            for (Map.Entry<Phrase, FloatWritable> entry : this.sum.entrySet()) {
                this.scores.clear();
                if (z) {
                    this.ko.setF(entry.getKey());
                    this.scores.set(1, entry.getValue().get());
                } else {
                    this.ko.setE(entry.getKey());
                    this.scores.set(0, entry.getValue().get());
                }
                outputCollector.collect(this.ko, this.scores);
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((Phrase) obj, (Iterator<Phrase2CountMap>) it, (OutputCollector<PhrasePair, IndexedFloatArray>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount_stripe$PhrasePairExtractMapper.class */
    public static class PhrasePairExtractMapper extends MapReduceBase implements Mapper<IntWritable, PhrasePair, Phrase, Phrase2CountMap> {
        private final Phrase2CountMap pcm = new Phrase2CountMap();
        private static final FloatWritable one = new FloatWritable(1.0f);

        public void map(IntWritable intWritable, PhrasePair phrasePair, OutputCollector<Phrase, Phrase2CountMap> outputCollector, Reporter reporter) throws IOException {
            Iterator<PhrasePair> it = phrasePair.extractConsistentPhrasePairs(7).iterator();
            while (it.hasNext()) {
                PhrasePair next = it.next();
                this.pcm.clear();
                this.pcm.put(next.getF(), one);
                outputCollector.collect(next.getE(), this.pcm);
                this.pcm.clear();
                this.pcm.put(next.getE(), one);
                outputCollector.collect(next.getF(), this.pcm);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (PhrasePair) obj2, (OutputCollector<Phrase, Phrase2CountMap>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount_stripe$ReduceSumScores.class */
    public static class ReduceSumScores extends MapReduceBase implements Reducer<PhrasePair, IndexedFloatArray, PhrasePair, IndexedFloatArray> {
        IndexedFloatArray scores = new IndexedFloatArray(2);

        public void reduce(PhrasePair phrasePair, Iterator<IndexedFloatArray> it, OutputCollector<PhrasePair, IndexedFloatArray> outputCollector, Reporter reporter) throws IOException {
            this.scores.clear();
            while (it.hasNext()) {
                this.scores.plusEquals(it.next());
            }
            outputCollector.collect(phrasePair, this.scores);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PhrasePair) obj, (Iterator<IndexedFloatArray>) it, (OutputCollector<PhrasePair, IndexedFloatArray>) outputCollector, reporter);
        }
    }

    /**
     * Runs the two-job phrase-table build for the {@code ARABIC_5000k} corpus.
     *
     * <ol>
     *   <li>"BuildPT.ExtractAndCount_striped": reads the aligned bitext, maps with
     *       {@link PhrasePairExtractMapper}, combines with {@link PPCountCombiner}, reduces with
     *       {@link PPNormalizingReducer}, and writes sequence files to {@code ppc.phase1.tmp}.</li>
     *   <li>"BuildPT.Merge": reads that intermediate output and reduces it with
     *       {@link ReduceSumScores} into the corpus's local phrase-table path. No mapper or
     *       output format is configured for this job, so the framework defaults apply.</li>
     * </ol>
     *
     * <p>Both output locations are deleted before the first job starts. The intermediate
     * directory is not removed afterwards.
     *
     * @param strArr command-line arguments; not used
     * @throws IOException if a filesystem operation or either job fails
     */
    public static void main(String[] strArr) throws IOException {
        // Both jobs use the same number of map and reduce tasks.
        final int numTasks = 38;

        CorpusInfo corpus = CorpusInfo.getCorpus(CorpusInfo.Corpus.ARABIC_5000k);
        Path phase1Output = new Path("ppc.phase1.tmp");
        Path phraseTable = corpus.getLocalPhraseTable();

        // ---- Job 1: extract phrase pairs and produce normalized per-direction scores ----
        JobConf extractJob = new JobConf(PhraseExtractAndCount_stripe.class);
        extractJob.setJobName("BuildPT.ExtractAndCount_striped");

        // Clear stale output from earlier runs. The explicit recursive flag replaces the
        // deprecated one-argument FileSystem.delete(Path).
        FileSystem fs = FileSystem.get(extractJob);
        fs.delete(phase1Output, true);
        fs.delete(phraseTable, true);

        extractJob.setOutputKeyClass(PhrasePair.class);
        extractJob.setOutputValueClass(IndexedFloatArray.class);
        extractJob.setMapOutputKeyClass(Phrase.class);
        extractJob.setMapOutputValueClass(Phrase2CountMap.class);
        extractJob.setMapperClass(PhrasePairExtractMapper.class);
        extractJob.setCombinerClass(PPCountCombiner.class);
        extractJob.setReducerClass(PPNormalizingReducer.class);
        extractJob.setNumMapTasks(numTasks);
        extractJob.setNumReduceTasks(numTasks);
        FileInputFormat.setInputPaths(extractJob, corpus.getAlignedBitext());
        FileOutputFormat.setOutputPath(extractJob, phase1Output);
        extractJob.setInputFormat(SequenceFileInputFormat.class);
        extractJob.setOutputFormat(SequenceFileOutputFormat.class);
        JobClient.runJob(extractJob);

        // ---- Job 2: sum the score arrays emitted for each phrase pair ----
        JobConf mergeJob = new JobConf(PhraseExtractAndCount_stripe.class);
        mergeJob.setJobName("BuildPT.Merge");
        mergeJob.setInputFormat(SequenceFileInputFormat.class);
        mergeJob.setOutputKeyClass(PhrasePair.class);
        mergeJob.setOutputValueClass(IndexedFloatArray.class);
        mergeJob.setReducerClass(ReduceSumScores.class);
        mergeJob.setNumMapTasks(numTasks);
        mergeJob.setNumReduceTasks(numTasks);
        FileInputFormat.setInputPaths(mergeJob, phase1Output);
        FileOutputFormat.setOutputPath(mergeJob, phraseTable);
        JobClient.runJob(mergeJob);
    }
}
