package edu.umd.cloud9.collection.wikipedia;

import edu.umd.cloud9.collection.DocumentForwardIndex;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/wikipedia/WikipediaForwardIndex.class */
/**
 * Forward index giving random access to {@link WikipediaPage} records stored in
 * the {@code part-NNNNN} SequenceFiles of a collection directory.
 *
 * <p>The index file read by {@link #loadIndex(String, String)} holds one entry per
 * block: the docno recorded for the block, a seek offset, and the number of the
 * part file the offset refers to. A lookup binary-searches the block docnos, seeks
 * to the block's offset and scans forward until the requested docno is read.
 *
 * <p>The lookup methods require a prior successful call to {@code loadIndex}; this
 * class performs no synchronization of its own.
 */
public class WikipediaForwardIndex implements DocumentForwardIndex<WikipediaPage> {
    private static final Logger sLogger = Logger.getLogger(WikipediaForwardIndex.class);

    /** Value of {@link #mLastDocno} while the last docno has not been determined. */
    private static final int UNKNOWN_DOCNO = -1;

    /** Log a progress message every this many index entries while loading. */
    private static final int PROGRESS_INTERVAL = 100000;

    private final Configuration mConf;
    private final WikipediaDocnoMapping mDocnoMapping = new WikipediaDocnoMapping();
    private FileSystem mFS;

    // Parallel arrays, one slot per index block, filled by loadIndex().
    private int[] mDocnos;
    private int[] mOffsets;
    private short[] mFileno;

    private String mCollectionPath;

    /** Lazily computed by {@link #getLastDocno()}. */
    private int mLastDocno = UNKNOWN_DOCNO;

    /** Creates an index that uses a freshly constructed {@link Configuration}. */
    public WikipediaForwardIndex() {
        this(new Configuration());
    }

    /**
     * Creates an index that uses the given configuration to obtain the file system
     * and to open SequenceFiles.
     *
     * @param configuration the Hadoop configuration to use
     */
    public WikipediaForwardIndex(Configuration configuration) {
        this.mConf = configuration;
    }

    /**
     * Loads the docno mapping and the block index.
     *
     * <p>The index file layout, as read here: a UTF string (read and discarded), a
     * UTF string with the collection path, an int block count, then for every block
     * an int docno, an int offset and a short part-file number.
     *
     * @param str  path of the forward index file
     * @param str2 path of the docno mapping data handed to the docno mapping
     * @throws IOException if the mapping or the index file cannot be read
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public void loadIndex(String str, String str2) throws IOException {
        sLogger.info("Loading forward index: " + str);
        this.mFS = FileSystem.get(this.mConf);
        this.mDocnoMapping.loadMapping(new Path(str2), this.mFS);

        FSDataInputStream in = this.mFS.open(new Path(str));
        try {
            in.readUTF(); // leading header string is not used by this class
            String collectionPath = in.readUTF();
            int blocks = in.readInt();
            sLogger.info(blocks + " blocks expected");

            int[] docnos = new int[blocks];
            int[] offsets = new int[blocks];
            short[] filenos = new short[blocks];
            for (int n = 0; n < blocks; n++) {
                docnos[n] = in.readInt();
                offsets[n] = in.readInt();
                filenos[n] = in.readShort();
                if (n > 0 && n % PROGRESS_INTERVAL == 0) {
                    sLogger.info(n + " blocks read");
                }
            }

            // Publish only after the whole file was read, so a truncated index
            // does not leave half-filled arrays behind.
            this.mCollectionPath = collectionPath;
            this.mDocnos = docnos;
            this.mOffsets = offsets;
            this.mFileno = filenos;
            // Forget a last docno cached for a previously loaded index.
            this.mLastDocno = UNKNOWN_DOCNO;
        } finally {
            // Close the stream on the error path as well.
            in.close();
        }
    }

    /**
     * Returns the collection path read from the index file.
     *
     * @return the collection path, or {@code null} if no index has been loaded
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public String getCollectionPath() {
        return this.mCollectionPath;
    }

    /**
     * Fetches the page with the given docno.
     *
     * @param i the docno to fetch
     * @return the page, or {@code null} if the docno lies outside
     *         [{@link #getFirstDocno()}, {@link #getLastDocno()}], is not present in
     *         its block's part file, or an I/O error occurs
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public WikipediaPage getDocument(int i) {
        long start = System.currentTimeMillis();
        if (i < getFirstDocno() || i > getLastDocno()) {
            return null;
        }

        int block = Arrays.binarySearch(this.mDocnos, i);
        if (block < 0) {
            // Not a block docno: binarySearch returned -(insertionPoint) - 1, so
            // this selects the block just before the insertion point.
            block = (-block) - 2;
        }

        String file = partFileName(block);
        sLogger.info("fetching docno " + i + ": seeking to " + this.mOffsets[block] + " at " + file);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(this.mFS, new Path(file), this.mConf);
            try {
                IntWritable key = new IntWritable();
                reader.seek(this.mOffsets[block]);

                boolean found = false;
                while (!found && reader.next(key)) {
                    found = key.get() == i;
                }
                if (!found) {
                    // End of file reached without seeing the docno; there is no
                    // current value that belongs to the request.
                    sLogger.warn("docno " + i + " not found in " + file);
                    return null;
                }

                WikipediaPage page = new WikipediaPage();
                reader.getCurrentValue(page);
                sLogger.info(" docno " + i + " fetched in " + (System.currentTimeMillis() - start) + "ms");
                return page;
            } finally {
                // Release the reader whether or not the lookup succeeded.
                reader.close();
            }
        } catch (IOException e) {
            sLogger.error("Error fetching docno " + i + " from " + file, e);
            return null;
        }
    }

    /**
     * Fetches the page with the given docid by translating it to a docno through
     * the docno mapping and delegating to {@link #getDocument(int)}.
     *
     * @param str the docid to fetch
     * @return the page, or {@code null} under the same conditions as
     *         {@link #getDocument(int)}
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public WikipediaPage getDocument(String str) {
        return getDocument(this.mDocnoMapping.getDocno(str));
    }

    /**
     * Translates a docid to a docno using the loaded docno mapping.
     *
     * @param str the docid
     * @return the docno reported by the mapping
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public int getDocno(String str) {
        return this.mDocnoMapping.getDocno(str);
    }

    /**
     * Translates a docno to a docid using the loaded docno mapping.
     *
     * @param i the docno
     * @return the docid reported by the mapping
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public String getDocid(int i) {
        return this.mDocnoMapping.getDocid(i);
    }

    /**
     * Returns the docno recorded for the first index block.
     *
     * @return the first docno covered by the index
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public int getFirstDocno() {
        return this.mDocnos[0];
    }

    /**
     * Returns the last docno of the collection. The value is found by scanning the
     * last index block to the end of its part file and is cached afterwards.
     *
     * @return the last docno, or {@code -1} if it could not be determined because
     *         of an I/O error (the scan is retried on the next call)
     */
    @Override // edu.umd.cloud9.collection.DocumentForwardIndex
    public int getLastDocno() {
        if (this.mLastDocno != UNKNOWN_DOCNO) {
            return this.mLastDocno;
        }

        int lastBlock = this.mDocnos.length - 1;
        String file = partFileName(lastBlock);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(this.mFS, new Path(file), this.mConf);
            try {
                IntWritable key = new IntWritable();
                reader.seek(this.mOffsets[lastBlock]);
                while (reader.next(key)) {
                    // Keep reading; the key holds the last docno once next() fails.
                }
                this.mLastDocno = key.get();
            } finally {
                // The reader is only needed for this scan.
                reader.close();
            }
        } catch (IOException e) {
            sLogger.error("Error determining last docno from " + file, e);
        }
        return this.mLastDocno;
    }

    /**
     * Builds the path string of the part file that holds the given index block:
     * the collection path, {@code "/part-"}, and the block's file number
     * zero-padded to five digits.
     */
    private String partFileName(int block) {
        // DecimalFormat is not thread-safe, so a fresh instance is used per call.
        return this.mCollectionPath + "/part-" + new DecimalFormat("00000").format(this.mFileno[block]);
    }
}
