package edu.umd.cloud9.collection.clue;

import edu.umd.cloud9.collection.DocnoMapping;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import tl.lin.data.map.HMapKI;
import tl.lin.data.map.MapKI;

/* loaded from: input_file:edu/umd/cloud9/collection/clue/ClueWarcDocnoMapping.class */
/**
 * {@link DocnoMapping} that converts between docids of the form
 * {@code clueweb09-<section>-<file>-<doc>} (for example
 * {@code clueweb09-en0000-00-00000}) and integer docnos.
 *
 * <p>The lookup tables are {@code static}, so every instance in the JVM shares the
 * mapping most recently loaded through {@link #loadMapping(Path, FileSystem)}.
 *
 * <p>NOTE(review): the shared {@link DecimalFormat} instances are used without
 * synchronization; confirm this class is only used from a single thread.
 */
public class ClueWarcDocnoMapping implements DocnoMapping {
    private static final String USAGE = "usage: (getDocno|getDocid) [mapping-file] [docid/docno]";

    /**
     * Docno offset for each line of the mapping file, indexed by line number.
     * The capacity is fixed; presumably it equals the number of files in the
     * collection (TODO confirm).
     */
    private static final int[] offsets = new int[13217];

    /** Maps a section name to the index in {@link #offsets} of that section's first line. */
    private static final HMapKI<String> subdirMapping = new HMapKI<>();

    /** Formats the two-digit file number within a section. */
    private static final NumberFormat FILE_FORMAT = new DecimalFormat("00");

    /** Formats the five-digit document number within a file. */
    private static final NumberFormat DOC_FORMAT = new DecimalFormat("00000");

    /**
     * Returns the docno for a docid.
     *
     * <p>The docid is read positionally: characters 10-15 are the section name,
     * 17-18 the file number and 20-24 the document number within the file.
     *
     * @param str the docid, or {@code null}
     * @return the docno, or {@code -1} if {@code str} is {@code null}
     * @throws IndexOutOfBoundsException if the docid is shorter than 25 characters or
     *         the computed file index falls outside the offset table
     * @throws NumberFormatException if the file or document fields are not numeric
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public int getDocno(String str) {
        if (str == null) {
            return -1;
        }
        int sectionStart = subdirMapping.get(str.substring(10, 16));
        int fileNumber = Integer.parseInt(str.substring(17, 19));
        int docInFile = Integer.parseInt(str.substring(20, 25));
        return offsets[sectionStart + fileNumber] + docInFile;
    }

    /**
     * Returns the docid for a docno.
     *
     * @param i the docno
     * @return the docid, or {@code null} if no section covers the docno (for
     *         example when the docno is smaller than the first offset)
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public String getDocid(int i) {
        // Advance to the first offset strictly greater than the docno; the file
        // containing the docno is the one just before it.
        int fileIndex = 0;
        while (fileIndex < offsets.length && i >= offsets[fileIndex]) {
            fileIndex++;
        }
        fileIndex--;

        // NOTE(review): taking the first section whose start index is <= fileIndex is
        // only correct if getEntriesSortedByValue() returns entries in descending
        // value order - confirm against the tl.lin.data documentation.
        for (MapKI.Entry<String> entry : subdirMapping.getEntriesSortedByValue()) {
            int sectionStart = entry.getValue();
            if (sectionStart <= fileIndex) {
                return "clueweb09-" + entry.getKey()
                        + "-" + FILE_FORMAT.format(fileIndex - sectionStart)
                        + "-" + DOC_FORMAT.format(i - offsets[fileIndex]);
            }
        }
        return null;
    }

    /**
     * Loads the mapping from a comma-separated text file into the shared static tables.
     *
     * <p>Each line must have at least four fields. Field 0 is the section name and
     * field 3 is the integer docno offset for that line; fields 1 and 2 are not
     * read here. The first line on which a section name appears determines that
     * section's start index.
     *
     * @param path location of the mapping file
     * @param fileSystem file system used to open {@code path}
     * @throws IOException if the file cannot be read, a line is malformed, or the
     *         file has more lines than the offset table can hold
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public void loadMapping(Path path, FileSystem fileSystem) throws IOException {
        LineReader reader = new LineReader(fileSystem.open(path));
        try {
            Text line = new Text();
            String previousSection = null;
            int index = 0;
            while (reader.readLine(line) > 0) {
                String[] fields = line.toString().split(",");
                if (fields.length < 4) {
                    throw new IOException("Malformed mapping line " + (index + 1) + " in " + path
                            + ": \"" + line + "\"");
                }
                if (index >= offsets.length) {
                    throw new IOException("Mapping file " + path + " has more than "
                            + offsets.length + " lines");
                }
                String section = fields[0];
                // Only the first line of a run of identical section names marks its start.
                if (!section.equals(previousSection)) {
                    subdirMapping.put(section, index);
                }
                try {
                    offsets[index] = Integer.parseInt(fields[3]);
                } catch (NumberFormatException e) {
                    throw new IOException("Non-numeric offset on mapping line " + (index + 1)
                            + " in " + path + ": \"" + fields[3] + "\"", e);
                }
                previousSection = section;
                index++;
            }
        } finally {
            // Release the underlying stream even when reading or parsing fails.
            reader.close();
        }
    }

    /**
     * Returns a builder for this mapping.
     *
     * @return a new {@code ClueWarcDocnoMappingBuilder}
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public DocnoMapping.Builder getBuilder() {
        return new ClueWarcDocnoMappingBuilder();
    }

    /**
     * Command-line lookup tool.
     *
     * <p>Expects three arguments: the command ({@code getDocno} or {@code getDocid}),
     * the mapping file, and the docid or docno to look up.
     *
     * @param strArr command-line arguments
     * @throws IOException if the mapping file cannot be loaded
     */
    public static void main(String[] strArr) throws IOException {
        // All three arguments are read below, so require all three up front.
        if (strArr.length < 3) {
            System.out.println(USAGE);
            System.exit(-1);
        }
        String command = strArr[0];
        String mappingFile = strArr[1];
        String query = strArr[2];

        FileSystem fileSystem = FileSystem.get(new Configuration());
        System.out.println("loading mapping file " + mappingFile);
        ClueWarcDocnoMapping mapping = new ClueWarcDocnoMapping();
        mapping.loadMapping(new Path(mappingFile), fileSystem);

        if (command.equals("getDocno")) {
            System.out.println("looking up docno for \"" + query + "\"");
            int docno;
            try {
                docno = mapping.getDocno(query);
            } catch (RuntimeException e) {
                // A malformed docid is reported the same way as one that is not found.
                docno = -1;
            }
            if (docno > 0) {
                System.out.println(docno);
            } else {
                System.err.print("Invalid docid!");
            }
        } else if (command.equals("getDocid")) {
            try {
                System.out.println("looking up docid for " + query);
                System.out.println(mapping.getDocid(Integer.parseInt(query)));
            } catch (RuntimeException e) {
                // Covers a non-numeric docno as well as any lookup failure.
                System.err.print("Invalid docno!");
            }
        } else {
            System.out.println("Invalid command!");
            System.out.println(USAGE);
        }
    }
}
