package it.unimi.di.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.mg4j.index.BitStreamIndexWriter;
import it.unimi.di.mg4j.index.CompressionFlags;
import it.unimi.di.mg4j.index.DiskBasedIndex;
import it.unimi.di.mg4j.index.Index;
import it.unimi.di.mg4j.index.IndexReader;
import it.unimi.di.mg4j.index.IndexWriter;
import it.unimi.di.mg4j.index.VariableQuantumIndexWriter;
import it.unimi.di.mg4j.search.AlignDocumentIterator;
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceOpenHashMap;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.Properties;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

/* loaded from: input_file:it/unimi/di/mg4j/tool/PrecomputeAlignments.class */
/**
 * Precomputes alignments between two indices and stores the result as a new index.
 *
 * <p>For each term of the <em>alignee</em> index (or for each term of an explicitly
 * provided subset) this tool writes one inverted list into the output index. The
 * "documents" of that list are the numbers of the <em>aligner</em> terms for which an
 * {@link AlignDocumentIterator} over the two posting lists yields at least one document.
 * An additional, always-present last document ({@code numberOfDocuments - 1}) is appended
 * to every list, so that no inverted list is ever empty.
 *
 * <p>The output index is written without counts and positions; a separate frequencies
 * file and, when a term subset is used, a {@code .terms} file are written next to it.
 */
public class PrecomputeAlignments {
    private static final Logger LOGGER = Util.getLogger(PrecomputeAlignments.class);

    /** Number of documents of the output index: the number of aligner terms plus one (the always-present last document). */
    protected final int numberOfDocuments;
    /** Basename of the output index. */
    protected final String outputBasename;
    /** Minimum interval between progress log messages, in milliseconds. */
    private final long logInterval;
    /** Writer of the output index. */
    protected IndexWriter indexWriter;
    /** Never assigned by this class; kept for subclasses. */
    protected VariableQuantumIndexWriter variableQuantumIndexWriter;
    /** The index providing the aligning posting lists. */
    private final Index alignerIndex;
    /** The index whose terms are being aligned. */
    private final Index aligneeIndex;
    /** The selected subset of alignee terms, or {@code null} if all alignee terms are processed. */
    private final Collection<? extends CharSequence> terms;
    /** Number of inverted lists that will be written. */
    private final int numberOfTerms;
    /** Whether only a subset of the alignee terms is processed. */
    private final boolean subset;

    /**
     * Loads both indices and prepares the writer of the output index.
     *
     * @param outputBasename the basename of the resulting index.
     * @param aligneeBasename the basename of the index to be aligned.
     * @param alignerBasename the basename of the index used to align.
     * @param terms the subset of alignee terms to process, or {@code null} to process all of them.
     * @param logInterval the minimum time interval between activity logs, in milliseconds.
     */
    public PrecomputeAlignments(String outputBasename, String aligneeBasename, String alignerBasename, ObjectList<? extends CharSequence> terms, long logInterval) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        this.logInterval = logInterval;
        this.subset = terms != null;
        // The last flag differs between the two indices: it is set for the alignee only when a
        // term subset is used, which is exactly when run() needs aligneeIndex.termMap.
        this.aligneeIndex = Index.getInstance(aligneeBasename, true, false, this.subset);
        this.alignerIndex = Index.getInstance(alignerBasename, true, false, false);
        if (this.aligneeIndex.numberOfDocuments != this.alignerIndex.numberOfDocuments) {
            LOGGER.warn("Alignee and aligner do not have the same number of documents");
        }
        // One output document per aligner term, plus a fake document that is added to every
        // inverted list so that no list is empty.
        this.numberOfDocuments = this.alignerIndex.numberOfTerms + 1;
        this.terms = terms;
        this.numberOfTerms = this.subset ? terms.size() : this.aligneeIndex.numberOfTerms;
        this.outputBasename = outputBasename;
        LOGGER.debug("Precomputing alignment of " + aligneeBasename + " with " + alignerBasename + " into " + outputBasename);
        // Start from the default flags and drop counts and positions: only pointers are written.
        final Reference2ReferenceOpenHashMap<CompressionFlags.Component, CompressionFlags.Coding> flags =
                new Reference2ReferenceOpenHashMap<CompressionFlags.Component, CompressionFlags.Coding>(CompressionFlags.DEFAULT_STANDARD_INDEX);
        flags.remove(CompressionFlags.Component.COUNTS);
        flags.remove(CompressionFlags.Component.POSITIONS);
        this.indexWriter = new BitStreamIndexWriter(outputBasename, this.numberOfDocuments, true, flags);
    }

    /**
     * Computes all alignments and writes the output index, its frequencies file, its
     * properties file and, when a term subset is used, its {@code .terms} file.
     *
     * <p>The frequencies stream, the term writer and every index reader opened here are
     * closed even if the computation fails.
     *
     * @throws IOException if reading the indices or writing the output fails.
     * @throws ConfigurationException if the properties of the output index cannot be saved.
     */
    public void run() throws IOException, ConfigurationException {
        final ProgressLogger progressLogger = new ProgressLogger(Util.getLogger(getClass()), this.logInterval);
        progressLogger.displayFreeMemory = true;

        final OutputBitStream frequencies = new OutputBitStream(this.outputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION);
        try {
            final PrintWriter termWriter = this.subset
                    ? new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.outputBasename + ".terms"), "UTF-8")))
                    : null;
            try {
                progressLogger.expectedUpdates = this.numberOfTerms;
                progressLogger.itemsName = "terms";
                progressLogger.logInterval = this.logInterval;
                progressLogger.start("Precomputing alignments...");

                final IntArrayList alignedTerms = new IntArrayList();
                final IndexReader aligneeReader = this.aligneeIndex.getReader();
                try {
                    final Iterator<? extends CharSequence> termIterator = this.subset ? this.terms.iterator() : null;
                    final MutableString term = new MutableString();

                    for (int k = 0; k < this.numberOfTerms; k++) {
                        final int aligneeTerm;
                        if (this.subset) {
                            term.replace(termIterator.next());
                            aligneeTerm = (int) this.aligneeIndex.termMap.getLong(term);
                            if (aligneeTerm == -1) {
                                LOGGER.warn("Term " + term + " is not part of the index to be aligned");
                            }
                            // Unknown terms are still listed: they get a list containing only the fake document.
                            termWriter.println(term);
                        } else {
                            aligneeTerm = k;
                        }

                        this.indexWriter.newInvertedList();
                        alignedTerms.clear();
                        if (aligneeTerm != -1) {
                            collectAlignedTerms(aligneeReader, aligneeTerm, alignedTerms);
                        }
                        // Fake last document, guaranteeing a nonempty inverted list.
                        alignedTerms.add(this.numberOfDocuments - 1);

                        final int frequency = alignedTerms.size();
                        this.indexWriter.writeFrequency(frequency);
                        frequencies.writeGamma(frequency);
                        for (int p = 0; p < frequency; p++) {
                            this.indexWriter.writeDocumentPointer(this.indexWriter.newDocumentRecord(), alignedTerms.getInt(p));
                        }
                        progressLogger.update();
                    }
                    progressLogger.done();
                } finally {
                    aligneeReader.close();
                }
                this.indexWriter.close();
            } finally {
                if (termWriter != null) {
                    termWriter.close();
                }
            }

            // Properties are read only after the index writer has been closed.
            final Properties properties = this.indexWriter.properties();
            properties.addProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(this.alignerIndex.termProcessor));
            properties.save(this.outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
        } finally {
            frequencies.close();
        }
    }

    /**
     * Appends to {@code result} the number of every aligner term whose posting list, aligned
     * with that of the given alignee term, yields at least one document.
     *
     * <p>A fresh aligner reader is opened and closed for each call. Its lists are consumed in
     * sequence through {@code nextIterator()}, presumably in term order (TODO confirm against
     * the {@link IndexReader} contract). {@link Integer#MAX_VALUE} returned by
     * {@code nextDocument()} is treated as "no document", presumably the end-of-list marker.
     */
    private void collectAlignedTerms(final IndexReader aligneeReader, final int aligneeTerm, final IntArrayList result) throws IOException {
        final IndexReader alignerReader = this.alignerIndex.getReader();
        try {
            final int alignerTerms = this.numberOfDocuments - 1;
            for (int t = 0; t < alignerTerms; t++) {
                if (AlignDocumentIterator.getInstance(aligneeReader.documents(aligneeTerm), alignerReader.nextIterator()).nextDocument() != Integer.MAX_VALUE) {
                    result.add(t);
                }
            }
        } finally {
            alignerReader.close();
        }
    }

    /**
     * Command-line entry point: parses the arguments, optionally loads (and sorts) the file
     * of selected terms, and runs the precomputation.
     *
     * @param args the command-line arguments.
     */
    public static void main(String[] args) throws JSAPException, ConfigurationException, IOException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        final SimpleJSAP jsap = new SimpleJSAP(PrecomputeAlignments.class.getName(), "Precomputes alignments between two indices.", new Parameter[]{
            new FlaggedOption("logInterval", JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."),
            new UnflaggedOption("outputBasename", JSAP.STRING_PARSER, true, "The basename of the resulting index."),
            new UnflaggedOption("aligneeBasename", JSAP.STRING_PARSER, true, "The basename of the index who has to be aligned."),
            new UnflaggedOption("alignerBasename", JSAP.STRING_PARSER, true, "The basename of the index used to align."),
            new UnflaggedOption("terms", JSAP.STRING_PARSER, false, "A file containing a selected subset of UTF-8 coded words on which the alignments must be computed. The terms must be in sorted order, and appear in the same form as they appear in the file of terms of the alignee (i.e., no term processing will be applied).")
        });
        final JSAPResult arguments = jsap.parse(args);
        if (jsap.messagePrinted()) {
            return;
        }
        final ObjectList<MutableString> selectedTerms = arguments.userSpecified("terms")
                ? new FileLinesCollection(arguments.getString("terms"), "UTF-8").allLines()
                : null;
        if (selectedTerms != null) {
            Collections.sort(selectedTerms);
        }
        new PrecomputeAlignments(arguments.getString("outputBasename"), arguments.getString("aligneeBasename"), arguments.getString("alignerBasename"), selectedTerms, arguments.getLong("logInterval")).run();
    }
}
