package dragon.ir.kngbase;

import dragon.ir.index.IRRelation;
import dragon.matrix.DoubleFlatSparseMatrix;
import dragon.matrix.DoubleGiantSparseMatrix;
import dragon.matrix.DoubleSparseMatrix;
import dragon.matrix.IntFlatSparseMatrix;
import dragon.matrix.IntGiantSparseMatrix;
import dragon.matrix.IntSparseMatrix;
import dragon.nlp.SimpleElementList;
import dragon.nlp.Token;
import dragon.nlp.extract.TokenExtractor;
import dragon.onlinedb.Article;
import dragon.onlinedb.CollectionReader;
import dragon.util.SortedArray;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;

/* loaded from: input_file:dragon/ir/kngbase/HALSpace.class */
/**
 * Builds a term-by-term knowledge matrix from sliding-window co-occurrence counts.
 *
 * <p>Articles are tokenized with the supplied {@link TokenExtractor}; every pair of indexed
 * tokens that falls inside a window of {@code windowSize} tokens contributes a weight of
 * {@code windowSize - distance} to an integer co-occurrence matrix. {@link #finalizeData()}
 * then converts each row of that matrix into normalized weights stored in the matrix returned
 * by {@link #getKnowledgeMatrix()}.
 *
 * <p>Rows and columns of the resulting matrix share the same term list.
 */
public class HALSpace implements KnowledgeBase {
    private final SimpleElementList termList;
    private final TokenExtractor te;
    private final int windowSize;
    private final IntSparseMatrix cooccurMatrix;
    private final DoubleSparseMatrix halMatrix;
    private final boolean fileBasedMatrix;
    private final SortedArray relationCache;
    private boolean showProgress;
    private final boolean useExternalTokenIndex;

    /**
     * Creates an in-memory space that starts from an empty term list, so every extracted
     * token is registered as a new term.
     *
     * @param tokenExtractor extractor used to tokenize article text
     * @param i window size, in tokens
     */
    public HALSpace(TokenExtractor tokenExtractor, int i) {
        this(new SimpleElementList(), tokenExtractor, i);
    }

    /**
     * Creates an in-memory space backed by flat sparse matrices.
     *
     * <p>If {@code simpleElementList} is non-empty at construction time it is treated as a
     * fixed external vocabulary: tokens are only looked up in it, never added. Note that the
     * filtering option of {@code tokenExtractor} is switched off as a side effect.
     *
     * @param simpleElementList term list used for both rows and columns
     * @param tokenExtractor extractor used to tokenize article text
     * @param i window size, in tokens
     */
    public HALSpace(SimpleElementList simpleElementList, TokenExtractor tokenExtractor, int i) {
        this.termList = simpleElementList;
        this.useExternalTokenIndex = simpleElementList.size() > 0;
        this.te = tokenExtractor;
        tokenExtractor.setFilteringOption(false);
        this.windowSize = i;
        this.halMatrix = new DoubleFlatSparseMatrix();
        this.fileBasedMatrix = false;
        this.cooccurMatrix = new IntFlatSparseMatrix(true, true);
        this.relationCache = new SortedArray();
        this.showProgress = false;
    }

    /**
     * Creates a file-backed space. The knowledge matrix is stored in the two given files and
     * the intermediate co-occurrence matrix in the same names suffixed with {@code ".tmp"};
     * {@link #close()} deletes the temporary pair.
     *
     * <p>If {@code simpleElementList} is non-empty at construction time it is treated as a
     * fixed external vocabulary: tokens are only looked up in it, never added. Note that the
     * filtering option of {@code tokenExtractor} is switched off as a side effect.
     *
     * @param simpleElementList term list used for both rows and columns
     * @param tokenExtractor extractor used to tokenize article text
     * @param i window size, in tokens
     * @param str first file name passed to the giant matrices (presumably the index file;
     *     confirm against {@code DoubleGiantSparseMatrix})
     * @param str2 second file name passed to the giant matrices (presumably the matrix file;
     *     confirm against {@code DoubleGiantSparseMatrix})
     */
    public HALSpace(SimpleElementList simpleElementList, TokenExtractor tokenExtractor, int i, String str, String str2) {
        this.termList = simpleElementList;
        this.useExternalTokenIndex = simpleElementList.size() > 0;
        this.te = tokenExtractor;
        tokenExtractor.setFilteringOption(false);
        this.windowSize = i;
        DoubleGiantSparseMatrix giantMatrix = new DoubleGiantSparseMatrix(str, str2, false, false);
        // Automatic flushing is effectively disabled; finalizeData() flushes explicitly.
        giantMatrix.setFlushInterval(Integer.MAX_VALUE);
        this.halMatrix = giantMatrix;
        this.fileBasedMatrix = true;
        this.cooccurMatrix = new IntGiantSparseMatrix(str + ".tmp", str2 + ".tmp", true, true);
        this.relationCache = new SortedArray();
        this.showProgress = false;
    }

    /** Returns the normalized matrix; same object as {@link #getHALMatrix()}. */
    @Override
    public DoubleSparseMatrix getKnowledgeMatrix() {
        return this.halMatrix;
    }

    /** Returns the term list that indexes the rows of the knowledge matrix. */
    @Override
    public SimpleElementList getRowKeyList() {
        return this.termList;
    }

    /** Returns the term list that indexes the columns; identical to the row key list. */
    @Override
    public SimpleElementList getColumnKeyList() {
        return this.termList;
    }

    /**
     * Enables or disables progress messages on standard output.
     *
     * @param z {@code true} to print progress while adding articles and finalizing
     */
    public void setShowProgress(boolean z) {
        this.showProgress = z;
    }

    /**
     * Adds every article in the list to the co-occurrence counts.
     *
     * @param arrayList list whose elements are all {@link Article} instances
     */
    public void add(ArrayList arrayList) {
        for (int pos = 0; pos < arrayList.size(); pos++) {
            addArticle((Article) arrayList.get(pos));
        }
    }

    /**
     * Drains the reader, adding each article until {@code getNextArticle()} returns
     * {@code null}. When progress output is enabled a line is printed every 10 articles.
     *
     * @param collectionReader source of articles
     */
    public void add(CollectionReader collectionReader) {
        int processed = 0;
        for (Article article = collectionReader.getNextArticle();
                article != null;
                article = collectionReader.getNextArticle()) {
            addArticle(article);
            processed++;
            if (this.showProgress && processed % 10 == 0) {
                System.out.println(new Date().toString() + " Processed Articles: " + processed);
            }
        }
    }

    /**
     * Converts the accumulated co-occurrence counts into the knowledge matrix.
     *
     * <p>For each row, only cells whose count is at least the row's mean count are kept, and
     * each kept cell is divided by the sum of the kept counts. Rows without any non-zero cell
     * are skipped, since their mean is undefined.
     */
    public void finalizeData() {
        this.cooccurMatrix.finalizeData();
        int rowCount = this.cooccurMatrix.rows();
        for (int row = 0; row < rowCount; row++) {
            int[] columns = this.cooccurMatrix.getNonZeroColumnsInRow(row);
            int[] scores = this.cooccurMatrix.getNonZeroIntScoresInRow(row);
            int length = scores == null ? 0 : scores.length;
            if (length > 0) {
                // Threshold: mean count over the non-zero cells of this row.
                double mean = this.cooccurMatrix.getRowSum(row) / length;

                // First pass: total of the counts that survive the threshold.
                double keptTotal = 0.0d;
                for (int k = 0; k < length; k++) {
                    if (scores[k] >= mean) {
                        keptTotal += scores[k];
                    }
                }

                // Second pass: store each surviving count as a fraction of that total.
                for (int k = 0; k < length; k++) {
                    if (scores[k] >= mean) {
                        this.halMatrix.add(row, columns[k], scores[k] / keptTotal);
                    }
                }
            }
            if (this.showProgress && row % 1000 == 0) {
                System.out.println("Processed Rows: " + row);
            }
            if (this.fileBasedMatrix && row % 5000 == 0) {
                ((DoubleGiantSparseMatrix) this.halMatrix).flush();
            }
        }
        this.halMatrix.finalizeData();
    }

    /** Returns the normalized matrix; same object as {@link #getKnowledgeMatrix()}. */
    public DoubleSparseMatrix getHALMatrix() {
        return this.halMatrix;
    }

    /**
     * Closes both matrices and, for a file-backed space, deletes the temporary co-occurrence
     * files. Deletion is best effort: a failed delete is not reported.
     */
    public void close() {
        this.halMatrix.close();
        this.cooccurMatrix.close();
        if (this.fileBasedMatrix) {
            IntGiantSparseMatrix tempMatrix = (IntGiantSparseMatrix) this.cooccurMatrix;
            new File(tempMatrix.getIndexFilename()).delete();
            new File(tempMatrix.getMatrixFilename()).delete();
        }
    }

    /**
     * Tokenizes one article (title, abstract and body joined by spaces) and adds its windowed
     * co-occurrences to the co-occurrence matrix.
     *
     * <p>Articles whose joined text is 20 characters or shorter, or that yield fewer tokens
     * than the window size, are ignored.
     */
    private void addArticle(Article article) {
        StringBuilder text = new StringBuilder();
        if (article.getTitle() != null) {
            text.append(article.getTitle());
            text.append(' ');
        }
        if (article.getAbstract() != null) {
            text.append(article.getAbstract());
            text.append(' ');
        }
        if (article.getBody() != null) {
            text.append(article.getBody());
            text.append(' ');
        }
        if (text.length() <= 20) {
            return;
        }

        ArrayList extracted = this.te.extractFromDoc(text.toString().trim());
        if (extracted == null || extracted.size() < this.windowSize) {
            return;
        }

        // Resolve a term index for every token. Tokens already resolved in this article are
        // kept in a per-article sorted cache so the term list is consulted once per token.
        Token[] tokens = new Token[extracted.size()];
        SortedArray seenTokens = new SortedArray();
        for (int pos = 0; pos < extracted.size(); pos++) {
            Token token = (Token) extracted.get(pos);
            tokens[pos] = token;
            int found = seenTokens.binarySearch(token);
            if (found >= 0) {
                token.setIndex(((Token) seenTokens.get(found)).getIndex());
            } else {
                token.setIndex(tokenSearch(token.getValue()));
                if (token.getIndex() >= 0) {
                    // A negative search result encodes the insertion point as -(pos) - 1.
                    seenTokens.add((found * (-1)) - 1, token);
                }
            }
        }
        seenTokens.clear();
        extracted.clear();

        // Slide the window: each later token in the window is paired with the first one,
        // weighted by how close it is (windowSize - distance). Tokens with index -1 are skipped.
        for (int start = 0; start <= tokens.length - this.windowSize; start++) {
            int firstIndex = tokens[start].getIndex();
            if (firstIndex != -1) {
                for (int offset = 1; offset < this.windowSize; offset++) {
                    int secondIndex = tokens[start + offset].getIndex();
                    if (secondIndex != -1) {
                        addRelation(firstIndex, secondIndex, this.windowSize - offset);
                        addRelation(secondIndex, firstIndex, this.windowSize - offset);
                    }
                }
            }
        }

        // Flush the per-article relation cache into the co-occurrence matrix, in both directions.
        for (int pos = 0; pos < this.relationCache.size(); pos++) {
            IRRelation relation = (IRRelation) this.relationCache.get(pos);
            this.cooccurMatrix.add(relation.getFirstTerm(), relation.getSecondTerm(), relation.getFrequency());
            this.cooccurMatrix.add(relation.getSecondTerm(), relation.getFirstTerm(), relation.getFrequency());
        }
        this.relationCache.clear();
    }

    /**
     * Maps a token string to its term index: a lookup only when an external vocabulary was
     * supplied at construction, otherwise an add to the term list.
     */
    private int tokenSearch(String str) {
        return this.useExternalTokenIndex ? this.termList.search(str) : this.termList.add(str);
    }

    /**
     * Records a weighted relation in the per-article cache. When the cache rejects the new
     * relation (presumably because an equal one is already stored), the weight is added to
     * the entry at the reported position instead.
     *
     * @return always {@code true}
     */
    private boolean addRelation(int i, int i2, int i3) {
        if (!this.relationCache.add(new IRRelation(i, i2, i3))) {
            ((IRRelation) this.relationCache.get(this.relationCache.insertedPos())).addFrequency(i3);
        }
        return true;
    }
}
