package edu.umd.cloud9.collection.trec;

import edu.umd.cloud9.collection.Indexable;
import edu.umd.cloud9.webgraph.data.AnchorTextConstants;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.WritableUtils;

/* loaded from: input_file:edu/umd/cloud9/collection/trec/TrecDocument.class */
public class TrecDocument extends Indexable {
    public static final String XML_START_TAG = "<DOC>";
    public static final String XML_END_TAG = "</DOC>";
    private String mRawDoc;
    private String mDocid;

    public void write(DataOutput dataOutput) throws IOException {
        byte[] bytes = this.mRawDoc.getBytes();
        WritableUtils.writeVInt(dataOutput, bytes.length);
        dataOutput.write(bytes, 0, bytes.length);
    }

    public void readFields(DataInput dataInput) throws IOException {
        int readVInt = WritableUtils.readVInt(dataInput);
        byte[] bArr = new byte[readVInt];
        dataInput.readFully(bArr, 0, readVInt);
        readDocument(this, new String(bArr));
    }

    @Override // edu.umd.cloud9.collection.Indexable
    public String getDocid() {
        if (this.mDocid == null) {
            int indexOf = this.mRawDoc.indexOf("<DOCNO>");
            if (indexOf == -1) {
                this.mDocid = AnchorTextConstants.EMPTY_STRING;
            } else {
                this.mDocid = this.mRawDoc.substring(indexOf + 7, this.mRawDoc.indexOf("</DOCNO>", indexOf)).trim();
            }
        }
        return this.mDocid;
    }

    @Override // edu.umd.cloud9.collection.Indexable
    public String getContent() {
        return this.mRawDoc;
    }

    public static void readDocument(TrecDocument trecDocument, String str) {
        if (str == null) {
            throw new RuntimeException("Error, can't read null string!");
        }
        trecDocument.mRawDoc = str;
        trecDocument.mDocid = null;
    }
}
