package edu.umd.cloud9.collection.pmc;

import edu.umd.cloud9.collection.Indexable;
import edu.umd.cloud9.webgraph.data.AnchorTextConstants;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.WritableUtils;

/* loaded from: input_file:edu/umd/cloud9/collection/pmc/PmcArticle.class */
/**
 * An article held as its raw XML text, with lazily extracted identifiers.
 *
 * <p>The instance stores the complete XML string handed to
 * {@link #readArticle(PmcArticle, String)}. The PMC id and DOI are located by
 * plain substring search on that text (no XML parsing) and cached after the
 * first lookup. Only the raw XML is serialized; the cached identifiers are
 * recomputed on demand after deserialization.
 *
 * <p>Accessors dereference the stored text directly, so an instance must be
 * populated via {@link #readArticle(PmcArticle, String)} or
 * {@link #readFields(DataInput)} before they are called.
 */
public class PmcArticle extends Indexable {
    public static final String XML_START_TAG = "<article ";
    public static final String XML_END_TAG = "</article>";

    /** Opening tag that precedes the PMC identifier value. */
    private static final String PMC_ID_OPEN_TAG = "<article-id pub-id-type=\"pmc\">";
    /** Opening tag that precedes the DOI value. */
    private static final String DOI_OPEN_TAG = "<article-id pub-id-type=\"doi\">";
    /** Closing tag shared by every article-id element. */
    private static final String ARTICLE_ID_CLOSE_TAG = "</article-id>";

    private String mPmcid;
    private String mDOI;
    private String mArticleText;

    /**
     * Serializes the raw article XML as a variable-length int byte count
     * followed by the text bytes.
     *
     * <p>The text is always encoded as UTF-8 so that the written form does not
     * depend on the platform default charset of the writing JVM.
     *
     * @param dataOutput sink to write to
     * @throws IOException if the underlying output fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        byte[] bytes = this.mArticleText.getBytes(StandardCharsets.UTF_8);
        WritableUtils.writeVInt(dataOutput, bytes.length);
        dataOutput.write(bytes, 0, bytes.length);
    }

    /**
     * Restores this article from the form produced by {@link #write(DataOutput)}:
     * a variable-length int byte count followed by that many UTF-8 bytes.
     * Any previously cached PMC id and DOI are discarded.
     *
     * @param dataInput source to read from
     * @throws IOException if the underlying input fails or ends prematurely
     */
    public void readFields(DataInput dataInput) throws IOException {
        int length = WritableUtils.readVInt(dataInput);
        byte[] bytes = new byte[length];
        dataInput.readFully(bytes, 0, length);
        // Decode with the same fixed charset used by write(), never the platform default.
        readArticle(this, new String(bytes, StandardCharsets.UTF_8));
    }

    /**
     * Returns the document id, which for this type is the PMC id.
     *
     * @return the value of {@link #getPmcid()}
     */
    @Override // edu.umd.cloud9.collection.Indexable
    public String getDocid() {
        return getPmcid();
    }

    /**
     * Returns the indexable content; this implementation always returns
     * {@code AnchorTextConstants.EMPTY_STRING}.
     */
    @Override // edu.umd.cloud9.collection.Indexable
    public String getContent() {
        return AnchorTextConstants.EMPTY_STRING;
    }

    /**
     * Returns the text between the first {@code <article-id pub-id-type="pmc">}
     * tag and the next {@code </article-id>}, caching the result.
     *
     * @return the PMC id text
     * @throws RuntimeException carrying the raw XML as its message when the
     *         opening tag is not present
     * @throws StringIndexOutOfBoundsException if the opening tag is present but
     *         no closing tag follows it
     */
    public String getPmcid() {
        if (this.mPmcid == null) {
            String id = extractArticleId(PMC_ID_OPEN_TAG);
            if (id == null) {
                throw new RuntimeException(getRawXML());
            }
            this.mPmcid = id;
        }
        return this.mPmcid;
    }

    /**
     * Returns the text between the first {@code <article-id pub-id-type="doi">}
     * tag and the next {@code </article-id>}, caching the result.
     *
     * @return the DOI text, or {@code AnchorTextConstants.EMPTY_STRING} when the
     *         opening tag is not present
     * @throws StringIndexOutOfBoundsException if the opening tag is present but
     *         no closing tag follows it
     */
    public String getDOI() {
        if (this.mDOI == null) {
            String id = extractArticleId(DOI_OPEN_TAG);
            this.mDOI = (id == null) ? AnchorTextConstants.EMPTY_STRING : id;
        }
        return this.mDOI;
    }

    /**
     * Returns the slice of the raw XML that starts at the first
     * {@code <ref-list} and ends just before the next {@code </ref-list>};
     * the closing tag itself is not included.
     *
     * @return the reference-list slice, or {@code AnchorTextConstants.EMPTY_STRING}
     *         when no {@code <ref-list} is present
     * @throws StringIndexOutOfBoundsException if {@code <ref-list} is present but
     *         no {@code </ref-list>} follows it
     */
    public String getReferencesXML() {
        int start = this.mArticleText.indexOf("<ref-list");
        if (start == -1) {
            return AnchorTextConstants.EMPTY_STRING;
        }
        int end = this.mArticleText.indexOf("</ref-list>", start);
        return this.mArticleText.substring(start, end);
    }

    /**
     * Returns the complete XML text this article was populated with.
     */
    public String getRawXML() {
        return this.mArticleText;
    }

    /**
     * Populates {@code pmcArticle} with the given XML text and clears its cached
     * PMC id and DOI so they are re-extracted on next access.
     *
     * @param pmcArticle article instance to populate
     * @param str raw XML text; must not be {@code null}
     * @throws RuntimeException if {@code str} is {@code null}
     */
    public static void readArticle(PmcArticle pmcArticle, String str) {
        if (str == null) {
            throw new RuntimeException("Error, can't read null string!");
        }
        pmcArticle.mArticleText = str;
        pmcArticle.mPmcid = null;
        pmcArticle.mDOI = null;
    }

    /**
     * Finds the first occurrence of {@code openTag} in the raw XML and returns
     * the text between it and the next {@code </article-id>}.
     *
     * @param openTag exact opening tag to search for
     * @return the enclosed text, or {@code null} when {@code openTag} is absent
     */
    private String extractArticleId(String openTag) {
        int tagStart = this.mArticleText.indexOf(openTag);
        if (tagStart == -1) {
            return null;
        }
        // The value begins right after the opening tag; derive the offset from
        // the tag itself rather than a hard-coded character count.
        int valueStart = tagStart + openTag.length();
        int valueEnd = this.mArticleText.indexOf(ARTICLE_ID_CLOSE_TAG, tagStart);
        return this.mArticleText.substring(valueStart, valueEnd);
    }
}
