/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.parse.html;

import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.options.LoadOptions;
import ai.platon.pulsar.common.persist.ext.WebPageExKt;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.parse.html.PrimerParser;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.select.QueriesKt;
import ai.platon.pulsar.persist.WebPage;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicInteger;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u0002\n\u0002\b\u0002\u0018\u0000 \u00142\u00020\u0001:\u0001\u0014B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J\u0006\u0010\u0011\u001a\u00020\rJ\u0018\u0010\u0012\u001a\u00020\u00132\u0006\u0010\u0002\u001a\u00020\u00032\u0006\u0010\u000e\u001a\u00020\rH\u0002R\u0019\u0010\u0007\u001a\n \t*\u0004\u0018\u00010\b0\b\u00a2\u0006\b\n\u0000\u001a\u0004\b\n\u0010\u000bR\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001e\u0010\u000e\u001a\u00020\r2\u0006\u0010\f\u001a\u00020\r@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000f\u0010\u0010R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0015"}, d2={"Lai/platon/pulsar/crawl/parse/html/JsoupParser;", "", "page", "Lai/platon/pulsar/persist/WebPage;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/persist/WebPage;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "LOG", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getLOG", "()Lorg/slf4j/Logger;", "<set-?>", "Lai/platon/pulsar/dom/FeaturedDocument;", "document", "getDocument", "()Lai/platon/pulsar/dom/FeaturedDocument;", "parse", "updateMetaInfos", "", "Companion", "pulsar-skeleton"})
public final class JsoupParser {
    @NotNull
    public static final Companion Companion = new Companion(null);
    @NotNull
    private final WebPage page;
    @NotNull
    private final ImmutableConfig conf;
    private final Logger LOG;
    @NotNull
    private FeaturedDocument document;
    @NotNull
    private static final AtomicInteger numJsoupParses = new AtomicInteger();
    @NotNull
    private static final AtomicInteger numJsoupParsed = new AtomicInteger();

    public JsoupParser(@NotNull WebPage page, @NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        this.page = page;
        this.conf = conf;
        this.LOG = LoggerFactory.getLogger(JsoupParser.class);
        this.document = FeaturedDocument.Companion.getNIL();
    }

    public final Logger getLOG() {
        return this.LOG;
    }

    @NotNull
    public final FeaturedDocument getDocument() {
        return this.document;
    }

    @NotNull
    public final FeaturedDocument parse() {
        PrimerParser primerParser;
        numJsoupParses.incrementAndGet();
        if (this.page.getEncoding() == null) {
            primerParser = new PrimerParser(this.conf);
            primerParser.detectEncoding(this.page);
        }
        try {
            primerParser = Jsoup.parse((InputStream)this.page.getContentAsInputStream(), (String)this.page.getEncoding(), (String)this.page.getBaseUrl());
            Intrinsics.checkNotNullExpressionValue((Object)primerParser, (String)"parse(page.contentAsInpu\u2026e.encoding, page.baseUrl)");
            this.document = new FeaturedDocument((Document)primerParser);
            this.updateMetaInfos(this.page, this.document);
            return this.document;
        }
        catch (IOException e) {
            this.LOG.warn("Failed to parse page {}", (Object)this.page.getUrl());
            this.LOG.warn(e.toString());
            numJsoupParsed.incrementAndGet();
            return this.document;
        }
    }

    private final void updateMetaInfos(WebPage page, FeaturedDocument document) {
        String string;
        boolean bl;
        String selector = "#PulsarMetaInformation";
        Object object = QueriesKt.selectFirstOrNull((Node)((Node)document.getDocument()), (String)selector);
        if (object == null) {
            return;
        }
        Element metadata = object;
        object = page.getHref();
        if (object != null) {
            Object object2 = object;
            bl = false;
            boolean bl2 = false;
            Object it = object2;
            boolean bl3 = false;
            Object object3 = string = UrlUtils.isValidUrl((String)it) ? object2 : null;
            if (string != null) {
                object2 = string;
                bl = false;
                bl2 = false;
                it = object2;
                boolean bl4 = false;
                metadata.attr("href", (String)it);
            }
        }
        string = page.getReferrer();
        boolean bl5 = false;
        bl = false;
        String it = string;
        boolean bl6 = false;
        String string2 = object = UrlUtils.isValidUrl((String)it) ? string : null;
        if (object != null) {
            string = object;
            bl5 = false;
            bl = false;
            it = string;
            boolean bl7 = false;
            metadata.attr("referer", it);
        }
        LoadOptions options = WebPageExKt.getOptions(page);
        metadata.attr("normUrl", page.getUrl());
        metadata.attr("normalizedUrl", page.getUrl());
        metadata.attr("label", options.getLabel());
        metadata.attr("taskId", options.getTaskId());
        metadata.attr("taskTime", options.getTaskTime().toString());
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0014\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0011\u0010\u0003\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0005\u0010\u0006R\u0011\u0010\u0007\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\b\u0010\u0006\u00a8\u0006\t"}, d2={"Lai/platon/pulsar/crawl/parse/html/JsoupParser$Companion;", "", "()V", "numJsoupParsed", "Ljava/util/concurrent/atomic/AtomicInteger;", "getNumJsoupParsed", "()Ljava/util/concurrent/atomic/AtomicInteger;", "numJsoupParses", "getNumJsoupParses", "pulsar-skeleton"})
    public static final class Companion {
        private Companion() {
        }

        @NotNull
        public final AtomicInteger getNumJsoupParses() {
            return numJsoupParses;
        }

        @NotNull
        public final AtomicInteger getNumJsoupParsed() {
            return numJsoupParsed;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

