/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.common.collect;

import ai.platon.pulsar.common.collect.HyperlinkExtractorsKt;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.common.urls.StatefulHyperlink;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.session.PulsarSession;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000@\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u001f\n\u0002\u0018\u0002\n\u0002\b\u0003\b\u0016\u0018\u00002\u00020\u0001B'\b\u0016\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\b\u001a\u00020\u0007\u00a2\u0006\u0002\u0010\tB%\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u0012\u0006\u0010\n\u001a\u00020\u000b\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\b\u001a\u00020\u0007\u00a2\u0006\u0002\u0010\fJ\f\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001aJ\u001a\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001a2\f\u0010\u001d\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001aR\u0011\u0010\n\u001a\u00020\u000b\u00a2\u0006\b\n\u0000\u001a\u0004\b\r\u0010\u000eR\u0016\u0010\u000f\u001a\n \u0011*\u0004\u0018\u00010\u00100\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0012\u0010\u0013R\u0011\u0010\u0006\u001a\u00020\u0007\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0014\u0010\u0015R\u0011\u0010\b\u001a\u00020\u0007\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0016\u0010\u0015R\u000e\u0010\u0017\u001a\u00020\u0018X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u001e"}, d2={"Lai/platon/pulsar/common/collect/RegexHyperlinkExtractor;", "", "session", "Lai/platon/pulsar/session/PulsarSession;", "page", "Lai/platon/pulsar/persist/WebPage;", "restrictCss", "", "urlPattern", "(Lai/platon/pulsar/session/PulsarSession;Lai/platon/pulsar/persist/WebPage;Ljava/lang/String;Ljava/lang/String;)V", "document", "Lai/platon/pulsar/dom/FeaturedDocument;", "(Lai/platon/pulsar/persist/WebPage;Lai/platon/pulsar/dom/FeaturedDocument;Ljava/lang/String;Ljava/lang/String;)V", "getDocument", "()Lai/platon/pulsar/dom/FeaturedDocument;", "log", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getPage", "()Lai/platon/pulsar/persist/WebPage;", "getRestrictCss", "()Ljava/lang/String;", "getUrlPattern", "urlRegex", "Lkotlin/text/Regex;", "extract", "", "Lai/platon/pulsar/common/urls/Hyperlink;", "extractTo", "fetchUrls", "pulsar-skeleton"})
public class RegexHyperlinkExtractor {
    @NotNull
    private final WebPage page;
    @NotNull
    private final FeaturedDocument document;
    @NotNull
    private final String restrictCss;
    @NotNull
    private final String urlPattern;
    private final Logger log;
    @NotNull
    private final Regex urlRegex;

    public RegexHyperlinkExtractor(@NotNull WebPage page, @NotNull FeaturedDocument document, @NotNull String restrictCss, @NotNull String urlPattern) {
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        Intrinsics.checkNotNullParameter((Object)document, (String)"document");
        Intrinsics.checkNotNullParameter((Object)restrictCss, (String)"restrictCss");
        Intrinsics.checkNotNullParameter((Object)urlPattern, (String)"urlPattern");
        this.page = page;
        this.document = document;
        this.restrictCss = restrictCss;
        this.urlPattern = urlPattern;
        this.log = LoggerFactory.getLogger(RegexHyperlinkExtractor.class);
        String string = this.urlPattern;
        boolean bl = false;
        this.urlRegex = new Regex(string);
    }

    @NotNull
    public final WebPage getPage() {
        return this.page;
    }

    @NotNull
    public final FeaturedDocument getDocument() {
        return this.document;
    }

    @NotNull
    public final String getRestrictCss() {
        return this.restrictCss;
    }

    @NotNull
    public final String getUrlPattern() {
        return this.urlPattern;
    }

    public RegexHyperlinkExtractor(@NotNull PulsarSession session, @NotNull WebPage page, @NotNull String restrictCss, @NotNull String urlPattern) {
        Intrinsics.checkNotNullParameter((Object)session, (String)"session");
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        Intrinsics.checkNotNullParameter((Object)restrictCss, (String)"restrictCss");
        Intrinsics.checkNotNullParameter((Object)urlPattern, (String)"urlPattern");
        this(page, PulsarSession.DefaultImpls.parse$default(session, page, false, 2, null), restrictCss, urlPattern);
    }

    @NotNull
    public final Collection<Hyperlink> extract() {
        return this.extractTo(new LinkedHashSet());
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final Collection<Hyperlink> extractTo(@NotNull Collection<Hyperlink> fetchUrls) {
        void $this$collectNotNullTo$iv$iv;
        void destination$iv$iv;
        Intrinsics.checkNotNullParameter(fetchUrls, (String)"fetchUrls");
        Element restrictedSection = this.document.getDocument().selectFirst(this.restrictCss);
        if (restrictedSection == null) {
            this.log.warn("There is no restricted section <{}> | {}", (Object)this.restrictCss, (Object)this.page.getUrl());
            return fetchUrls;
        }
        Ref.IntRef i = new Ref.IntRef();
        Node $this$collectNotNull$iv = (Node)restrictedSection;
        boolean $i$f$collectNotNull = false;
        Node node = $this$collectNotNull$iv;
        boolean bl = false;
        Collection collection = new ArrayList();
        boolean $i$f$collectNotNullTo = false;
        NodeTraversor.traverse((NodeVisitor)new NodeVisitor((Collection)destination$iv$iv, this, i){
            final /* synthetic */ Collection $destination;
            final /* synthetic */ RegexHyperlinkExtractor this$0;
            final /* synthetic */ Ref.IntRef $i$inlined;
            {
                this.$destination = $destination;
                this.this$0 = regexHyperlinkExtractor;
                this.$i$inlined = intRef;
            }

            /*
             * Enabled force condition propagation
             * Lifted jumps to return sites
             */
            public final void head(@NotNull Node node, int $noName_1) {
                Intrinsics.checkNotNullParameter((Object)node, (String)"node");
                Node node2 = node;
                boolean bl = false;
                Node node3 = node2;
                boolean bl2 = false;
                boolean bl3 = false;
                Node it = node3;
                boolean bl4 = false;
                if (!NodeExtKt.isAnchor((Node)it)) return;
                Node node4 = node3;
                Node node5 = node4;
                if (node5 == null) {
                    return;
                }
                String string = node5.attr("abs:href");
                if (string == null) {
                    return;
                }
                String string2 = string;
                boolean bl5 = false;
                boolean bl6 = false;
                String string3 = string2;
                boolean bl7 = false;
                if (!UrlUtils.isValidUrl((String)string3)) return;
                CharSequence charSequence = string3;
                Regex regex = RegexHyperlinkExtractor.access$getUrlRegex$p(this.this$0);
                boolean bl8 = false;
                if (!regex.matches(charSequence)) return;
                boolean bl9 = true;
                if (!bl9) return;
                String string4 = string2;
                String string5 = string4;
                if (string5 == null) {
                    return;
                }
                String string6 = string5;
                bl5 = false;
                bl6 = false;
                String string7 = string6;
                boolean bl10 = false;
                charSequence = NodeExtKt.getBestElement((Node)node2).text();
                Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"node.bestElement.text()");
                int n = this.$i$inlined.element;
                this.$i$inlined.element = n + 1;
                StatefulHyperlink statefulHyperlink2 = new StatefulHyperlink(string7, (String)charSequence, n, this.this$0.getPage().getUrl(), null, null, 48, null);
                statefulHyperlink2 = statefulHyperlink2;
                StatefulHyperlink statefulHyperlink3 = statefulHyperlink2;
                if (statefulHyperlink3 == null) return;
                StatefulHyperlink statefulHyperlink4 = statefulHyperlink3;
                Collection collection = this.$destination;
                boolean bl11 = false;
                boolean bl12 = false;
                StatefulHyperlink it2 = statefulHyperlink4;
                boolean bl13 = false;
                collection.add(it2);
            }
        }, (Node)$this$collectNotNullTo$iv$iv);
        List parsedUrls = (List)destination$iv$iv;
        CollectionsKt.toCollection((Iterable)parsedUrls, fetchUrls);
        Collection collection2 = parsedUrls;
        Logger logger = this.log;
        Intrinsics.checkNotNullExpressionValue((Object)logger, (String)"log");
        HyperlinkExtractorsKt.access$reportHyperlink(this.page, collection2, fetchUrls, logger);
        return fetchUrls;
    }

    public static final /* synthetic */ Regex access$getUrlRegex$p(RegexHyperlinkExtractor $this) {
        return $this.urlRegex;
    }
}

