package ai.platon.pulsar.test;

import ai.platon.pulsar.PulsarSession;
import ai.platon.pulsar.common.NetUtil;
import ai.platon.pulsar.common.options.LoadOptions;
import ai.platon.pulsar.common.urls.NormUrl;
import ai.platon.pulsar.common.urls.Urls;
import ai.platon.pulsar.context.PulsarContext;
import ai.platon.pulsar.context.PulsarContexts;
import ai.platon.pulsar.crawl.CrawlLoops;
import ai.platon.pulsar.crawl.JsEventHandler;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebDb;
import ai.platon.pulsar.persist.WebPage;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* compiled from: VerboseCrawler.kt */
@Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��T\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0010\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u001e\n\u0002\u0018\u0002\n\u0002\b\u0005\b\u0016\u0018��2\u00020\u0001B\u000f\b\u0016\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004B\u000f\u0012\b\b\u0002\u0010\u0005\u001a\u00020\u0006¢\u0006\u0002\u0010\u0007J\u0006\u0010\u0019\u001a\u00020\u001aJ\u0016\u0010\u001b\u001a\u00020\u001a2\u0006\u0010\u001c\u001a\u00020\u001d2\u0006\u0010\u001e\u001a\u00020\u001fJ\u0016\u0010\u001b\u001a\u00020\u001a2\u0006\u0010\u001c\u001a\u00020\u001d2\u0006\u0010 \u001a\u00020\u001dJ\u0016\u0010!\u001a\u00020\u001a2\u0006\u0010\"\u001a\u00020\u001d2\u0006\u0010\u001e\u001a\u00020\u001fJ\u001c\u0010#\u001a\b\u0012\u0004\u0012\u00020%0$2\u0006\u0010\"\u001a\u00020\u001d2\u0006\u0010\u001e\u001a\u00020\u001fJ\u001c\u0010#\u001a\b\u0012\u0004\u0012\u00020%0$2\u0006\u0010\"\u001a\u00020\u001d2\u0006\u0010 \u001a\u00020\u001dJ\u000e\u0010&\u001a\u00020\u001a2\u0006\u0010\u001c\u001a\u00020\u001dJ\u000e\u0010'\u001a\u00020\u001a2\u0006\u0010(\u001a\u00020\u001dJ\u0006\u0010)\u001a\u00020\u001aR\u0011\u0010\b\u001a\u00020\t¢\u0006\b\n��\u001a\u0004\b\n\u0010\u000bR\u001c\u0010\f\u001a\u0004\u0018\u00010\rX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u000e\u0010\u000f\"\u0004\b\u0010\u0010\u0011R\u0019\u0010\u0012\u001a\n \u0014*\u0004\u0018\u00010\u00130\u0013¢\u0006\b\n��\u001a\u0004\b\u0015\u0010\u0016R\u0011\u0010\u0005\u001a\u00020\u0006¢\u0006\b\n��\u001a\u0004\b\u0017\u0010\u0018¨\u0006*"}, d2 = {"Lai/platon/pulsar/test/VerboseCrawler;", "", "context", "Lai/platon/pulsar/context/PulsarContext;", "(Lai/platon/pulsar/context/PulsarContext;)V", "session", 
"Lai/platon/pulsar/PulsarSession;", "(Lai/platon/pulsar/PulsarSession;)V", "crawlLoop", "Lai/platon/pulsar/crawl/CrawlLoops;", "getCrawlLoop", "()Lai/platon/pulsar/crawl/CrawlLoops;", "eventHandler", "Lai/platon/pulsar/crawl/JsEventHandler;", "getEventHandler", "()Lai/platon/pulsar/crawl/JsEventHandler;", "setEventHandler", "(Lai/platon/pulsar/crawl/JsEventHandler;)V", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getLogger", "()Lorg/slf4j/Logger;", "getSession", "()Lai/platon/pulsar/PulsarSession;", "extractAds", "", "load", "url", "", "options", "Lai/platon/pulsar/common/options/LoadOptions;", "args", "loadAllNews", "portalUrl", "loadOutPages", "", "Lai/platon/pulsar/persist/WebPage;", "open", "scan", "baseUri", "truncate", "pulsar-tests"})
/* loaded from: input_file:ai/platon/pulsar/test/VerboseCrawler.class */
/**
 * A small crawler front-end that loads pages through a {@link PulsarSession} and reports what it
 * did on the logger and on standard output.
 *
 * <p>An optional {@link JsEventHandler} can be set with {@link #setEventHandler(JsEventHandler)};
 * {@link #load(String, LoadOptions)} and {@link #loadOutPages(String, LoadOptions)} attach it to
 * the supplied {@link LoadOptions} while loading and always detach it afterwards.
 */
public class VerboseCrawler {

    /** Maximum number of reachable sites printed by {@link #load(String, LoadOptions)}. */
    private static final int MAX_REPORTED_SITES = 10;

    /** Delimiter used to cut an out-link down to its site prefix, and to rebuild the site URL. */
    private static final String COM_SUFFIX = ".com";

    /** Maps an element to the value of its {@code abs:href} attribute. */
    private static final Function1<Element, String> ABSOLUTE_HREF = element -> element.attr("abs:href");

    @NotNull
    private final PulsarSession session;
    private final Logger logger;

    @NotNull
    private final CrawlLoops crawlLoop;

    @Nullable
    private JsEventHandler eventHandler;

    /**
     * Creates a crawler backed by a session obtained from {@code PulsarContexts.createSession()}.
     */
    public VerboseCrawler() {
        this(PulsarContexts.INSTANCE.createSession());
    }

    /**
     * Creates a crawler that works on the given session.
     *
     * @param pulsarSession the session used for every load, parse and export; must not be null
     */
    public VerboseCrawler(@NotNull PulsarSession pulsarSession) {
        Intrinsics.checkNotNullParameter(pulsarSession, "session");
        this.session = pulsarSession;
        this.logger = LoggerFactory.getLogger(VerboseCrawler.class);
        this.crawlLoop = this.session.getContext().getCrawlLoops();
    }

    /**
     * Kotlin default-argument bridge: when bit 0 of {@code i} is set, the session argument is
     * ignored and a session from {@code PulsarContexts.createSession()} is used instead.
     * Kept for binary compatibility with already-compiled Kotlin callers.
     */
    public /* synthetic */ VerboseCrawler(PulsarSession pulsarSession, int i, DefaultConstructorMarker defaultConstructorMarker) {
        this((i & 1) != 0 ? PulsarContexts.INSTANCE.createSession() : pulsarSession);
    }

    /**
     * Creates a crawler backed by a new session created from the given context.
     *
     * @param pulsarContext the context that creates the session; must not be null
     */
    public VerboseCrawler(@NotNull PulsarContext pulsarContext) {
        this(createSessionFrom(pulsarContext));
    }

    /**
     * Validates the context before it is dereferenced, so that a null argument is reported by the
     * parameter check rather than by the {@code createSession()} call itself.
     */
    private static PulsarSession createSessionFrom(PulsarContext context) {
        Intrinsics.checkNotNullParameter(context, "context");
        return context.createSession();
    }

    /** Returns the session this crawler works on. */
    @NotNull
    public final PulsarSession getSession() {
        return this.session;
    }

    /** Returns the logger of this crawler. */
    public final Logger getLogger() {
        return this.logger;
    }

    /** Returns the crawl loops taken from the session's context at construction time. */
    @NotNull
    public final CrawlLoops getCrawlLoop() {
        return this.crawlLoop;
    }

    /** Returns the event handler attached to load options while loading, or null if none is set. */
    @Nullable
    public final JsEventHandler getEventHandler() {
        return this.eventHandler;
    }

    /**
     * Sets the event handler attached to load options while loading.
     *
     * @param jsEventHandler the handler to use, or null
     */
    public final void setEventHandler(@Nullable JsEventHandler jsEventHandler) {
        this.eventHandler = jsEventHandler;
    }

    /**
     * Loads the url with the {@code -refresh} option; see {@link #load(String, LoadOptions)}.
     *
     * @param str the url to load
     */
    public final void open(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "url");
        load(str, this.session.options("-refresh"));
    }

    /**
     * Loads the url with options built by the session from the given argument string; see
     * {@link #load(String, LoadOptions)}.
     *
     * @param str the url to load
     * @param str2 the load arguments handed to {@code session.options(...)}
     */
    public final void load(@NotNull String str, @NotNull String str2) {
        Intrinsics.checkNotNullParameter(str, "url");
        Intrinsics.checkNotNullParameter(str2, "args");
        load(str, this.session.options(str2));
    }

    /**
     * Loads and parses the url. When the options carry a non-blank out-link selector, the selected
     * links are reduced to their {@code .com} site URLs, up to {@value #MAX_REPORTED_SITES} of those
     * that pass {@code NetUtil.testHttpNetwork} are printed to standard output, and the document
     * is exported. With a blank selector nothing is printed or exported.
     *
     * @param str the url to load
     * @param loadOptions the load options; the event handler is attached only during the load
     */
    public final void load(@NotNull String str, @NotNull LoadOptions loadOptions) {
        Intrinsics.checkNotNullParameter(str, "url");
        Intrinsics.checkNotNullParameter(loadOptions, "options");
        WebPage page = loadWithEventHandler(str, loadOptions);
        FeaturedDocument document = parseAndClean(page);
        String outLinkSelector = loadOptions.getOutLinkSelector();
        if (StringsKt.isBlank(outLinkSelector)) {
            return;
        }
        System.out.println(String.join("\n", findReachableSites(document, outLinkSelector)));
        this.logger.info("Export to: file://{}", PulsarSession.DefaultImpls.export$default(this.session, document, (String) null, 2, (Object) null));
    }

    /**
     * Loads the portal page and its out pages with options parsed from the given argument string;
     * see {@link #loadOutPages(String, LoadOptions)}.
     *
     * @param str the portal url
     * @param str2 the load arguments, parsed with the session configuration
     * @return the loaded out pages
     */
    @NotNull
    public final Collection<WebPage> loadOutPages(@NotNull String str, @NotNull String str2) {
        Intrinsics.checkNotNullParameter(str, "portalUrl");
        Intrinsics.checkNotNullParameter(str2, "args");
        return loadOutPages(str, LoadOptions.Companion.parse(str2, this.session.getSessionConfig()));
    }

    /**
     * Loads the portal page, exports it, and then loads the pages it links to.
     *
     * <p>Links are selected with the options' corrected out-link selector, normalized,
     * de-duplicated in encounter order and limited to {@code options.getTopLinks()} entries. A
     * portal page whose protocol status is not successful is logged as a warning; processing
     * continues regardless.
     *
     * @param str the portal url
     * @param loadOptions the options for the portal page; item options are derived from them
     * @return the pages returned by the session for the selected links
     */
    @NotNull
    public final Collection<WebPage> loadOutPages(@NotNull String str, @NotNull LoadOptions loadOptions) {
        Intrinsics.checkNotNullParameter(str, "portalUrl");
        Intrinsics.checkNotNullParameter(loadOptions, "options");
        WebPage portal = loadWithEventHandler(str, loadOptions);
        if (!portal.getProtocolStatus().isSuccess()) {
            this.logger.warn("Failed to load page | {}", str);
        }
        FeaturedDocument document = parseAndClean(portal);
        this.logger.info("Portal page is exported to: file://{}", PulsarSession.DefaultImpls.export$default(this.session, document, (String) null, 2, (Object) null));

        List<String> hrefs = FeaturedDocument.select$default(document, loadOptions.getCorrectedOutLinkSelector(), 0, 0, ABSOLUTE_HREF, 6, (Object) null);
        // Insertion-ordered set: duplicates are dropped while the document order is kept.
        Set<NormUrl> normalizedUrls = new LinkedHashSet<>();
        for (String href : hrefs) {
            Intrinsics.checkNotNullExpressionValue(href, "it");
            normalizedUrls.add(PulsarSession.DefaultImpls.normalize$default(this.session, href, loadOptions, false, 4, (Object) null));
        }
        List<NormUrl> topUrls = CollectionsKt.take(normalizedUrls, loadOptions.getTopLinks());
        List<String> itemUrls = new ArrayList<>(topUrls.size());
        for (NormUrl normUrl : topUrls) {
            itemUrls.add(normUrl.getSpec());
        }
        this.logger.info("Total {} items to load", itemUrls.size());

        LoadOptions itemOptions = loadOptions.createItemOptions(this.session.getSessionConfig());
        itemOptions.setParse(true);
        // NOTE(review): the handler is attached to the portal options although the items are loaded
        // with itemOptions, which were created while no handler was attached. This mirrors the
        // original behavior; confirm whether the handler was meant to go on itemOptions instead.
        JsEventHandler handler = this.eventHandler;
        loadOptions.addEventHandler(handler);
        try {
            return PulsarSession.DefaultImpls.loadAll$default(this.session, itemUrls, itemOptions, false, 4, (Object) null);
        } finally {
            loadOptions.removeEventHandler(handler);
        }
    }

    /**
     * Loads the portal page, keeps those of its simple live links that contain {@code "jinrong"},
     * loads them with the {@code --parse} option and prints each page's url and content title to
     * standard output.
     *
     * @param str the portal url
     * @param loadOptions the options used for the portal page only
     */
    public final void loadAllNews(@NotNull String str, @NotNull LoadOptions loadOptions) {
        Intrinsics.checkNotNullParameter(str, "portalUrl");
        Intrinsics.checkNotNullParameter(loadOptions, "options");
        Collection<?> liveLinks = this.session.load(str, loadOptions).getSimpleLiveLinks();
        Intrinsics.checkNotNullExpressionValue(liveLinks, "portal.simpleLiveLinks");
        List<String> newsLinks = new ArrayList<>();
        for (Object link : liveLinks) {
            String url = (String) link;
            Intrinsics.checkNotNullExpressionValue(url, "it");
            if (url.contains("jinrong")) {
                newsLinks.add(url);
            }
        }
        Collection<WebPage> pages = PulsarSession.DefaultImpls.loadAll$default(this.session, newsLinks, this.session.options("--parse"), false, 4, (Object) null);
        for (WebPage page : pages) {
            System.out.println(page.getUrl() + " " + page.getContentTitle());
        }
    }

    /**
     * Loads a fixed sample page and selects its {@code a[href~=mssp.baidu]} elements. The selected
     * elements are not processed further; the method produces no result.
     */
    public final void extractAds() {
        FeaturedDocument document = this.session.loadDocument("https://wuhan.baixing.com/xianhualipin/a1100414743.html", "");
        FeaturedDocument.select$default(document, "a[href~=mssp.baidu]", 0, 0, 6, (Object) null);
    }

    /**
     * Scans the pages the session's context returns for the given base uri and prints the content
     * length of each one to standard output.
     *
     * @param str the base uri handed to the context's scan
     */
    public final void scan(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "baseUri");
        this.session.getContext().scan(str).forEachRemaining(VerboseCrawler::printContentLength);
    }

    /** Truncates the web database of the session's context, using the default truncate arguments. */
    public final void truncate() {
        WebDb.truncate$default(this.session.getContext().getWebDb(), false, 1, (Object) null);
    }

    /**
     * Loads the url with the current event handler attached to the options. The handler is
     * detached again even when the load throws, so the caller's options are never left modified.
     */
    private WebPage loadWithEventHandler(String url, LoadOptions options) {
        // Read the field once so that the instance removed is the instance that was added.
        JsEventHandler handler = this.eventHandler;
        options.addEventHandler(handler);
        try {
            return this.session.load(url, options);
        } finally {
            options.removeEventHandler(handler);
        }
    }

    /** Parses the page with the session, then calls absoluteLinks() and stripScripts() on the document. */
    private FeaturedDocument parseAndClean(WebPage page) {
        FeaturedDocument document = PulsarSession.DefaultImpls.parse$default(this.session, page, false, 2, (Object) null);
        document.absoluteLinks();
        document.stripScripts();
        return document;
    }

    /**
     * Collects the distinct site URLs (text before the first ".com", plus ".com") of the valid
     * links selected from the document and returns at most {@value #MAX_REPORTED_SITES} of them
     * that pass the network test. Probing stops as soon as the limit is reached.
     */
    private List<String> findReachableSites(FeaturedDocument document, String outLinkSelector) {
        List<String> hrefs = FeaturedDocument.select$default(document, outLinkSelector, 0, 0, ABSOLUTE_HREF, 6, (Object) null);
        Set<String> candidateSites = new HashSet<>();
        for (String href : hrefs) {
            if (!Urls.isValidUrl(href)) {
                continue;
            }
            Intrinsics.checkNotNullExpressionValue(href, "it");
            // A link without ".com" is kept whole, matching substringBefore's default.
            String sitePrefix = StringsKt.substringBefore(href, COM_SUFFIX, href);
            if (!StringsKt.isBlank(sitePrefix)) {
                candidateSites.add(sitePrefix + COM_SUFFIX);
            }
        }
        List<String> reachableSites = new ArrayList<>();
        for (String site : candidateSites) {
            if (reachableSites.size() >= MAX_REPORTED_SITES) {
                break;
            }
            if (isReachable(site)) {
                reachableSites.add(site);
            }
        }
        return reachableSites;
    }

    /** Runs the HTTP network test against the site; a malformed site URL is logged and counts as unreachable. */
    private boolean isReachable(String site) {
        try {
            return NetUtil.testHttpNetwork(new URL(site));
        } catch (MalformedURLException e) {
            this.logger.warn("Skipping malformed site url | {}", site, e);
            return false;
        }
    }

    /**
     * Prints the content length of the page: 0 when the page has no content, the length of the
     * backing array when the buffer exposes one, and the remaining byte count otherwise.
     */
    private static void printContentLength(WebPage webPage) {
        Intrinsics.checkNotNullParameter(webPage, "it");
        ByteBuffer content = webPage.getContent();
        int length;
        if (content == null) {
            length = 0;
        } else if (content.hasArray()) {
            length = content.array().length;
        } else {
            // array() would throw for direct or read-only buffers.
            length = content.remaining();
        }
        System.out.println(length);
    }
}
