/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.parse;

import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.config.Parameterized;
import ai.platon.pulsar.common.config.Params;
import ai.platon.pulsar.common.options.LinkOptions;
import ai.platon.pulsar.crawl.common.URLUtil;
import ai.platon.pulsar.crawl.filter.CrawlFilters;
import ai.platon.pulsar.persist.HyperlinkPersistable;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.metadata.Name;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Predicate;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * Illegal identifiers - consider using --renameillegalidents true
 */
@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000n\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\b\n\u0000\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010#\n\u0002\b\u0002\n\u0002\u0010!\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0002\n\u0002\b\u0002\u0018\u0000 *2\u00020\u0001:\u0001*B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J\u0014\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020!0 2\u0006\u0010\"\u001a\u00020#J\u000e\u0010$\u001a\u00020\n2\u0006\u0010%\u001a\u00020!J\b\u0010&\u001a\u00020'H\u0016J\u000e\u0010(\u001a\u00020)2\u0006\u0010\"\u001a\u00020#R\u0011\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\t\u001a\u00020\nX\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u0017\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f8F\u00a2\u0006\u0006\u001a\u0004\b\u000e\u0010\u000fR\u0016\u0010\u0010\u001a\n 
\u0012*\u0004\u0018\u00010\u00110\u0011X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0013\u001a\u00020\u0014X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0010\u0010\u0015\u001a\u0004\u0018\u00010\u0016X\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\r0\u0018X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0019\u001a\u00020\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u001a\u001a\b\u0012\u0004\u0012\u00020\r0\u001bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u001c\u001a\u00020\u0014X\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u001d\u001a\u00020\u0014X\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u0010\u0010\u001e\u001a\u0004\u0018\u00010\rX\u0082\u000e\u00a2\u0006\u0002\n\u0000\u00a8\u0006+"}, d2={"Lai/platon/pulsar/crawl/parse/LinkFilter;", "Lai/platon/pulsar/common/config/Parameterized;", "crawlFilters", "Lai/platon/pulsar/crawl/filter/CrawlFilters;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/crawl/filter/CrawlFilters;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "getConf", "()Lai/platon/pulsar/common/config/ImmutableConfig;", "debugLevel", "", "filterReport", "", "", "getFilterReport", "()Ljava/util/List;", "groupMode", "Lai/platon/pulsar/crawl/common/URLUtil$GroupMode;", "kotlin.jvm.PlatformType", "ignoreExternalLinks", "", "linkOptions", "Lai/platon/pulsar/common/options/LinkOptions;", "links", "", "maxUrlLength", "mutableFilterReport", "", "noFilter", "reparseLinks", "sourceHost", "asPredicate", "Ljava/util/function/Predicate;", "Lai/platon/pulsar/persist/HyperlinkPersistable;", "page", "Lai/platon/pulsar/persist/WebPage;", "filter", "link", "getParams", "Lai/platon/pulsar/common/config/Params;", "reset", "", "Companion", "pulsar-skeleton"})
public final class LinkFilter
implements Parameterized {
    @NotNull
    public static final Companion Companion = new Companion(null);
    @NotNull
    private final CrawlFilters crawlFilters;
    @NotNull
    private final ImmutableConfig conf;
    private final URLUtil.GroupMode groupMode;
    private final boolean ignoreExternalLinks;
    private final int maxUrlLength;
    @Nullable
    private String sourceHost;
    @Nullable
    private LinkOptions linkOptions;
    private boolean reparseLinks;
    private boolean noFilter;
    private int debugLevel;
    @NotNull
    private final Set<String> links;
    @NotNull
    private final List<String> mutableFilterReport;
    private static final Logger LOG = LoggerFactory.getLogger(LinkFilter.class);

    public LinkFilter(@NotNull CrawlFilters crawlFilters, @NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter((Object)crawlFilters, (String)"crawlFilters");
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        this.crawlFilters = crawlFilters;
        this.conf = conf;
        this.groupMode = (URLUtil.GroupMode)this.conf.getEnum("fetch.queue.mode", (Enum)URLUtil.GroupMode.BY_HOST);
        this.ignoreExternalLinks = this.conf.getBoolean("parse.ignore.external.links", false);
        this.maxUrlLength = this.conf.getInt("parse.max.url.length", 1024);
        this.links = new TreeSet();
        boolean bl = false;
        this.mutableFilterReport = new ArrayList();
    }

    @NotNull
    public final ImmutableConfig getConf() {
        return this.conf;
    }

    @NotNull
    public final List<String> getFilterReport() {
        return this.mutableFilterReport;
    }

    @NotNull
    public Params getParams() {
        Object[] objectArray = new Object[]{"ignoreExternalLinks", this.ignoreExternalLinks, "maxUrlLength", this.maxUrlLength, "defaultAnchorLenMin", this.conf.get("parse.min.anchor.length"), "defaultAnchorLenMax", this.conf.get("parse.max.anchor.length")};
        Params params = Params.of((String)"groupMode", (Object)((Object)this.groupMode), (Object[])objectArray);
        Intrinsics.checkNotNullExpressionValue((Object)params, (String)"of(\n                \"gro\u2026_ANCHOR_LENGTH]\n        )");
        return params;
    }

    public final void reset(@NotNull WebPage page) {
        String string;
        Object object;
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        this.linkOptions = LinkOptions.Companion.parse(page.getArgs().toString(), this.conf);
        if (this.ignoreExternalLinks) {
            object = page.getUrl();
            Intrinsics.checkNotNullExpressionValue((Object)object, (String)"page.url");
            String string2 = object;
            object = this.groupMode;
            Intrinsics.checkNotNullExpressionValue((Object)object, (String)"groupMode");
            string = URLUtil.INSTANCE.getHost(string2, (URLUtil.GroupMode)((Object)object));
        } else {
            string = "";
        }
        this.sourceHost = string;
        this.reparseLinks = page.getVariables().contains(Name.REPARSE_LINKS);
        this.noFilter = page.getVariables().contains(Name.PARSE_NO_LINK_FILTER);
        object = page.getVariables().get(Name.PARSE_LINK_FILTER_DEBUG_LEVEL, (Object)0);
        Intrinsics.checkNotNullExpressionValue((Object)object, (String)"page.variables.get(Name.\u2026NK_FILTER_DEBUG_LEVEL, 0)");
        this.debugLevel = ((Number)object).intValue();
        this.links.clear();
        object = page.getLinks();
        Intrinsics.checkNotNullExpressionValue((Object)object, (String)"page.links");
        Iterable $this$forEach$iv = (Iterable)object;
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            CharSequence l = (CharSequence)element$iv;
            boolean bl = false;
            this.links.add(((Object)l).toString());
        }
        this.mutableFilterReport.clear();
    }

    @NotNull
    public final Predicate<HyperlinkPersistable> asPredicate(@NotNull WebPage page) {
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        this.reset(page);
        return arg_0 -> LinkFilter.asPredicate$lambda-1(this, arg_0);
    }

    public final int filter(@NotNull HyperlinkPersistable link) {
        String destHost;
        String url;
        block11: {
            block10: {
                Intrinsics.checkNotNullParameter((Object)link, (String)"link");
                if (this.noFilter) {
                    return 0;
                }
                url = link.getUrl();
                CharSequence charSequence = link.getUrl();
                Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"link.url");
                charSequence = charSequence;
                boolean bl = false;
                if (charSequence.length() == 0) {
                    return 110;
                }
                if (link.getUrl().length() > this.maxUrlLength) {
                    return 112;
                }
                Object object = url;
                Intrinsics.checkNotNullExpressionValue((Object)object, (String)"url");
                String string = object;
                object = this.groupMode;
                Intrinsics.checkNotNullExpressionValue((Object)object, (String)"groupMode");
                destHost = URLUtil.INSTANCE.getHost(string, (URLUtil.GroupMode)((Object)object));
                if (destHost == null) break block10;
                object = destHost;
                boolean bl2 = false;
                if (!(object.length() == 0)) break block11;
            }
            return 104;
        }
        if (this.ignoreExternalLinks && !Intrinsics.areEqual((Object)this.sourceHost, (Object)destHost)) {
            return 106;
        }
        LinkOptions linkOptions = this.linkOptions;
        Intrinsics.checkNotNull((Object)linkOptions);
        CharSequence charSequence = link.getUrl();
        Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"link.url");
        String string = charSequence;
        charSequence = link.getText();
        Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"link.text");
        int r = linkOptions.filter(string, (String)charSequence);
        if (r > 0) {
            return 2000 + r;
        }
        if (!this.reparseLinks && this.links.contains(link.getUrl())) {
            return 118;
        }
        charSequence = link.getUrl();
        Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"link.url");
        url = this.crawlFilters.normalizeToEmpty((String)charSequence);
        charSequence = url;
        Intrinsics.checkNotNullExpressionValue((Object)charSequence, (String)"url");
        charSequence = charSequence;
        boolean bl = false;
        if (charSequence.length() == 0) {
            return 1000;
        }
        return !this.reparseLinks && this.links.contains(url) ? 120 : 0;
    }

    private static final boolean asPredicate$lambda-1(LinkFilter this$0, HyperlinkPersistable l) {
        Intrinsics.checkNotNullParameter((Object)this$0, (String)"this$0");
        Intrinsics.checkNotNullParameter((Object)l, (String)"l");
        int r = this$0.filter(l);
        if (this$0.debugLevel > 0) {
            this$0.mutableFilterReport.add(r + " <- " + l.getUrl() + "\t" + l.getText());
        }
        return r == 0;
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0014\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0019\u0010\u0003\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0006\u0010\u0007\u00a8\u0006\b"}, d2={"Lai/platon/pulsar/crawl/parse/LinkFilter$Companion;", "", "()V", "LOG", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getLOG", "()Lorg/slf4j/Logger;", "pulsar-skeleton"})
    public static final class Companion {
        private Companion() {
        }

        public final Logger getLOG() {
            return LOG;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

