/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.filter;

import ai.platon.pulsar.common.ExceptionsKt;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.filter.BlockFilter;
import ai.platon.pulsar.crawl.filter.TextFilter;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.metadata.PageCategory;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.util.Locale;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000H\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u000e\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\b\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u000e\u0018\u0000 32\u00020\u0001:\u00013B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u000e\u0010 \u001a\u00020!2\u0006\u0010\"\u001a\u00020!J\u000e\u0010#\u001a\u00020$2\u0006\u0010%\u001a\u00020&J\u000e\u0010'\u001a\u00020$2\u0006\u0010(\u001a\u00020\fJ\u000e\u0010)\u001a\u00020$2\u0006\u0010%\u001a\u00020&J\u000e\u0010*\u001a\u00020$2\u0006\u0010(\u001a\u00020\fJ\u000e\u0010+\u001a\u00020$2\u0006\u0010(\u001a\u00020\fJ\u000e\u0010,\u001a\u00020$2\u0006\u0010(\u001a\u00020\fJ\u0010\u0010-\u001a\u00020$2\b\u0010.\u001a\u0004\u0018\u00010\fJ\u0010\u0010/\u001a\u00020$2\b\u00100\u001a\u0004\u0018\u00010\fJ\u0010\u00101\u001a\u00020$2\b\u0010(\u001a\u0004\u0018\u00010\fJ\b\u00102\u001a\u00020\fH\u0016R\"\u0010\u0007\u001a\u0004\u0018\u00010\u00062\b\u0010\u0005\u001a\u0004\u0018\u00010\u0006@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\b\u0010\tR\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\n\u0010\u000bR\"\u0010\r\u001a\u0004\u0018\u00010\f2\b\u0010\u0005\u001a\u0004\u0018\u00010\f@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000e\u0010\u000fR\u0011\u0010\u0010\u001a\u00020\u0011\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0012\u0010\u0013R\"\u0010\u0014\u001a\u0004\u0018\u00010\f2\b\u0010\u0005\u001a\u0004\u0018\u00010\f@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0015\u0010\u000fR\"\u0010\u0016\u001a\u0004\u0018\u00010\f2\b\u0010\u0005\u001a\u0004\u0018\u00010\f@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0017\u0010\u000fR\"\u0010\u0018\u001a\u0004\u0018\u00010\f2\b\u0010\u0005\u001a\u0004\u0018\u00010\f@BX\u0086\u000e\u00a
2\u0006\b\n\u0000\u001a\u0004\b\u0019\u0010\u000fR\"\u0010\u001b\u001a\u0004\u0018\u00010\u001a2\b\u0010\u0005\u001a\u0004\u0018\u00010\u001a@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u001c\u0010\u001dR\"\u0010\u001e\u001a\u0004\u0018\u00010\f2\b\u0010\u0005\u001a\u0004\u0018\u00010\f@BX\u0086\u000e\u00a2\u0006\b\n\u0000\u001a\u0004\b\u001f\u0010\u000f\u00a8\u00064"}, d2={"Lai/platon/pulsar/crawl/filter/CrawlFilter;", "", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "<set-?>", "Lai/platon/pulsar/crawl/filter/BlockFilter;", "blockFilter", "getBlockFilter", "()Lai/platon/pulsar/crawl/filter/BlockFilter;", "getConf", "()Lai/platon/pulsar/common/config/ImmutableConfig;", "", "endKey", "getEndKey", "()Ljava/lang/String;", "pageType", "Lai/platon/pulsar/persist/metadata/PageCategory;", "getPageType", "()Lai/platon/pulsar/persist/metadata/PageCategory;", "reversedEndKey", "getReversedEndKey", "reversedStartKey", "getReversedStartKey", "startKey", "getStartKey", "Lai/platon/pulsar/crawl/filter/TextFilter;", "textFilter", "getTextFilter", "()Lai/platon/pulsar/crawl/filter/TextFilter;", "urlFilter", "getUrlFilter", "filter", "Lai/platon/pulsar/persist/WebPage;", "page", "isAllowed", "", "node", "Lorg/w3c/dom/Node;", "isDetailUrl", "url", "isDisallowed", "isIndexUrl", "isMediaUrl", "isSearchUrl", "testKeyRangeSatisfied", "reversedUrl", "testTextSatisfied", "text", "testUrlSatisfied", "toString", "Companion", "pulsar-skeleton"})
public final class CrawlFilter {
    /** Shared companion instance; the instance methods call its helpers (e.g. {@code keyGreaterEqual}). */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Configuration handed to the constructor and returned by {@code getConf()}. */
    @NotNull
    private final ImmutableConfig conf;
    /**
     * Category compared by the {@code isDetailUrl}/{@code isSearchUrl}/{@code isMediaUrl}/{@code isIndexUrl}
     * predicates. The constructor assigns {@code PageCategory.UNKNOWN} and nothing in this file changes it.
     */
    @NotNull
    private final PageCategory pageType;
    /**
     * Only null-checked (with an empty body) in the constructor and returned by its getter.
     * NOTE(review): presumably populated reflectively (e.g. by a JSON mapper) - confirm.
     */
    @Nullable
    private String urlFilter;
    /** Optional filter consulted by {@code testTextSatisfied}; when null every non-null text passes. */
    @Nullable
    private TextFilter textFilter;
    /** Optional filter consulted by {@code isAllowed}; when null every node is allowed. */
    @Nullable
    private BlockFilter blockFilter;
    /** Start key; if non-null when the constructor runs, its escaped low-bound marker is unescaped there. */
    @Nullable
    private String startKey;
    /** End key; if non-null when the constructor runs, its escaped high-bound marker is unescaped there. */
    @Nullable
    private String endKey;
    /** {@code UrlUtils.reverseUrl(startKey)}, computed in the constructor; lower bound in {@code testKeyRangeSatisfied}. */
    @Nullable
    private String reversedStartKey;
    /** {@code UrlUtils.reverseUrl(endKey)}, computed in the constructor; upper bound in {@code testKeyRangeSatisfied}. */
    @Nullable
    private String reversedEndKey;
    /** Logger for this class; used by the constructor to report runtime exceptions. */
    private static final Logger LOG = LoggerFactory.getLogger(CrawlFilter.class);
    /** File suffixes assigned in the static initializer; in this file they are only exposed through the companion getter. */
    @NotNull
    private static final String[] MEDIA_URL_SUFFIXES;
    /** Patterns that make {@code Companion.guessPageCategory} answer {@code INDEX}. */
    @NotNull
    private static final Pattern[] INDEX_PAGE_URL_PATTERNS;
    /** Pattern that makes {@code Companion.guessPageCategory} answer {@code SEARCH}. */
    private static final Pattern SEARCH_PAGE_URL_PATTERN;
    /** Patterns that make {@code Companion.guessPageCategory} answer {@code DETAIL}. */
    @NotNull
    private static final Pattern[] DETAIL_PAGE_URL_PATTERNS;
    /** Pattern that makes {@code Companion.guessPageCategory} answer {@code MEDIA}. */
    private static final Pattern MEDIA_PAGE_URL_PATTERN;

    /**
     * Creates a filter bound to the given configuration.
     *
     * <p>The page type is set to {@code PageCategory.UNKNOWN}. When a start or end key is already
     * present, the textual escape of its boundary character (backslash + "u0001" for the start key,
     * backslash + "uFFFF" for the end key) is replaced by the real character, and the reversed key
     * is derived with {@code UrlUtils.reverseUrl}. Both key fields have no initializer in this
     * class, so with a plain {@code new CrawlFilter(conf)} these branches are skipped.
     *
     * <p>Any {@link RuntimeException} raised while normalising the keys is logged and swallowed so
     * that construction never fails because of a malformed key.
     *
     * @param conf the configuration to keep; must not be null
     */
    public CrawlFilter(@NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        this.conf = conf;
        this.pageType = PageCategory.UNKNOWN;
        try {
            if (this.startKey != null) {
                // The first pattern matches the control character itself (a no-op replacement kept
                // for fidelity); the second matches the literal escape text and unescapes it.
                String key = new Regex("\\u0001").replace((CharSequence)this.startKey, "\u0001");
                key = new Regex("\\\\u0001").replace((CharSequence)key, "\u0001");
                this.startKey = key;
                this.reversedStartKey = UrlUtils.reverseUrl((String)key);
            }
            if (this.endKey != null) {
                // Same two-step normalisation for the upper-bound marker.
                String key = new Regex("\\uFFFF").replace((CharSequence)this.endKey, "\uffff");
                key = new Regex("\\\\uFFFF").replace((CharSequence)key, "\uffff");
                this.endKey = key;
                this.reversedEndKey = UrlUtils.reverseUrl((String)key);
            }
        }
        catch (RuntimeException e) {
            LOG.error(ExceptionsKt.stringify$default((Throwable)e, null, null, (int)3, null));
        }
    }

    /**
     * Returns the configuration this filter was constructed with.
     *
     * @return the non-null configuration passed to the constructor
     */
    @NotNull
    public final ImmutableConfig getConf() {
        return conf;
    }

    /**
     * Returns the page category this filter is associated with.
     *
     * @return the value of the {@code pageType} field
     */
    @NotNull
    public final PageCategory getPageType() {
        return pageType;
    }

    /**
     * Returns the URL filter expression, if any.
     *
     * @return the value of the {@code urlFilter} field, possibly null
     */
    @Nullable
    public final String getUrlFilter() {
        return urlFilter;
    }

    /**
     * Returns the text filter consulted by {@code testTextSatisfied}, if any.
     *
     * @return the value of the {@code textFilter} field, possibly null
     */
    @Nullable
    public final TextFilter getTextFilter() {
        return textFilter;
    }

    /**
     * Returns the block filter consulted by {@code isAllowed}, if any.
     *
     * @return the value of the {@code blockFilter} field, possibly null
     */
    @Nullable
    public final BlockFilter getBlockFilter() {
        return blockFilter;
    }

    /**
     * Returns the start key, if any.
     *
     * @return the value of the {@code startKey} field, possibly null
     */
    @Nullable
    public final String getStartKey() {
        return startKey;
    }

    /**
     * Returns the end key, if any.
     *
     * @return the value of the {@code endKey} field, possibly null
     */
    @Nullable
    public final String getEndKey() {
        return endKey;
    }

    /**
     * Returns the reversed start key used as the lower bound in {@code testKeyRangeSatisfied}.
     *
     * @return the value of the {@code reversedStartKey} field, possibly null
     */
    @Nullable
    public final String getReversedStartKey() {
        return reversedStartKey;
    }

    /**
     * Returns the reversed end key used as the upper bound in {@code testKeyRangeSatisfied}.
     *
     * @return the value of the {@code reversedEndKey} field, possibly null
     */
    @Nullable
    public final String getReversedEndKey() {
        return reversedEndKey;
    }

    /**
     * Pass-through filter: validates the argument and hands the same page back unchanged.
     *
     * @param page the page to filter; must not be null
     * @return the very same {@code page} instance
     */
    @NotNull
    public final WebPage filter(@NotNull WebPage page) {
        Intrinsics.checkNotNullParameter(page, "page");
        return page;
    }

    /**
     * Tells whether a URL passes this filter. The only check performed is a null check;
     * the {@code urlFilter} field is not consulted.
     *
     * @param url the URL to test, may be null
     * @return {@code false} for a null URL, {@code true} otherwise
     */
    public final boolean testUrlSatisfied(@Nullable String url) {
        if (url == null) {
            return false;
        }
        return true;
    }

    /**
     * Checks that a reversed URL lies inside the inclusive range
     * {@code [reversedStartKey, reversedEndKey]}. A null bound is treated as open by the
     * companion helpers; a null {@code reversedUrl} never satisfies the range.
     *
     * @param reversedUrl the reversed URL to test, may be null
     * @return {@code true} when both the lower- and upper-bound checks pass
     */
    public final boolean testKeyRangeSatisfied(@Nullable String reversedUrl) {
        // Evaluate the lower bound first and stop early, mirroring short-circuit evaluation.
        if (!Companion.keyGreaterEqual(reversedUrl, this.reversedStartKey)) {
            return false;
        }
        return Companion.keyLessEqual(reversedUrl, this.reversedEndKey);
    }

    /**
     * Tests a text against the optional text filter.
     *
     * @param text the text to test, may be null
     * @return {@code false} for a null text; {@code true} when no text filter is set;
     *         otherwise the result of {@code textFilter.test(text)}
     */
    public final boolean testTextSatisfied(@Nullable String text) {
        if (text == null) {
            return false;
        }
        TextFilter activeFilter = this.textFilter;
        if (activeFilter == null) {
            return true;
        }
        return activeFilter.test(text);
    }

    /**
     * Tells whether a DOM node is allowed by the optional block filter.
     *
     * @param node the node to test; must not be null
     * @return {@code true} when no block filter is set, otherwise the result of
     *         {@code blockFilter.isAllowed(node)}
     */
    public final boolean isAllowed(@NotNull Node node) {
        Intrinsics.checkNotNullParameter(node, "node");
        BlockFilter activeFilter = this.blockFilter;
        if (activeFilter == null) {
            return true;
        }
        return activeFilter.isAllowed(node);
    }

    /**
     * Negation of {@code isAllowed(node)}.
     *
     * @param node the node to test; must not be null
     * @return {@code true} exactly when {@code isAllowed(node)} is {@code false}
     */
    public final boolean isDisallowed(@NotNull Node node) {
        Intrinsics.checkNotNullParameter(node, "node");
        boolean allowed = isAllowed(node);
        return !allowed;
    }

    /**
     * Tells whether this filter is a {@code DETAIL} filter and the URL passes {@code testUrlSatisfied}.
     *
     * @param url the URL to test; must not be null
     * @return {@code true} only when {@code pageType} is {@code PageCategory.DETAIL} and the URL is accepted
     */
    public final boolean isDetailUrl(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        if (pageType != PageCategory.DETAIL) {
            return false;
        }
        return testUrlSatisfied(url);
    }

    /**
     * Tells whether this filter is a {@code SEARCH} filter and the URL passes {@code testUrlSatisfied}.
     *
     * @param url the URL to test; must not be null
     * @return {@code true} only when {@code pageType} is {@code PageCategory.SEARCH} and the URL is accepted
     */
    public final boolean isSearchUrl(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        if (pageType != PageCategory.SEARCH) {
            return false;
        }
        return testUrlSatisfied(url);
    }

    /**
     * Tells whether this filter is a {@code MEDIA} filter and the URL passes {@code testUrlSatisfied}.
     *
     * @param url the URL to test; must not be null
     * @return {@code true} only when {@code pageType} is {@code PageCategory.MEDIA} and the URL is accepted
     */
    public final boolean isMediaUrl(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        if (pageType != PageCategory.MEDIA) {
            return false;
        }
        return testUrlSatisfied(url);
    }

    /**
     * Tells whether this filter is an {@code INDEX} filter and the URL passes {@code testUrlSatisfied}.
     *
     * @param url the URL to test; must not be null
     * @return {@code true} only when {@code pageType} is {@code PageCategory.INDEX} and the URL is accepted
     */
    public final boolean isIndexUrl(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        if (pageType != PageCategory.INDEX) {
            return false;
        }
        return testUrlSatisfied(url);
    }

    /**
     * Serialises this filter to JSON with a Gson instance configured via
     * {@code excludeFieldsWithoutExposeAnnotation()}.
     *
     * @return the JSON text produced by Gson; never null
     */
    @NotNull
    @Override
    public String toString() {
        GsonBuilder builder = new GsonBuilder().excludeFieldsWithoutExposeAnnotation();
        Gson serializer = builder.create();
        String json = serializer.toJson((Object)this);
        Intrinsics.checkNotNullExpressionValue((Object)json, (String)"gson.toJson(this)");
        return json;
    }

    // Initialises the URL-classification constants. Each constant is assigned directly with its
    // own array type instead of going through a shared Object[] temporary, which is not
    // assignable to String[] / Pattern[] without a cast. All regex strings are unchanged.
    static {
        MEDIA_URL_SUFFIXES = new String[]{"js", "css", "jpg", "png", "jpeg", "gif"};
        // NOTE(review): "chanel" may be a misspelling of "channel" - confirm before changing,
        // since editing the pattern alters which URLs are classified as index pages.
        INDEX_PAGE_URL_PATTERNS = new Pattern[]{
            Pattern.compile(".+tieba.baidu.com/.+search.+"),
            Pattern.compile(".+(index|list|tags|chanel).+")
        };
        SEARCH_PAGE_URL_PATTERN = Pattern.compile(".+(search|query|select).+");
        DETAIL_PAGE_URL_PATTERNS = new Pattern[]{
            Pattern.compile(".+tieba.baidu.com/p/(\\d+)"),
            Pattern.compile(".+(detail|item|article|book|good|product|thread|view|post|content|/20[012][0-9]/{0,1}[01][0-9]/|/20[012]-[0-9]{0,1}-[01][0-9]/|/\\d{2,}/\\d{5,}|\\d{7,}).+")
        };
        MEDIA_PAGE_URL_PATTERN = Pattern.compile(".+(pic|picture|photo|avatar|photoshow|video).+");
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u00008\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0010\u000e\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0004\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u000e\u0010\u001a\u001a\u00020\u001b2\u0006\u0010\u001c\u001a\u00020\u0014J\u000e\u0010\u001d\u001a\u00020\u001b2\u0006\u0010\u001c\u001a\u00020\u0014J\u001a\u0010\u001e\u001a\u00020\u001f2\b\u0010 \u001a\u0004\u0018\u00010\u00142\b\u0010!\u001a\u0004\u0018\u00010\u0014J\u001a\u0010\"\u001a\u00020\u001f2\b\u0010 \u001a\u0004\u0018\u00010\u00142\b\u0010!\u001a\u0004\u0018\u00010\u0014R!\u0010\u0003\u001a\u0010\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u00050\u0004\u00a2\u0006\n\n\u0002\u0010\t\u001a\u0004\b\u0007\u0010\bR!\u0010\n\u001a\u0010\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u00050\u0004\u00a2\u0006\n\n\u0002\u0010\t\u001a\u0004\b\u000b\u0010\bR\u0019\u0010\f\u001a\n \u0006*\u0004\u0018\u00010\r0\r\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000e\u0010\u000fR\u0019\u0010\u0010\u001a\n \u0006*\u0004\u0018\u00010\u00050\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0011\u0010\u0012R\u0019\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00140\u0004\u00a2\u0006\n\n\u0002\u0010\u0017\u001a\u0004\b\u0015\u0010\u0016R\u0019\u0010\u0018\u001a\n \u0006*\u0004\u0018\u00010\u00050\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0019\u0010\u0012\u00a8\u0006#"}, d2={"Lai/platon/pulsar/crawl/filter/CrawlFilter$Companion;", "", "()V", "DETAIL_PAGE_URL_PATTERNS", "", "Ljava/util/regex/Pattern;", "kotlin.jvm.PlatformType", "getDETAIL_PAGE_URL_PATTERNS", "()[Ljava/util/regex/Pattern;", "[Ljava/util/regex/Pattern;", "INDEX_PAGE_URL_PATTERNS", "getINDEX_PAGE_URL_PATTERNS", "LOG", "Lorg/slf4j/Logger;", "getLOG", "()Lorg/slf4j/Logger;", "MEDIA_PAGE_URL_PATTERN", "getMEDIA_PAGE_URL_PATTERN", 
"()Ljava/util/regex/Pattern;", "MEDIA_URL_SUFFIXES", "", "getMEDIA_URL_SUFFIXES", "()[Ljava/lang/String;", "[Ljava/lang/String;", "SEARCH_PAGE_URL_PATTERN", "getSEARCH_PAGE_URL_PATTERN", "getPageCategory", "Lai/platon/pulsar/persist/metadata/PageCategory;", "url", "guessPageCategory", "keyGreaterEqual", "", "test", "bound", "keyLessEqual", "pulsar-skeleton"})
    public static final class Companion {
        /** Private no-argument constructor; performs no initialisation. */
        private Companion() {
        }

        /**
         * Returns the logger shared by {@code CrawlFilter}.
         *
         * @return the enclosing class's static {@code LOG}
         */
        public final Logger getLOG() {
            return CrawlFilter.LOG;
        }

        /**
         * Returns the media suffix constants.
         *
         * @return the enclosing class's {@code MEDIA_URL_SUFFIXES} array itself (not a copy)
         */
        @NotNull
        public final String[] getMEDIA_URL_SUFFIXES() {
            return CrawlFilter.MEDIA_URL_SUFFIXES;
        }

        /**
         * Returns the patterns used to recognise index-page URLs.
         *
         * @return the enclosing class's {@code INDEX_PAGE_URL_PATTERNS} array itself (not a copy)
         */
        @NotNull
        public final Pattern[] getINDEX_PAGE_URL_PATTERNS() {
            return CrawlFilter.INDEX_PAGE_URL_PATTERNS;
        }

        /**
         * Returns the pattern used to recognise search-page URLs.
         *
         * @return the enclosing class's {@code SEARCH_PAGE_URL_PATTERN}
         */
        public final Pattern getSEARCH_PAGE_URL_PATTERN() {
            return CrawlFilter.SEARCH_PAGE_URL_PATTERN;
        }

        /**
         * Returns the patterns used to recognise detail-page URLs.
         *
         * @return the enclosing class's {@code DETAIL_PAGE_URL_PATTERNS} array itself (not a copy)
         */
        @NotNull
        public final Pattern[] getDETAIL_PAGE_URL_PATTERNS() {
            return CrawlFilter.DETAIL_PAGE_URL_PATTERNS;
        }

        /**
         * Returns the pattern used to recognise media-page URLs.
         *
         * @return the enclosing class's {@code MEDIA_PAGE_URL_PATTERN}
         */
        public final Pattern getMEDIA_PAGE_URL_PATTERN() {
            return CrawlFilter.MEDIA_PAGE_URL_PATTERN;
        }

        /**
         * Classifies a URL with fixed, case-sensitive substring rules that only apply to URLs
         * containing {@code "amazon.com"}: {@code "/s?i="} means an index page, otherwise
         * {@code "/dp/"} means a detail page.
         *
         * @param url the URL to classify; must not be null
         * @return {@code INDEX}, {@code DETAIL}, or {@code UNKNOWN} when no rule applies
         */
        @NotNull
        public final PageCategory getPageCategory(@NotNull String url) {
            Intrinsics.checkNotNullParameter((Object)url, (String)"url");
            if (!url.contains("amazon.com")) {
                return PageCategory.UNKNOWN;
            }
            // The index rule takes precedence over the detail rule.
            if (url.contains("/s?i=")) {
                return PageCategory.INDEX;
            }
            return url.contains("/dp/") ? PageCategory.DETAIL : PageCategory.UNKNOWN;
        }

        /**
         * Guesses the category of a page from the shape of its URL.
         *
         * <p>The URL is lower-cased with {@link Locale#ROOT} so the result does not depend on the
         * JVM's default locale (under a Turkish locale, for example, "INDEX" would not lower-case
         * to "index" and the patterns would miss). The rules are evaluated in this order and the
         * first hit wins:
         * <ol>
         *   <li>empty URL: {@code UNKNOWN}</li>
         *   <li>URL ends with "/" or contains at most three "/" characters: {@code INDEX}</li>
         *   <li>URL fully matches any index pattern: {@code INDEX}</li>
         *   <li>URL fully matches any detail pattern: {@code DETAIL}</li>
         *   <li>URL fully matches the search pattern: {@code SEARCH}</li>
         *   <li>URL fully matches the media pattern: {@code MEDIA}</li>
         *   <li>otherwise: {@code UNKNOWN}</li>
         * </ol>
         *
         * @param url the URL to classify; must not be null
         * @return the guessed category, never null
         */
        @NotNull
        public final PageCategory guessPageCategory(@NotNull String url) {
            Intrinsics.checkNotNullParameter((Object)url, (String)"url");
            if (url.isEmpty()) {
                return PageCategory.UNKNOWN;
            }
            String u = url.toLowerCase(Locale.ROOT);
            if (u.endsWith("/") || StringUtils.countMatches((CharSequence)u, (CharSequence)"/") <= 3) {
                return PageCategory.INDEX;
            }
            if (matchesAny(this.getINDEX_PAGE_URL_PATTERNS(), u)) {
                return PageCategory.INDEX;
            }
            if (matchesAny(this.getDETAIL_PAGE_URL_PATTERNS(), u)) {
                return PageCategory.DETAIL;
            }
            if (this.getSEARCH_PAGE_URL_PATTERN().matcher(u).matches()) {
                return PageCategory.SEARCH;
            }
            if (this.getMEDIA_PAGE_URL_PATTERN().matcher(u).matches()) {
                return PageCategory.MEDIA;
            }
            return PageCategory.UNKNOWN;
        }

        /** Returns {@code true} when {@code input} fully matches at least one of {@code patterns}. */
        private static boolean matchesAny(Pattern[] patterns, String input) {
            for (Pattern pattern : patterns) {
                if (pattern.matcher(input).matches()) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Lower-bound check using {@link String#compareTo(String)}.
         *
         * @param test  the key to test, may be null
         * @param bound the inclusive lower bound, or null for "no lower bound"
         * @return {@code false} when {@code test} is null; {@code true} when {@code bound} is null;
         *         otherwise whether {@code test} compares greater than or equal to {@code bound}
         */
        public final boolean keyGreaterEqual(@Nullable String test, @Nullable String bound) {
            if (test == null) {
                return false;
            }
            if (bound == null) {
                return true;
            }
            return test.compareTo(bound) >= 0;
        }

        /**
         * Upper-bound check using {@link String#compareTo(String)}.
         *
         * @param test  the key to test, may be null
         * @param bound the inclusive upper bound, or null for "no upper bound"
         * @return {@code false} when {@code test} is null; {@code true} when {@code bound} is null;
         *         otherwise whether {@code test} compares less than or equal to {@code bound}
         */
        public final boolean keyLessEqual(@Nullable String test, @Nullable String bound) {
            if (test == null) {
                return false;
            }
            if (bound == null) {
                return true;
            }
            return test.compareTo(bound) <= 0;
        }

        /**
         * Synthetic constructor: ignores its marker argument and delegates to the private
         * no-argument constructor.
         */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

