/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.protocol.http;

import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.config.VolatileConfig;
import ai.platon.pulsar.crawl.protocol.Protocol;
import ai.platon.pulsar.crawl.protocol.Response;
import ai.platon.pulsar.crawl.protocol.RobotRulesParser;
import ai.platon.pulsar.crawl.protocol.http.AbstractHttpProtocol;
import ai.platon.pulsar.persist.PageDatum;
import ai.platon.pulsar.persist.WebPage;
import crawlercommons.robots.BaseRobotRules;
import java.net.URL;
import java.util.Locale;
import java.util.Map;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000,\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\b\u0016\u0018\u0000 \r2\u00020\u0001:\u0001\rB\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0018\u0010\u0007\u001a\u00020\b2\u0006\u0010\t\u001a\u00020\n2\u0006\u0010\u000b\u001a\u00020\fH\u0016R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000e"}, d2={"Lai/platon/pulsar/crawl/protocol/http/HttpRobotRulesParser;", "Lai/platon/pulsar/crawl/protocol/RobotRulesParser;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "allowForbidden", "", "getRobotRulesSet", "Lcrawlercommons/robots/BaseRobotRules;", "protocol", "Lai/platon/pulsar/crawl/protocol/Protocol;", "url", "Ljava/net/URL;", "Companion", "pulsar-skeleton"})
/**
 * Robots.txt rule resolver for HTTP-style protocols.
 *
 * <p>For a given URL this class fetches {@code /robots.txt} from the URL's host through an
 * {@link AbstractHttpProtocol}, follows at most one 301/302 redirect, turns the outcome into a
 * {@link BaseRobotRules} instance and stores it in the cache exposed by
 * {@code RobotRulesParser.Companion.getCACHE()}, keyed by {@code protocol:host:port}.
 */
public class HttpRobotRulesParser
extends RobotRulesParser {
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Value of {@code http.robots.403.allow} (default {@code false}); when false, a 403 forbids everything. */
    private final boolean allowForbidden;
    private static final Logger LOG = LoggerFactory.getLogger(HttpRobotRulesParser.class);

    /**
     * Creates a parser and reads the {@code http.robots.403.allow} flag from the configuration.
     *
     * @param conf configuration handed to the super class; must not be null
     */
    public HttpRobotRulesParser(@NotNull ImmutableConfig conf) {
        // Java requires super(...) to be the first statement, so the null check is done by a
        // static helper evaluated as part of the argument: it still runs before the super constructor.
        super(HttpRobotRulesParser.requireConf(conf));
        this.allowForbidden = conf.getBoolean("http.robots.403.allow", false);
    }

    /** Null-checks the constructor argument and hands it back unchanged. */
    private static ImmutableConfig requireConf(ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        return conf;
    }

    /**
     * Returns the robots rules that apply to {@code url}.
     *
     * <p>A cached entry is returned when present. Otherwise {@code /robots.txt} is requested and the
     * response is mapped as follows (only when the response carries content):
     * <ul>
     *   <li>200: the body is parsed with {@code parseRules};</li>
     *   <li>403 while {@code http.robots.403.allow} is false: the forbid-all rules;</li>
     *   <li>500 and above: empty rules, not cached;</li>
     *   <li>anything else: empty rules.</li>
     * </ul>
     * A failure while fetching or parsing is logged at info level and yields empty rules that are
     * not cached. A protocol that is not an {@link AbstractHttpProtocol}, a missing first response,
     * or a response without content yields empty rules and leaves the cache untouched.
     *
     * @param protocol protocol used to issue the request
     * @param url      any URL on the host whose robots.txt is wanted
     * @return the rules for the host; never null
     */
    @Override
    @NotNull
    public BaseRobotRules getRobotRulesSet(@NotNull Protocol protocol, @NotNull URL url) {
        Intrinsics.checkNotNullParameter((Object)protocol, (String)"protocol");
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        VolatileConfig volatileConfig = this.getConf().toVolatileConfig();
        String cacheKey = Companion.getCacheKey(url);
        BaseRobotRules cachedRules = RobotRulesParser.Companion.getCACHE().get(cacheKey);
        if (cachedRules != null) {
            return cachedRules;
        }

        LOG.trace("cache miss {}", url);

        BaseRobotRules robotRules = null;
        boolean cacheRule = true;
        URL redir = null;
        try {
            if (!(protocol instanceof AbstractHttpProtocol)) {
                return RobotRulesParser.EMPTY_RULES;
            }
            AbstractHttpProtocol http = (AbstractHttpProtocol)protocol;

            WebPage page = WebPage.newWebPage((String)new URL(url, "/robots.txt").toString(), (VolatileConfig)volatileConfig);
            Intrinsics.checkNotNullExpressionValue((Object)page, (String)"newWebPage(URL(url, \"/ro\u2026String(), volatileConfig)");
            Response response = http.getResponse(page, true);
            if (response == null) {
                return RobotRulesParser.EMPTY_RULES;
            }

            // Follow a single redirect hop; the header is looked up under both spellings.
            if (response.getHttpCode() == 301 || response.getHttpCode() == 302) {
                String redirection = response.getHeader("Location");
                if (redirection == null) {
                    redirection = response.getHeader("location");
                }
                if (redirection != null) {
                    redir = redirection.startsWith("http") ? new URL(redirection) : new URL(url, redirection);
                    WebPage redirectedPage = WebPage.newWebPage((String)redir.toString(), (VolatileConfig)volatileConfig);
                    Intrinsics.checkNotNullExpressionValue((Object)redirectedPage, (String)"newWebPage(redir.toString(), volatileConfig)");
                    // May be null; handled by the guards below.
                    response = http.getResponse(redirectedPage, true);
                }
            }

            PageDatum pageDatum = response == null ? null : response.getPageDatum();
            byte[] content = pageDatum == null ? null : pageDatum.getContent();
            if (response != null && content != null) {
                int httpCode = response.getHttpCode();
                if (httpCode == 200) {
                    String contentType = response.getHeader("Content-Type");
                    robotRules = this.parseRules(url.toString(), content, contentType == null ? "" : contentType, this.getAgentNames());
                } else if (httpCode == 403 && !this.allowForbidden) {
                    robotRules = RobotRulesParser.Companion.getFORBID_ALL_RULES();
                } else if (httpCode >= 500) {
                    // Server-side failure: answer with empty rules but do not remember them.
                    cacheRule = false;
                    robotRules = RobotRulesParser.EMPTY_RULES;
                } else {
                    robotRules = RobotRulesParser.EMPTY_RULES;
                }
            }
        }
        catch (Throwable t) {
            // Deliberate best effort: any failure means "no rules known" for this call.
            // t.toString() keeps the throwable inside the message text instead of it being
            // treated as a trailing exception argument by the logger.
            LOG.info("Couldn't get robots.txt for {}: {}", url, t.toString());
            cacheRule = false;
            robotRules = RobotRulesParser.EMPTY_RULES;
        }

        if (robotRules == null) {
            // No usable response body was obtained. Never store null in the shared cache: the cache
            // implementation is not visible here and a null-rejecting map would throw on put.
            return RobotRulesParser.EMPTY_RULES;
        }

        if (cacheRule) {
            RobotRulesParser.Companion.getCACHE().put(cacheKey, robotRules);
            // When the redirect landed on another host, remember the rules for that host as well.
            if (redir != null && !StringsKt.equals((String)redir.getHost(), (String)url.getHost(), (boolean)true)) {
                RobotRulesParser.Companion.getCACHE().put(Companion.getCacheKey(redir), robotRules);
            }
        }
        return robotRules;
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000 \n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u000e\n\u0000\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0010\u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000bH\u0004R\u0019\u0010\u0003\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0006\u0010\u0007\u00a8\u0006\f"}, d2={"Lai/platon/pulsar/crawl/protocol/http/HttpRobotRulesParser$Companion;", "", "()V", "LOG", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getLOG", "()Lorg/slf4j/Logger;", "getCacheKey", "", "url", "Ljava/net/URL;", "pulsar-skeleton"})
    /** Holder for the members that were declared in the Kotlin companion object. */
    public static final class Companion {
        private Companion() {
        }

        /** Returns the logger shared by {@link HttpRobotRulesParser}. */
        public final Logger getLOG() {
            return LOG;
        }

        /**
         * Builds the cache key {@code protocol:host:port} for a URL.
         *
         * <p>Protocol and host are lower-cased with {@link Locale#ROOT} so that the key does not
         * depend on the JVM's default locale. When the URL carries no explicit port, the
         * protocol's default port is used.
         *
         * @param url URL to derive the key from; must not be null
         * @return the cache key
         */
        @NotNull
        protected final String getCacheKey(@NotNull URL url) {
            Intrinsics.checkNotNullParameter((Object)url, (String)"url");
            String protocol = url.getProtocol().toLowerCase(Locale.ROOT);
            String host = url.getHost().toLowerCase(Locale.ROOT);
            int port = url.getPort();
            if (port == -1) {
                port = url.getDefaultPort();
            }
            return protocol + ":" + host + ":" + port;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

