/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.parse.html;

import ai.platon.pulsar.common.DomUtil;
import ai.platon.pulsar.common.EncodingDetector;
import ai.platon.pulsar.common.NodeWalker;
import ai.platon.pulsar.common.Strings;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.filter.CrawlFilters;
import ai.platon.pulsar.crawl.parse.ParseResult;
import ai.platon.pulsar.crawl.parse.Parser;
import ai.platon.pulsar.crawl.parse.html.HTMLMetaTags;
import ai.platon.pulsar.crawl.parse.html.JsoupParser;
import ai.platon.pulsar.crawl.parse.html.ParseContext;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.HyperlinkPersistable;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.metadata.MultiMetadata;
import com.google.common.collect.Maps;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import kotlin.Deprecated;
import kotlin.Metadata;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.helper.W3CDom;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u00b6\u0001\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010#\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010$\n\u0002\b\u0002\n\u0002\u0010%\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\b\u0004\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\u0018\u00002\u00020\u0001:\u0001EB\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J4\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0016\u001a\u00020\u00172\f\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0019\u001a\u00020\u001a2\b\u0010\u001b\u001a\u0004\u0018\u00010\u001cJ\u001c\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0016\u001a\u00020\u00172\u0006\u0010\u0019\u001a\u00020\u001aJ&\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0016\u001a\u00020\u00172\u0006\u0010\u0019\u001a\u00020\u001a2\b\u0010\u001b\u001a\u0004\u0018\u00010\u001cJ\u000e\u0010\u001d\u001a\u00020\u001e2\u0006\u0010\u001f\u001a\u00020 
J\u0010\u0010!\u001a\u0004\u0018\u00010\u00172\u0006\u0010\u0019\u001a\u00020\u001aJ0\u0010\"\u001a\u00020\u001e2\u0006\u0010\u0016\u001a\u00020\u00172\f\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0019\u001a\u00020\u001a2\b\u0010\u001b\u001a\u0004\u0018\u00010\u001cH\u0002J\u001a\u0010#\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00060$2\u0006\u0010\u0019\u001a\u00020\u001aJ$\u0010%\u001a\u00020\u001e2\u0012\u0010&\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00060'2\u0006\u0010(\u001a\u00020\u001aH\u0002J\u001a\u0010)\u001a\u00020\u001e2\n\u0010*\u001a\u00060+j\u0002`,2\u0006\u0010\u0019\u001a\u00020\u001aJ\"\u0010)\u001a\u00020-2\n\u0010*\u001a\u00060+j\u0002`,2\u0006\u0010\u0019\u001a\u00020\u001a2\u0006\u0010.\u001a\u00020-J\u000e\u0010)\u001a\u00020\u00062\u0006\u0010\u0019\u001a\u00020\u001aJ\u001c\u0010/\u001a\u00020-2\n\u0010*\u001a\u00060+j\u0002`,2\u0006\u0010\u0019\u001a\u00020\u001aH\u0002J\u000e\u0010/\u001a\u00020\u00062\u0006\u0010\u0019\u001a\u00020\u001aJ,\u00100\u001a\u00020-2\n\u0010*\u001a\u00060+j\u0002`,2\u0006\u0010\u0019\u001a\u00020\u001a2\u0006\u0010.\u001a\u00020-2\u0006\u00101\u001a\u000202H\u0002J\u0010\u00103\u001a\u00020-2\u0006\u0010\u0019\u001a\u00020\u001aH\u0002J\u0010\u00104\u001a\u0002052\u0006\u00106\u001a\u000207H\u0002J\u000e\u00108\u001a\u0002092\u0006\u0010\u001f\u001a\u00020 J\u0018\u0010:\u001a\u00020\u001e2\u0006\u0010\u001f\u001a\u00020 2\u0006\u0010;\u001a\u00020<H\u0002J \u0010:\u001a\u0002072\u0006\u0010=\u001a\u00020\u00172\u0006\u0010>\u001a\u00020?2\u0006\u0010\u001f\u001a\u00020 
H\u0003J(\u0010@\u001a\u00020-2\u0006\u0010\u0019\u001a\u00020\u001a2\u0006\u0010A\u001a\u00020B2\u0006\u0010C\u001a\u0002022\u0006\u0010D\u001a\u00020\rH\u0002R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u000e\u0010\t\u001a\u00020\nX\u0082\u000e\u00a2\u0006\u0002\n\u0000R*\u0010\u000b\u001a\u001e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\r0\fj\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\r`\u000eX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\u000f\u001a\n \u0011*\u0004\u0018\u00010\u00100\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0010\u0010\u0012\u001a\u0004\u0018\u00010\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006F"}, d2={"Lai/platon/pulsar/crawl/parse/html/PrimerParser;", "", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "cachingPolicy", "", "getConf", "()Lai/platon/pulsar/common/config/ImmutableConfig;", "encodingDetector", "Lai/platon/pulsar/common/EncodingDetector;", "linkParams", "Ljava/util/HashMap;", "Lai/platon/pulsar/crawl/parse/html/PrimerParser$LinkParams;", "Lkotlin/collections/HashMap;", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "tracer", "collectLinks", "", "Lai/platon/pulsar/persist/HyperlinkPersistable;", "base", "Ljava/net/URL;", "hyperlinks", "root", "Lorg/w3c/dom/Node;", "crawlFilters", "Lai/platon/pulsar/crawl/filter/CrawlFilters;", "detectEncoding", "", "page", "Lai/platon/pulsar/persist/WebPage;", "getBaseURLFromTag", "getLinksStep2", "getMetadata", "", "getMetadataFromMetaTag", "metadata", "", "metaNode", "getPageText", "sb", "Ljava/lang/StringBuilder;", "Lkotlin/text/StringBuilder;", "", "abortOnNestedAnchors", "getPageTitle", "getTextHelper", "anchorDepth_", "", "hasOnlyWhiteSpace", "initParseResult", "Lai/platon/pulsar/crawl/parse/ParseResult;", "metaTags", 
"Lai/platon/pulsar/crawl/parse/html/HTMLMetaTags;", "parseHTMLDocument", "Lai/platon/pulsar/crawl/parse/html/ParseContext;", "parseMetaTags", "document", "Lai/platon/pulsar/dom/FeaturedDocument;", "baseURL", "docRoot", "Lorg/w3c/dom/DocumentFragment;", "shouldThrowAwayLink", "children", "Lorg/w3c/dom/NodeList;", "childLen", "params", "LinkParams", "pulsar-skeleton"})
public final class PrimerParser {
    /** Configuration supplied to the constructor; exposed through {@link #getConf()}. */
    @NotNull
    private final ImmutableConfig conf;
    /** Logger obtained from {@code LoggerFactory.getLogger(Parser.class)}. */
    private final Logger logger;
    /** The same logger when trace output was enabled at construction time, otherwise {@code null}. */
    @Nullable
    private final Logger tracer;
    /**
     * Value of {@code parser.caching.forbidden.policy} (default {@code "content"}); stored in the page
     * metadata under {@code caching.forbidden} when the meta tags report no-cache.
     */
    @NotNull
    private final String cachingPolicy;
    /** Used by {@code detectEncoding} to sniff the encoding of a page. */
    @NotNull
    private EncodingDetector encodingDetector;
    /** Link extraction rules, keyed by element name (looked up with the lower-cased node name). */
    @NotNull
    private final HashMap<String, LinkParams> linkParams;

    /*
     * WARNING - void declaration
     */
    public PrimerParser(@NotNull ImmutableConfig conf) {
        void it;
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        this.conf = conf;
        Logger logger = this.logger = LoggerFactory.getLogger(Parser.class);
        boolean bl = false;
        boolean bl2 = false;
        Object object = logger;
        PrimerParser primerParser = this;
        boolean bl3 = false;
        boolean bl4 = it.isTraceEnabled();
        primerParser.tracer = bl4 ? logger : null;
        this.cachingPolicy = this.conf.get("parser.caching.forbidden.policy", "content");
        this.encodingDetector = new EncodingDetector(this.conf);
        this.linkParams = new HashMap();
        bl = false;
        ArrayList<String> forceTags = new ArrayList<String>();
        this.linkParams.clear();
        Map map2 = this.linkParams;
        String string = "a";
        object = new LinkParams("a", "href", 1);
        boolean bl5 = false;
        map2.put(string, object);
        map2 = this.linkParams;
        string = "area";
        object = new LinkParams("area", "href", 0);
        bl5 = false;
        map2.put(string, object);
        if (this.conf.getBoolean("parser.html.form.use_action", true)) {
            map2 = this.linkParams;
            string = "form";
            object = new LinkParams("form", "action", 1);
            bl5 = false;
            map2.put(string, object);
            if (this.conf.get("parser.html.form.use_action") != null) {
                forceTags.add("form");
            }
        }
        map2 = this.linkParams;
        string = "frame";
        object = new LinkParams("frame", "src", 0);
        bl5 = false;
        map2.put(string, object);
        map2 = this.linkParams;
        string = "iframe";
        object = new LinkParams("iframe", "src", 0);
        bl5 = false;
        map2.put(string, object);
        map2 = this.linkParams;
        string = "script";
        object = new LinkParams("script", "src", 0);
        bl5 = false;
        map2.put(string, object);
        map2 = this.linkParams;
        string = "link";
        object = new LinkParams("link", "href", 0);
        bl5 = false;
        map2.put(string, object);
        map2 = this.linkParams;
        string = "img";
        object = new LinkParams("img", "src", 0);
        bl5 = false;
        map2.put(string, object);
        String[] ignoreTags = this.conf.getStrings("parser.html.outlinks.ignore_tags");
        int i = 0;
        while (i < ignoreTags.length) {
            if (!forceTags.contains(ignoreTags[i])) {
                this.linkParams.remove(ignoreTags[i]);
            }
            int n = i;
            i = n + 1;
        }
    }

    /**
     * Returns the configuration this parser was constructed with.
     *
     * @return the configuration passed to the constructor
     */
    @NotNull
    public final ImmutableConfig getConf() {
        return conf;
    }

    /**
     * Sniffs the encoding of {@code page} and, when a non-empty encoding is found, stores it
     * on the page together with the detector's clues. Logs a warning otherwise.
     *
     * @param page the page whose encoding should be detected; must not be null
     */
    public final void detectEncoding(@NotNull WebPage page) {
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        String sniffed = this.encodingDetector.sniffEncoding(page);
        boolean detected = sniffed != null && !sniffed.isEmpty();
        if (!detected) {
            this.logger.warn("Failed to detect encoding, url: " + page.getUrl());
            return;
        }
        page.setEncoding(sniffed);
        page.setEncodingClues(this.encodingDetector.getCluesAsString());
    }

    /**
     * Parses the HTML content of {@code page} with {@link JsoupParser}.
     *
     * <p>If the page has no encoding yet, {@link #detectEncoding(WebPage)} is run first.
     *
     * @param page the page to parse; must not be null
     * @return a parse context holding the page and the parsed document
     * @throws Exception declared by the original signature
     */
    @NotNull
    public final ParseContext parseHTMLDocument(@NotNull WebPage page) throws Exception {
        Intrinsics.checkNotNullParameter((Object)page, (String)"page");
        if (this.tracer != null) {
            this.tracer.trace(
                    "{}.\tParsing page | {} | {} | {} | {}",
                    new Object[]{
                            page.getId(),
                            Strings.readableBytes((long)page.getContentLength()),
                            page.getProtocolStatus(),
                            page.getHtmlIntegrity(),
                            page.getUrl()
                    });
        }
        if (page.getEncoding() == null) {
            this.detectEncoding(page);
        }
        JsoupParser parser = new JsoupParser(page, this.conf);
        parser.parse();
        return new ParseContext(page, parser.getDocument());
    }

    /**
     * Converts {@code document} to a W3C document fragment, parses its meta tags against the
     * page's base URL (falling back to the page URL when no base URL is set) and builds a
     * parse result from them. The parse result is not returned or stored.
     *
     * @param page the page whose URL is used and whose metadata is updated by the delegate
     * @param document the parsed document to read meta tags from
     */
    private final void parseMetaTags(WebPage page, FeaturedDocument document) {
        String baseUrl = page.getBaseUrl();
        if (baseUrl == null) {
            baseUrl = page.getUrl();
            Intrinsics.checkNotNullExpressionValue((Object)baseUrl, (String)"page.url");
        }
        URL baseURL = new URL(baseUrl);
        DocumentFragment fragment = new W3CDom().fromJsoup(document.getDocument()).createDocumentFragment();
        Intrinsics.checkNotNullExpressionValue((Object)fragment, (String)"fragment");
        this.initParseResult(this.parseMetaTags(baseURL, fragment, page));
    }

    /**
     * Builds {@link HTMLMetaTags} for {@code docRoot} and copies every general tag into the
     * page metadata under the key {@code "meta_" + name}. When the meta tags report no-cache,
     * the configured caching policy is stored under {@code caching.forbidden}.
     *
     * @param baseURL the base URL handed to {@link HTMLMetaTags}
     * @param docRoot the document fragment to read meta tags from
     * @param page the page whose metadata is updated
     * @return the parsed meta tags
     */
    @Deprecated(message="Just use jsoup to parse meta tags")
    private final HTMLMetaTags parseMetaTags(URL baseURL, DocumentFragment docRoot, WebPage page) {
        HTMLMetaTags metaTags = new HTMLMetaTags(docRoot, baseURL);
        MultiMetadata generalTags = metaTags.getGeneralTags();
        ai.platon.pulsar.persist.Metadata pageMetadata = page.getMetadata();
        Set tagNames = generalTags.names();
        Intrinsics.checkNotNullExpressionValue((Object)tagNames, (String)"tags.names()");
        for (Object tagName : tagNames) {
            String name = (String)tagName;
            pageMetadata.set("meta_" + name, generalTags.get(name));
        }
        boolean cachingForbidden = metaTags.getNoCache();
        if (cachingForbidden) {
            pageMetadata.set("caching.forbidden", this.cachingPolicy);
        }
        return metaTags;
    }

    /**
     * Creates the initial parse result for a page from its meta tags.
     *
     * <p>A no-index page gets minor code 101. Otherwise the minor code is 0, or 100 when a
     * refresh is requested, in which case {@code refreshHref} and {@code refreshTime} are
     * added to the result's arguments.
     *
     * @param metaTags the parsed meta tags of the page
     * @return the freshly created parse result
     */
    private final ParseResult initParseResult(HTMLMetaTags metaTags) {
        if (metaTags.getNoIndex()) {
            return new ParseResult(1, 101, null, 4, null);
        }
        ParseResult result = new ParseResult(1, 0, null, 4, null);
        if (!metaTags.getRefresh()) {
            return result;
        }
        result.setMinorCode(100);

        Map hrefArgs = result.getArgs();
        Intrinsics.checkNotNullExpressionValue((Object)hrefArgs, (String)"parseResult.args");
        hrefArgs.put("refreshHref", String.valueOf(metaTags.getRefreshHref()));

        Map timeArgs = result.getArgs();
        Intrinsics.checkNotNullExpressionValue((Object)timeArgs, (String)"parseResult.args");
        timeArgs.put("refreshTime", String.valueOf(metaTags.getRefreshTime()));
        return result;
    }

    /**
     * Appends the text found under {@code root} to {@code sb}.
     *
     * @param sb the builder that receives the text; must not be null
     * @param root the node to start from; must not be null
     * @param abortOnNestedAnchors whether to stop when a second {@code a} node is met
     * @return {@code true} if the walk was aborted because of nested anchors
     */
    public final boolean getPageText(@NotNull StringBuilder sb, @NotNull Node root, boolean abortOnNestedAnchors) {
        Intrinsics.checkNotNullParameter((Object)sb, (String)"sb");
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        final int initialAnchorDepth = 0;
        boolean aborted = this.getTextHelper(sb, root, abortOnNestedAnchors, initialAnchorDepth);
        return aborted;
    }

    /**
     * Appends the text found under {@code root} to {@code sb}, without aborting on nested anchors.
     *
     * @param sb the builder that receives the text; must not be null
     * @param root the node to start from; must not be null
     */
    public final void getPageText(@NotNull StringBuilder sb, @NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)sb, (String)"sb");
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        // Same as the three-argument overload with abortOnNestedAnchors = false.
        this.getTextHelper(sb, root, false, 0);
    }

    /**
     * Returns the text found under {@code root} as a new string.
     *
     * @param root the node to start from; must not be null
     * @return the collected text
     */
    @NotNull
    public final String getPageText(@NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        StringBuilder collected = new StringBuilder();
        // Same as the three-argument overload with abortOnNestedAnchors = false.
        this.getTextHelper(collected, root, false, 0);
        String pageText = collected.toString();
        Intrinsics.checkNotNullExpressionValue((Object)pageText, (String)"sb.toString()");
        return pageText;
    }

    /**
     * Returns the text of the first {@code title} element found before any {@code body} node,
     * or an empty string when there is none.
     *
     * @param root the node to start from; must not be null
     * @return the title text, possibly empty
     */
    @NotNull
    public final String getPageTitle(@NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        StringBuilder titleBuffer = new StringBuilder();
        this.getPageTitle(titleBuffer, root);
        String title = titleBuffer.toString();
        Intrinsics.checkNotNullExpressionValue((Object)title, (String)"sb.toString()");
        return title;
    }

    /**
     * Walks the nodes under {@code root} and appends the text of the first {@code title}
     * element to {@code sb}. The walk stops as soon as a node named {@code body} is reached.
     *
     * @param sb the builder that receives the title text
     * @param root the node to start from
     * @return {@code true} if a title element was found
     */
    private final boolean getPageTitle(StringBuilder sb, Node root) {
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            Node candidate = walker.nextNode();
            String tag = candidate.getNodeName();
            short kind = candidate.getNodeType();
            if (StringsKt.equals("body", tag, true)) {
                // The title can only appear before the body; give up.
                return false;
            }
            boolean isTitleElement = kind == Node.ELEMENT_NODE && StringsKt.equals("title", tag, true);
            if (isTitleElement) {
                Intrinsics.checkNotNullExpressionValue((Object)candidate, (String)"node");
                this.getPageText(sb, candidate);
                return true;
            }
        }
        return false;
    }

    /**
     * Collects head metadata from the nodes under {@code root}.
     *
     * <p>The text of each {@code title} element is stored under {@code meta-title}; each
     * {@code meta} element is handed to {@code getMetadataFromMetaTag}, which may add
     * {@code meta-keywords} or {@code meta-description}. The walk stops at the first node
     * named {@code body}.
     *
     * @param root the node to start from; must not be null
     * @return an insertion-ordered map of the metadata found, possibly empty
     */
    @NotNull
    public final Map<String, String> getMetadata(@NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        Map<String, String> metadata = Maps.newLinkedHashMap();
        StringBuilder sb = new StringBuilder();
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            String nodeName = currentNode.getNodeName();
            short nodeType = currentNode.getNodeType();
            if (StringsKt.equals("body", nodeName, true)) {
                // Metadata lives before the body; nothing more to collect.
                return metadata;
            }
            if (nodeType != Node.ELEMENT_NODE) {
                continue;
            }
            if (StringsKt.equals("title", nodeName, true)) {
                sb.setLength(0);
                this.getPageText(sb, currentNode);
                metadata.put("meta-title", sb.toString());
            } else if (StringsKt.equals("meta", nodeName, true)) {
                // Fix: inspect the <meta> element itself. The previous code passed the walk
                // root, so the name/content attributes of the meta element were never read.
                this.getMetadataFromMetaTag(metadata, currentNode);
            }
        }
        return metadata;
    }

    /**
     * Copies the {@code content} attribute of a {@code keywords} or {@code description}
     * meta element into {@code metadata} under {@code meta-keywords} or
     * {@code meta-description}. Nodes without a {@code name} attribute, with any other name,
     * or without a {@code content} attribute are ignored.
     *
     * @param metadata the map that receives the value
     * @param metaNode the node whose {@code name} and {@code content} attributes are read
     */
    private final void getMetadataFromMetaTag(Map<String, String> metadata, Node metaNode) {
        String name = DomUtil.getAttribute(metaNode, "name");
        if (name == null) {
            return;
        }
        String key;
        if (StringsKt.equals("keywords", name, true)) {
            key = "meta-keywords";
        } else if (StringsKt.equals("description", name, true)) {
            key = "meta-description";
        } else {
            // Not a meta tag we record.
            return;
        }
        String content = DomUtil.getAttribute(metaNode, "content");
        if (content != null) {
            metadata.put(key, content);
        }
    }

    /**
     * Looks for a {@code base} element before the {@code body} element and returns the URL in
     * its {@code href} attribute.
     *
     * @param root the node to start from; must not be null
     * @return the first well-formed {@code href} of a {@code base} element, or {@code null}
     *         when the body element is reached or no such URL exists
     */
    @Nullable
    public final URL getBaseURLFromTag(@NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            Node element = walker.nextNode();
            String tag = element.getNodeName();
            if (element.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (StringsKt.equals("body", tag, true)) {
                return null;
            }
            if (!StringsKt.equals("base", tag, true)) {
                continue;
            }
            NamedNodeMap attributes = element.getAttributes();
            int attributeCount = attributes.getLength();
            for (int index = 0; index < attributeCount; ++index) {
                Node attribute = attributes.item(index);
                if (!StringsKt.equals("href", attribute.getNodeName(), true)) {
                    continue;
                }
                try {
                    return new URL(attribute.getNodeValue());
                }
                catch (MalformedURLException ignored) {
                    // Malformed href: keep scanning the remaining attributes and nodes.
                }
            }
        }
        return null;
    }

    /**
     * Walks the nodes under {@code root} and appends the text of every text node to {@code sb}.
     *
     * <p>Children of {@code script} and {@code style} nodes and of comment nodes are skipped.
     * In each text node, runs of whitespace are collapsed to a single space and the result is
     * trimmed; non-empty pieces are appended to {@code sb}, separated by a single space.
     *
     * @param sb the builder that receives the text
     * @param root the node to start from
     * @param abortOnNestedAnchors when true, the walk stops once the anchor depth exceeds 1
     * @param anchorDepth_ the number of {@code a} nodes already counted by the caller
     * @return {@code true} if the walk was aborted because of nested anchors
     */
    private final boolean getTextHelper(StringBuilder sb, Node root, boolean abortOnNestedAnchors, int anchorDepth_) {
        int anchorDepth = anchorDepth_;
        boolean abort = false;
        // Built once per call instead of once per text node.
        Regex whitespaceRun = new Regex("\\s+");
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            String nodeName = currentNode.getNodeName();
            short nodeType = currentNode.getNodeType();
            if (StringsKt.equals("script", nodeName, true)) {
                walker.skipChildren();
            }
            if (StringsKt.equals("style", nodeName, true)) {
                walker.skipChildren();
            }
            if (abortOnNestedAnchors && StringsKt.equals("a", nodeName, true)) {
                ++anchorDepth;
                if (anchorDepth > 1) {
                    abort = true;
                    break;
                }
            }
            if (nodeType == Node.COMMENT_NODE) {
                walker.skipChildren();
            }
            if (nodeType != Node.TEXT_NODE) {
                continue;
            }
            String text = currentNode.getNodeValue();
            Intrinsics.checkNotNullExpressionValue((Object)text, (String)"text");
            // String.trim() strips characters <= ' ', the same rule as the former inlined loop.
            text = whitespaceRun.replace((CharSequence)text, " ").trim();
            if (text.isEmpty()) {
                continue;
            }
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(text);
        }
        return abort;
    }

    /**
     * Tells whether the value of {@code root} consists solely of whitespace characters, as
     * defined by {@link Character#isWhitespace(char)}. An empty value counts as whitespace.
     *
     * @param root the node whose value is inspected
     * @return {@code true} if no non-whitespace character is present
     */
    private final boolean hasOnlyWhiteSpace(Node root) {
        String value = root.getNodeValue();
        Intrinsics.checkNotNullExpressionValue((Object)value, (String)"root.nodeValue");
        for (char ch : value.toCharArray()) {
            if (!Character.isWhitespace(ch)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decides whether a link element should be discarded, based on its direct children.
     *
     * <p>The link is thrown away when it has no children although {@code params} expects
     * some, or when its only children are a single element of the same name as the link,
     * optionally surrounded by whitespace-only text nodes (one or two children on either
     * side, three children on both sides).
     *
     * @param root the link element (not inspected)
     * @param children the child nodes of the link element
     * @param childLen the number of child nodes
     * @param params the link parameters registered for this element name
     * @return {@code true} if the link should be ignored
     */
    private final boolean shouldThrowAwayLink(Node root, NodeList children, int childLen, LinkParams params) {
        switch (childLen) {
            case 0: {
                return params.getChildLen() != 0;
            }
            case 1: {
                Node only = children.item(0);
                return only.getNodeType() == Node.ELEMENT_NODE
                        && StringsKt.equals(params.getElName(), children.item(0).getNodeName(), true);
            }
            case 2: {
                Node first = children.item(0);
                Node second = children.item(1);
                // <same-element> followed by blank text
                if (first.getNodeType() == Node.ELEMENT_NODE
                        && StringsKt.equals(params.getElName(), first.getNodeName(), true)
                        && second.getNodeType() == Node.TEXT_NODE
                        && this.hasOnlyWhiteSpace(second)) {
                    return true;
                }
                // blank text followed by <same-element>
                return second.getNodeType() == Node.ELEMENT_NODE
                        && StringsKt.equals(params.getElName(), second.getNodeName(), true)
                        && first.getNodeType() == Node.TEXT_NODE
                        && this.hasOnlyWhiteSpace(first);
            }
            case 3: {
                Node before = children.item(0);
                Node middle = children.item(1);
                Node after = children.item(2);
                // blank text, <same-element>, blank text
                return middle.getNodeType() == Node.ELEMENT_NODE
                        && StringsKt.equals(params.getElName(), middle.getNodeName(), true)
                        && before.getNodeType() == Node.TEXT_NODE
                        && after.getNodeType() == Node.TEXT_NODE
                        && this.hasOnlyWhiteSpace(before)
                        && this.hasOnlyWhiteSpace(after);
            }
            default: {
                return false;
            }
        }
    }

    /**
     * Collects the hyperlinks under {@code root} without applying any crawl filter.
     *
     * @param base the URL against which link targets are resolved; must not be null
     * @param root the node to start from; must not be null
     * @return the links found
     */
    @NotNull
    public final Set<HyperlinkPersistable> collectLinks(@NotNull URL base, @NotNull Node root) {
        Intrinsics.checkNotNullParameter((Object)base, (String)"base");
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        final CrawlFilters noFilters = null;
        return this.collectLinks(base, root, noFilters);
    }

    /**
     * Collects the hyperlinks under {@code root} into a new insertion-ordered set.
     *
     * @param base the URL against which link targets are resolved; must not be null
     * @param root the node to start from; must not be null
     * @param crawlFilters optional filters deciding which nodes may be visited; may be null
     * @return the links found
     */
    @NotNull
    public final Set<HyperlinkPersistable> collectLinks(@NotNull URL base, @NotNull Node root, @Nullable CrawlFilters crawlFilters) {
        Intrinsics.checkNotNullParameter((Object)base, (String)"base");
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        Set<HyperlinkPersistable> collected = new LinkedHashSet<HyperlinkPersistable>();
        return this.collectLinks(base, collected, root, crawlFilters);
    }

    /**
     * Collects the hyperlinks under {@code root} into {@code hyperlinks}.
     *
     * <p>Each node that passes {@code crawlFilters} (or every node when no filters are given)
     * is processed as a whole subtree and its children are then skipped by the outer walk;
     * nodes that do not pass are logged at debug level and the walk descends into them.
     *
     * @param base the URL against which link targets are resolved; must not be null
     * @param hyperlinks the set that receives the links; must not be null
     * @param root the node to start from; must not be null
     * @param crawlFilters optional filters deciding which nodes may be visited; may be null
     * @return {@code hyperlinks}, for chaining
     */
    @NotNull
    public final Set<HyperlinkPersistable> collectLinks(@NotNull URL base, @NotNull Set<HyperlinkPersistable> hyperlinks, @NotNull Node root, @Nullable CrawlFilters crawlFilters) {
        Intrinsics.checkNotNullParameter((Object)base, (String)"base");
        Intrinsics.checkNotNullParameter(hyperlinks, (String)"hyperlinks");
        Intrinsics.checkNotNullParameter((Object)root, (String)"root");
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            Node visited = walker.nextNode();
            boolean rejected = crawlFilters != null && !crawlFilters.isAllowed(visited);
            if (rejected) {
                this.logger.debug("Block disallowed, skip : " + DomUtil.getPrettyName((Node)visited));
                continue;
            }
            Intrinsics.checkNotNullExpressionValue((Object)visited, (String)"currentNode");
            this.getLinksStep2(base, hyperlinks, visited, crawlFilters);
            // The subtree has been handled by getLinksStep2 already.
            walker.skipChildren();
        }
        return hyperlinks;
    }

    /*
     * WARNING - void declaration
     */
    private final void getLinksStep2(URL base, Set<HyperlinkPersistable> hyperlinks, Node root, CrawlFilters crawlFilters) {
        NodeWalker walker = new NodeWalker(root);
        while (walker.hasNext()) {
            int n;
            int childLen;
            Node currentNode = walker.nextNode();
            if (crawlFilters != null && crawlFilters.isDisallowed(currentNode)) {
                this.logger.debug("Block disallowed, skip : " + DomUtil.getPrettyName((Node)currentNode));
                walker.skipChildren();
                continue;
            }
            String nodeName = currentNode.getNodeName();
            short nodeType = currentNode.getNodeType();
            NodeList children = currentNode.getChildNodes();
            Object object = children;
            int n2 = childLen = object == null ? 0 : (n = object.getLength());
            if (nodeType != 1) continue;
            object = nodeName;
            Intrinsics.checkNotNullExpressionValue((Object)object, (String)"nodeName");
            n = 0;
            String string = ((String)object).toLowerCase();
            Intrinsics.checkNotNullExpressionValue((Object)string, (String)"(this as java.lang.String).toLowerCase()");
            nodeName = string;
            LinkParams params = this.linkParams.get(nodeName);
            if (params == null) continue;
            Intrinsics.checkNotNullExpressionValue((Object)currentNode, (String)"currentNode");
            Intrinsics.checkNotNullExpressionValue((Object)children, (String)"children");
            if (!this.shouldThrowAwayLink(currentNode, children, childLen, params)) {
                StringBuilder linkText = new StringBuilder();
                this.getPageText(linkText, currentNode, true);
                NamedNodeMap attrs = currentNode.getAttributes();
                String target = null;
                boolean noFollow = false;
                boolean post = false;
                boolean allow = true;
                int n3 = 0;
                int n4 = attrs.getLength();
                if (n3 < n4) {
                    do {
                        int i = n3++;
                        Node attr = attrs.item(i);
                        String attrName = attr.getNodeName();
                        if (StringsKt.equals((String)params.getAttrName(), (String)attrName, (boolean)true)) {
                            target = attr.getNodeValue();
                            continue;
                        }
                        if (StringsKt.equals((String)"rel", (String)attrName, (boolean)true) && StringsKt.equals((String)"nofollow", (String)attr.getNodeValue(), (boolean)true)) {
                            noFollow = true;
                            continue;
                        }
                        if (StringsKt.equals((String)"rel", (String)attrName, (boolean)true) && StringsKt.equals((String)"qi-nofollow", (String)attr.getNodeValue(), (boolean)true)) {
                            allow = false;
                            continue;
                        }
                        if (!StringsKt.equals((String)"method", (String)attrName, (boolean)true) || !StringsKt.equals((String)"post", (String)attr.getNodeValue(), (boolean)true)) continue;
                        post = true;
                    } while (n3 < n4);
                }
                if (target != null && !noFollow && !post) {
                    try {
                        String string2;
                        void $this$trim$iv;
                        URL url = UrlUtils.resolveURL((URL)base, target);
                        String string3 = url.toString();
                        String string4 = linkText.toString();
                        Intrinsics.checkNotNullExpressionValue((Object)string4, (String)"linkText.toString()");
                        String string5 = string3;
                        Set<HyperlinkPersistable> set = hyperlinks;
                        boolean $i$f$trim = false;
                        CharSequence $this$trim$iv$iv = (CharSequence)$this$trim$iv;
                        boolean $i$f$trim2 = false;
                        int startIndex$iv$iv = 0;
                        int endIndex$iv$iv = $this$trim$iv$iv.length() - 1;
                        boolean startFound$iv$iv = false;
                        while (startIndex$iv$iv <= endIndex$iv$iv) {
                            boolean match$iv$iv;
                            int index$iv$iv = !startFound$iv$iv ? startIndex$iv$iv : endIndex$iv$iv;
                            char it = $this$trim$iv$iv.charAt(index$iv$iv);
                            boolean bl = false;
                            boolean bl2 = match$iv$iv = Intrinsics.compare((int)it, (int)32) <= 0;
                            if (!startFound$iv$iv) {
                                if (!match$iv$iv) {
                                    startFound$iv$iv = true;
                                    continue;
                                }
                                ++startIndex$iv$iv;
                                continue;
                            }
                            if (!match$iv$iv) break;
                            --endIndex$iv$iv;
                        }
                        String string6 = string2 = ((Object)$this$trim$iv$iv.subSequence(startIndex$iv$iv, endIndex$iv$iv + 1)).toString();
                        String string7 = string5;
                        set.add(new HyperlinkPersistable(string7, string6));
                    }
                    catch (MalformedURLException malformedURLException) {
                        // empty catch block
                    }
                }
            }
            if (params.getChildLen() != 0) continue;
        }
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u001a\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\r\b\u0002\u0018\u00002\u00020\u0001B\u001d\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0006\u00a2\u0006\u0002\u0010\u0007J\b\u0010\u0012\u001a\u00020\u0003H\u0016R\u001a\u0010\u0004\u001a\u00020\u0003X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\b\u0010\t\"\u0004\b\n\u0010\u000bR\u001a\u0010\u0005\u001a\u00020\u0006X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\f\u0010\r\"\u0004\b\u000e\u0010\u000fR\u001a\u0010\u0002\u001a\u00020\u0003X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0010\u0010\t\"\u0004\b\u0011\u0010\u000b\u00a8\u0006\u0013"}, d2={"Lai/platon/pulsar/crawl/parse/html/PrimerParser$LinkParams;", "", "elName", "", "attrName", "childLen", "", "(Ljava/lang/String;Ljava/lang/String;I)V", "getAttrName", "()Ljava/lang/String;", "setAttrName", "(Ljava/lang/String;)V", "getChildLen", "()I", "setChildLen", "(I)V", "getElName", "setElName", "toString", "pulsar-skeleton"})
    private static final class LinkParams {
        /*
         * Mutable holder of three values describing how links are read from one kind of element:
         * an element name, an attribute name and a child-length number. Both names are
         * null-checked through Intrinsics.checkNotNullParameter whenever they are assigned.
         */

        /** Element name these parameters apply to (presumably the HTML tag name; confirm at the call sites). */
        @NotNull
        private String elName;

        /** Attribute name; callers compare it case-insensitively against node attributes to pick the link target. */
        @NotNull
        private String attrName;

        /** Child-length value; callers test it against zero. NOTE(review): exact meaning is not visible here. */
        private int childLen;

        /**
         * Creates a holder with the given values.
         *
         * @param elName   element name, checked for null
         * @param attrName attribute name, checked for null
         * @param childLen child-length value, stored as given
         */
        public LinkParams(@NotNull String elName, @NotNull String attrName, int childLen) {
            Intrinsics.checkNotNullParameter((Object)elName, (String)"elName");
            Intrinsics.checkNotNullParameter((Object)attrName, (String)"attrName");
            this.elName = elName;
            this.attrName = attrName;
            this.childLen = childLen;
        }

        /** @return the current element name */
        @NotNull
        public final String getElName() {
            return this.elName;
        }

        /**
         * Replaces the element name.
         *
         * @param value new element name, checked for null before it is stored
         */
        public final void setElName(@NotNull String value) {
            Intrinsics.checkNotNullParameter((Object)value, (String)"<set-?>");
            this.elName = value;
        }

        /** @return the current attribute name */
        @NotNull
        public final String getAttrName() {
            return this.attrName;
        }

        /**
         * Replaces the attribute name.
         *
         * @param value new attribute name, checked for null before it is stored
         */
        public final void setAttrName(@NotNull String value) {
            Intrinsics.checkNotNullParameter((Object)value, (String)"<set-?>");
            this.attrName = value;
        }

        /** @return the current child-length value */
        public final int getChildLen() {
            return this.childLen;
        }

        /**
         * Replaces the child-length value.
         *
         * @param value new child-length value, stored without validation
         */
        public final void setChildLen(int value) {
            this.childLen = value;
        }

        /**
         * Renders the three values as {@code LP[el=<elName>,attr=<attrName>,len=<childLen>]}.
         *
         * @return the formatted description
         */
        @Override
        @NotNull
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("LP[el=").append(this.elName);
            text.append(",attr=").append(this.attrName);
            text.append(",len=").append(this.childLen);
            text.append("]");
            return text.toString();
        }
    }
}

