/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.crawl.signature;

import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.crawl.signature.MD5Signature;
import ai.platon.pulsar.crawl.signature.Signature;
import ai.platon.pulsar.persist.WebPage;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import org.apache.hadoop.io.MD5Hash;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u00002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0000\n\u0002\u0010\u0007\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0012\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\u0018\u0000 \u000f2\u00020\u0001:\u0003\u000f\u0010\u0011B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0010\u0010\u000b\u001a\u00020\f2\u0006\u0010\r\u001a\u00020\u000eH\u0016R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u000e\u00a2\u0006\u0002\n\u0000R\u000e\u0010\t\u001a\u00020\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0012"}, d2={"Lai/platon/pulsar/crawl/signature/TextProfileSignature;", "Lai/platon/pulsar/crawl/signature/Signature;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "MIN_TOKEN_LEN", "", "QUANT_RATE", "", "fallback", "Lai/platon/pulsar/crawl/signature/MD5Signature;", "calculate", "", "page", "Lai/platon/pulsar/persist/WebPage;", "Companion", "Token", "TokenComparator", "pulsar-skeleton"})
/**
 * Page signature derived from a quantized word-frequency profile of the page text.
 *
 * <p>The text is split into lower-cased runs of letters/digits, each run longer than
 * {@code MIN_TOKEN_LEN} is counted, the counts are rounded down to a multiple of a
 * quantization step, tokens whose rounded count falls below that step are dropped, and
 * the remaining {@code "token count"} lines (highest count first) are hashed with MD5.
 * When the page yields no text at all, the signature is delegated to {@link MD5Signature}.
 */
public final class TextProfileSignature
extends Signature {
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Tokens must be strictly longer than this many characters to be counted. */
    private final int MIN_TOKEN_LEN;
    /** Fraction of the highest token frequency used as the quantization step. */
    private final float QUANT_RATE;
    /** Signature used when the page has no text to profile. */
    @NotNull
    private final MD5Signature fallback;
    /** Content text shorter than this is replaced by the page text before profiling. */
    private static int GOOD_CONTENT_TEXT_LENGTH = 2000;

    /**
     * Creates a signature calculator configured from {@code conf}.
     *
     * @param conf source of {@code db.signature.text_profile.min_token_len} (default 2)
     *             and {@code db.signature.text_profile.quant_rate} (default 0.01)
     */
    public TextProfileSignature(@NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter(conf, "conf");
        this.fallback = new MD5Signature();
        this.MIN_TOKEN_LEN = conf.getInt("db.signature.text_profile.min_token_len", 2);
        this.QUANT_RATE = conf.getFloat("db.signature.text_profile.quant_rate", 0.01f);
    }

    /**
     * Computes the text-profile signature of {@code page}.
     *
     * @param page the page whose content text (or page text, when the content text is
     *             empty or shorter than {@code GOOD_CONTENT_TEXT_LENGTH}) is profiled
     * @return the MD5 digest of the quantized token profile, or the result of the
     *         fallback {@link MD5Signature} when the selected text is empty
     */
    @Override
    @NotNull
    public byte[] calculate(@NotNull WebPage page) {
        Intrinsics.checkNotNullParameter(page, "page");

        String text = page.getContentText();
        Intrinsics.checkNotNullExpressionValue(text, "page.contentText");
        if (text.isEmpty() || text.length() < GOOD_CONTENT_TEXT_LENGTH) {
            text = page.getPageText();
            Intrinsics.checkNotNullExpressionValue(text, "page.pageText");
        }
        if (text.isEmpty()) {
            byte[] fallbackSignature = this.fallback.calculate(page);
            Intrinsics.checkNotNullExpressionValue(fallbackSignature, "fallback.calculate(page)");
            return fallbackSignature;
        }

        // Count every sufficiently long run of letters/digits, tracking the highest count.
        Map<String, Token> tokens = new HashMap<String, Token>();
        StringBuilder curToken = new StringBuilder();
        int maxFreq = 0;
        int length = text.length();
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                curToken.append(Character.toLowerCase(c));
            } else if (curToken.length() > 0) {
                maxFreq = Math.max(maxFreq, this.flushToken(curToken, tokens));
            }
        }
        // The text may end in the middle of a token.
        maxFreq = Math.max(maxFreq, this.flushToken(curToken, tokens));

        // The step is never below 1, so the division below cannot divide by zero.
        int quant = Math.round((float)maxFreq * this.QUANT_RATE);
        if (quant < 2) {
            quant = maxFreq > 1 ? 2 : 1;
        }

        // Round each count down to a multiple of the step and drop what falls below it.
        List<Token> profile = new ArrayList<Token>();
        for (Token token : tokens.values()) {
            token.setCnt(token.getCnt() / quant * quant);
            if (token.getCnt() >= quant) {
                profile.add(token);
            }
        }
        // Stable sort: tokens with equal counts keep the map's iteration order.
        profile.sort(new TokenComparator());

        StringBuilder newText = new StringBuilder();
        for (Token token : profile) {
            if (newText.length() > 0) {
                newText.append("\n");
            }
            newText.append(token.toString());
        }
        byte[] digest = MD5Hash.digest(newText.toString()).getDigest();
        Intrinsics.checkNotNullExpressionValue(digest, "digest(newText.toString()).digest");
        return digest;
    }

    /**
     * Counts the token accumulated in {@code curToken} if it is longer than
     * {@code MIN_TOKEN_LEN}, then clears the buffer.
     *
     * @return the token's updated count, or 0 when the token was too short to count
     */
    private int flushToken(StringBuilder curToken, Map<String, Token> tokens) {
        int count = 0;
        if (curToken.length() > this.MIN_TOKEN_LEN) {
            count = TextProfileSignature.recordToken(tokens, curToken.toString());
        }
        curToken.setLength(0);
        return count;
    }

    /** Increments the count of {@code value} in {@code tokens}, creating the entry on first sight. */
    private static int recordToken(Map<String, Token> tokens, String value) {
        Token token = tokens.get(value);
        if (token == null) {
            token = new Token(0, value);
            tokens.put(value, token);
        }
        token.setCnt(token.getCnt() + 1);
        return token.getCnt();
    }

    /** A token value together with its (possibly quantized) occurrence count. */
    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0018\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010\b\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u000b\b\u0002\u0018\u00002\u00020\u0001B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J\b\u0010\u000f\u001a\u00020\u0005H\u0016R\u001a\u0010\u0002\u001a\u00020\u0003X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0007\u0010\b\"\u0004\b\t\u0010\nR\u001a\u0010\u0004\u001a\u00020\u0005X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u000b\u0010\f\"\u0004\b\r\u0010\u000e\u00a8\u0006\u0010"}, d2={"Lai/platon/pulsar/crawl/signature/TextProfileSignature$Token;", "", "cnt", "", "val_", "", "(ILjava/lang/String;)V", "getCnt", "()I", "setCnt", "(I)V", "getVal_", "()Ljava/lang/String;", "setVal_", "(Ljava/lang/String;)V", "toString", "pulsar-skeleton"})
    private static final class Token {
        private int cnt;
        @NotNull
        private String val_;

        public Token(int cnt, @NotNull String val_) {
            Intrinsics.checkNotNullParameter(val_, "val_");
            this.cnt = cnt;
            this.val_ = val_;
        }

        public final int getCnt() {
            return this.cnt;
        }

        public final void setCnt(int n) {
            this.cnt = n;
        }

        @NotNull
        public final String getVal_() {
            return this.val_;
        }

        public final void setVal_(@NotNull String string) {
            Intrinsics.checkNotNullParameter(string, "<set-?>");
            this.val_ = string;
        }

        /** Renders the token as {@code "<value> <count>"}; this text is what gets hashed. */
        @NotNull
        public String toString() {
            return this.val_ + " " + this.cnt;
        }
    }

    /** Orders tokens by descending count. */
    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0018\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0003\b\u0002\u0018\u00002\b\u0012\u0004\u0012\u00020\u00020\u0001B\u0005\u00a2\u0006\u0002\u0010\u0003J\u0018\u0010\u0004\u001a\u00020\u00052\u0006\u0010\u0006\u001a\u00020\u00022\u0006\u0010\u0007\u001a\u00020\u0002H\u0016\u00a8\u0006\b"}, d2={"Lai/platon/pulsar/crawl/signature/TextProfileSignature$TokenComparator;", "Ljava/util/Comparator;", "Lai/platon/pulsar/crawl/signature/TextProfileSignature$Token;", "()V", "compare", "", "t1", "t2", "pulsar-skeleton"})
    private static final class TokenComparator
    implements Comparator<Token> {
        @Override
        public int compare(@NotNull Token t1, @NotNull Token t2) {
            Intrinsics.checkNotNullParameter(t1, "t1");
            Intrinsics.checkNotNullParameter(t2, "t2");
            // Arguments are swapped so that larger counts sort first.
            return Integer.compare(t2.getCnt(), t1.getCnt());
        }
    }

    /** Accessor object exposing the mutable {@code GOOD_CONTENT_TEXT_LENGTH} threshold. */
    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0014\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0005\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u001a\u0010\u0003\u001a\u00020\u0004X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0005\u0010\u0006\"\u0004\b\u0007\u0010\b\u00a8\u0006\t"}, d2={"Lai/platon/pulsar/crawl/signature/TextProfileSignature$Companion;", "", "()V", "GOOD_CONTENT_TEXT_LENGTH", "", "getGOOD_CONTENT_TEXT_LENGTH", "()I", "setGOOD_CONTENT_TEXT_LENGTH", "(I)V", "pulsar-skeleton"})
    public static final class Companion {
        private Companion() {
        }

        public final int getGOOD_CONTENT_TEXT_LENGTH() {
            return GOOD_CONTENT_TEXT_LENGTH;
        }

        public final void setGOOD_CONTENT_TEXT_LENGTH(int n) {
            GOOD_CONTENT_TEXT_LENGTH = n;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

