package nl.basjes.parse.httpdlog.dissectors;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Dissector;
import nl.basjes.parse.core.Parsable;
import nl.basjes.parse.core.ParsedField;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.httpdlog.Utils;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.http.cookie.ClientCookie;

/* loaded from: input_file:nl/basjes/parse/httpdlog/dissectors/HttpUriDissector.class */
/**
 * Dissector that takes an {@code HTTP.URI} value and splits it into protocol, userinfo, host,
 * port, path, query string and ref (fragment).
 *
 * <p>Before the value is handed to {@link URI#create(String)} it is rewritten so that a number of
 * malformed constructs (unescaped special characters, broken percent escapes, multiple
 * {@code '#'} characters, ...) no longer make the URI parser fail.
 *
 * <p>Only the outputs that were requested through {@link #prepareForDissect(String, String)} are
 * emitted by {@link #dissect(Parsable, String)}.
 */
public class HttpUriDissector extends Dissector {
    private static final String INPUT_TYPE = "HTTP.URI";

    /**
     * Byte values that {@link URLCodec#encodeUrl(BitSet, byte[])} copies through unchanged.
     * Every byte whose bit is NOT set here is percent-encoded.
     */
    private static final BitSet URI_SAFE_BYTES = createUriSafeBytes();

    /** A {@code '%'} that is not followed by two hex digits (also at the very end of the input). */
    private static final Pattern BAD_ESCAPE_PATTERN =
        Pattern.compile("%([^0-9a-fA-F]|[0-9a-fA-F][^0-9a-fA-F]|.$|$)");

    /** A {@code '#'} directly after a {@code '='}. */
    private static final Pattern EQUALS_HASH_PATTERN = Pattern.compile("=#");

    /** A {@code '#'} directly before a {@code '&'}. */
    private static final Pattern HASH_AMP_PATTERN = Pattern.compile("#&");

    /** Two {@code '#'} characters with anything in between (greedy: first to last). */
    private static final Pattern DOUBLE_HASH_PATTERN = Pattern.compile("#(.*)#");

    /** A {@code #xHH;} sequence that is not preceded by the {@code '&'} of an HTML entity. */
    private static final Pattern ALMOST_HTML_ENCODED =
        Pattern.compile("([^&])(#x[0-9a-fA-F][0-9a-fA-F];)");

    private boolean wantProtocol = false;
    private boolean wantUserinfo = false;
    private boolean wantHost = false;
    private boolean wantPort = false;
    private boolean wantPath = false;
    private boolean wantQuery = false;
    private boolean wantRef = false;

    /**
     * Builds the set of byte values that may pass through the URL encoder untouched.
     *
     * @return a bit set with one bit per pass-through byte value
     */
    private static BitSet createUriSafeBytes() {
        final BitSet safe = new BitSet(256);

        // Only 7-bit ASCII may pass through. The encoded bytes are decoded as US-ASCII afterwards,
        // so any raw byte >= 0x80 (part of a multi-byte UTF-8 sequence) would be turned into
        // U+FFFD; percent-encoding those bytes keeps the original characters intact.
        safe.set(0, 128);

        // Control characters 0x00-0x1F. BitSet ranges are end-exclusive, hence 32.
        safe.clear(0, 32);
        safe.clear(127); // DEL

        // Printable characters that get percent-encoded before parsing.
        safe.clear(' ');
        safe.clear('"');
        safe.clear('<');
        safe.clear('>');
        safe.clear('[');
        safe.clear('\\');
        safe.clear(']');
        safe.clear('^');
        safe.clear('`');
        safe.clear('{');
        safe.clear('|');
        safe.clear('}');
        return safe;
    }

    /**
     * @return the type of input this dissector consumes: {@code HTTP.URI}
     */
    @Override // nl.basjes.parse.core.Dissector
    public String getInputType() {
        return INPUT_TYPE;
    }

    /**
     * @return all {@code TYPE:name} outputs this dissector is able to produce
     */
    @Override // nl.basjes.parse.core.Dissector
    public List<String> getPossibleOutput() {
        final List<String> outputs = new ArrayList<>();
        outputs.add("HTTP.PROTOCOL:protocol");
        outputs.add("HTTP.USERINFO:userinfo");
        outputs.add("HTTP.HOST:host");
        outputs.add("HTTP.PORT:port");
        outputs.add("HTTP.PATH:path");
        outputs.add("HTTP.QUERYSTRING:query");
        outputs.add("HTTP.REF:ref");
        return outputs;
    }

    /**
     * Registers that a specific output is wanted, so {@link #dissect(Parsable, String)} emits it.
     *
     * @param inputname  the name of the input field
     * @param outputname the name of the requested output field
     * @return the casts available for the requested output, or {@code Casts.NO_CASTS} when the
     *         requested field is not one this dissector produces
     */
    @Override // nl.basjes.parse.core.Dissector
    public EnumSet<Casts> prepareForDissect(String inputname, String outputname) {
        final String name = extractFieldName(inputname, outputname);
        if (name == null) {
            // A switch on a null String would throw; an unknown field simply has no casts.
            return Casts.NO_CASTS;
        }
        switch (name) {
            case "protocol":
                wantProtocol = true;
                return Casts.STRING_ONLY;
            case "userinfo":
                wantUserinfo = true;
                return Casts.STRING_ONLY;
            case "host":
                wantHost = true;
                return Casts.STRING_ONLY;
            case "port":
                wantPort = true;
                return Casts.STRING_OR_LONG;
            case "path":
                wantPath = true;
                return Casts.STRING_ONLY;
            case "query":
                wantQuery = true;
                return Casts.STRING_ONLY;
            case "ref":
                wantRef = true;
                return Casts.STRING_ONLY;
            default:
                return Casts.NO_CASTS;
        }
    }

    /**
     * Parses the {@code HTTP.URI} field called {@code inputname} and adds every requested part
     * that is present and non-empty to {@code parsable}.
     *
     * <p>Path, query string and ref are emitted for every URI. Protocol, userinfo, host and port
     * are only emitted when the value does not start with {@code '/'}, because in that case a
     * dummy protocol and host are prepended purely to make the value parsable.
     *
     * @param parsable  the record being parsed; source of the input and target for the outputs
     * @param inputname the name of the field that holds the URI
     * @throws DissectionFailure when the (repaired) value still cannot be parsed as a URI
     */
    @Override // nl.basjes.parse.core.Dissector
    public void dissect(Parsable<?> parsable, String inputname) throws DissectionFailure {
        final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
        final String original = field.getValue().getString();
        if (original == null || original.isEmpty()) {
            return;
        }

        final String uriString = repairUri(original);

        // startsWith (instead of charAt(0)) cannot fail if the repaired value is empty.
        final boolean pathOnly = uriString.startsWith("/");

        try {
            final URI uri = pathOnly
                ? URI.create("dummy-protocol://dummy.host.name" + uriString)
                : URI.create(uriString);

            if (wantQuery) {
                addIfPresent(parsable, inputname, "HTTP.QUERYSTRING", "query", uri.getRawQuery());
            }
            if (wantPath) {
                addIfPresent(parsable, inputname, "HTTP.PATH", "path", uri.getPath());
            }
            if (wantRef) {
                addIfPresent(parsable, inputname, "HTTP.REF", "ref", uri.getFragment());
            }

            if (pathOnly) {
                // Protocol and host are the dummy values added above; do not report them.
                return;
            }

            if (wantProtocol) {
                addIfPresent(parsable, inputname, "HTTP.PROTOCOL", "protocol", uri.getScheme());
            }
            if (wantUserinfo) {
                addIfPresent(parsable, inputname, "HTTP.USERINFO", "userinfo", uri.getUserInfo());
            }
            if (wantHost) {
                addIfPresent(parsable, inputname, "HTTP.HOST", "host", uri.getHost());
            }
            if (wantPort) {
                final int port = uri.getPort();
                if (port != -1) { // -1 means the URI has no explicit port
                    parsable.addDissection(inputname, "HTTP.PORT", "port", port);
                }
            }
        } catch (IllegalArgumentException e) {
            throw new DissectionFailure("Failed to parse URI >>" + original + "<< because of : " + e.getMessage());
        }
    }

    /** Adds a string dissection to {@code parsable} unless {@code value} is null or empty. */
    private static void addIfPresent(Parsable<?> parsable, String inputname, String type, String name, String value)
            throws DissectionFailure {
        if (value != null && !value.isEmpty()) {
            parsable.addDissection(inputname, type, name, value);
        }
    }

    /** Rewrites a raw URI value so that common malformed constructs no longer break URI parsing. */
    private static String repairUri(String rawUri) {
        // Percent-encode every byte that is not explicitly allowed to pass through.
        String uri = new String(
            URLCodec.encodeUrl(URI_SAFE_BYTES, rawUri.getBytes(StandardCharsets.UTF_8)),
            StandardCharsets.US_ASCII);
        uri = Utils.makeHTMLEncodedInert(uri);

        // Normalize the separators: every '?' becomes '&' and the first '&' becomes "?&",
        // so the query string always starts with "?&".
        if (uri.indexOf('&') != -1 || uri.indexOf('?') != -1) {
            uri = uri.replace('?', '&').replaceFirst("&", "?&");
        }

        // Escape every '%' that does not start a valid %HH sequence. A match also consumes the
        // character(s) after the '%', so a '%' in that tail is only caught by a second pass.
        uri = BAD_ESCAPE_PATTERN.matcher(uri).replaceAll("%25$1");
        uri = BAD_ESCAPE_PATTERN.matcher(uri).replaceAll("%25$1");

        // Restore the '&' in front of a "#xHH;" sequence and then decode the HTML entities.
        uri = ALMOST_HTML_ENCODED.matcher(uri).replaceAll("$1&$2");
        uri = StringEscapeUtils.unescapeHtml4(uri);

        // Drop a '#' that sits directly after '=' or directly before '&'.
        uri = EQUALS_HASH_PATTERN.matcher(uri).replaceAll("=");
        uri = HASH_AMP_PATTERN.matcher(uri).replaceAll("&");

        // While more than one '#' is present, replace the first one with '~'; only the last '#'
        // remains to start the fragment.
        Matcher doubleHash = DOUBLE_HASH_PATTERN.matcher(uri);
        while (doubleHash.find()) {
            uri = doubleHash.replaceAll("~$1#");
            doubleHash = DOUBLE_HASH_PATTERN.matcher(uri);
        }
        return uri;
    }
}
