package de.uni_mannheim.informatik.dws.melt.matching_ml.python;

import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.lib.Chars;
import org.apache.jena.sparql.sse.Tags;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_ml/python/StringUtil.class */
/**
 * Static helpers for label preprocessing (tokenizing, stop-word removal) and
 * for string distances (Levenshtein, Damerau-Levenshtein).
 *
 * <p>Tokenization results are memoized in a process-wide cache that is never
 * evicted, so memory grows with the number of distinct inputs.
 */
public class StringUtil {
    /** Memoized tokenizations; the cached lists are never handed out directly. */
    private static final Map<String, List<String>> TOKEN_CACHE = new ConcurrentHashMap<>();

    /** Punctuation and control characters that are treated as token separators. */
    private static final Pattern DELIMITERS = Pattern.compile("[,;:()?!._\\-\"\r\n\t]");

    /**
     * Zero-width positions at which a space is inserted: before the last
     * capital of an upper-case run that is followed by a lower-case letter,
     * before an upper-case letter preceded by anything but A-Z, and between
     * an ASCII letter and a following non-letter.
     */
    private static final Pattern WORD_BOUNDARIES = Pattern.compile(String.format("%s|%s|%s",
            "(?<=[\\p{Lu}])(?=[A-Z][a-z])",
            "(?<=[^A-Z])(?=[\\p{Lu}])",
            "(?<=[A-Za-z])(?=[^\\p{L}])"));

    /** Default English stop words used by {@link #removeStopwords(List)}. */
    private static final Set<String> ENGLISH_STOPWORDS = new HashSet<>(Arrays.asList(
            "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
            "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
            "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"));

    /**
     * Splits a label into lower-cased tokens. Punctuation, underscores,
     * hyphens, quotes and line breaks act as separators, and camelCase /
     * letter-to-non-letter transitions are split as well.
     *
     * @param str the text to tokenize; {@code null} yields an empty list
     * @return a new, mutable list of non-empty lower-case tokens
     */
    public static List<String> tokenize(String str) {
        if (str == null) {
            return new ArrayList<>();
        }
        List<String> cached = TOKEN_CACHE.computeIfAbsent(str, StringUtil::splitIntoTokens);
        // Hand out a copy so that callers mutating the result cannot corrupt the cache.
        return new ArrayList<>(cached);
    }

    /** Performs the actual (uncached) tokenization of a non-null string. */
    private static List<String> splitIntoTokens(String text) {
        String separated = DELIMITERS.matcher(text).replaceAll(" ");
        separated = WORD_BOUNDARIES.matcher(separated).replaceAll(" ");
        List<String> tokens = new ArrayList<>();
        for (String part : separated.split(" ")) {
            if (!part.isEmpty()) {
                // Locale.ROOT keeps the result independent of the JVM default locale.
                tokens.add(part.toLowerCase(Locale.ROOT));
            }
        }
        return tokens;
    }

    /**
     * Tokenizes the text and joins the tokens with single spaces.
     *
     * @param str the text to tokenize
     * @return the tokens of {@link #tokenize(String)} separated by one space
     */
    public static String tokenizeToString(String str) {
        return String.join(" ", tokenize(str));
    }

    /**
     * Checks whether at least half of the non-whitespace characters are the
     * ASCII digits 0-9.
     *
     * @param str the text to inspect (must not be {@code null})
     * @return {@code true} if digits make up half or more of the
     *         non-whitespace characters; also {@code true} when there are no
     *         non-whitespace characters at all
     */
    public static boolean containsMostlyNumbers(String str) {
        int digits = 0;
        int nonWhitespace = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= '0' && c <= '9') {
                digits++;
            }
            if (!Character.isWhitespace(c)) {
                nonWhitespace++;
            }
        }
        // Multiply instead of dividing: integer division would round the
        // threshold down and e.g. classify the single letter "a" as numeric.
        return 2 * digits >= nonWhitespace;
    }

    /**
     * Normalizes a label: mostly-numeric input becomes the empty string,
     * anything else is tokenized, stripped of English stop words and joined
     * with single spaces.
     *
     * @param str the text to process
     * @return the processed text, possibly empty
     */
    public static String getProcessedString(String str) {
        if (containsMostlyNumbers(str)) {
            return "";
        }
        return String.join(" ", removeStopwords(tokenize(str)));
    }

    /**
     * Tokenizes the text and drops the default English stop words.
     *
     * @param str the text to tokenize
     * @return a new list of the remaining tokens in original order
     */
    public static List<String> getTokensWithoutStopword(String str) {
        return removeStopwords(tokenize(str));
    }

    /**
     * Removes the default English stop words from a token list.
     *
     * @param list the tokens to filter; not modified
     * @return a new list without stop words
     */
    public static List<String> removeStopwords(List<String> list) {
        return removeStopwords(list, ENGLISH_STOPWORDS);
    }

    /**
     * Removes all tokens that are contained in the given stop-word set.
     *
     * @param list the tokens to filter; not modified
     * @param set  the stop words to drop (matched with {@code Set.contains})
     * @return a new list holding the surviving tokens in original order
     */
    public static List<String> removeStopwords(List<String> list, Set<String> set) {
        List<String> kept = new ArrayList<>();
        for (String token : list) {
            if (!set.contains(token)) {
                kept.add(token);
            }
        }
        return kept;
    }

    /**
     * Computes the Levenshtein distance (insertions, deletions and
     * substitutions each cost 1).
     *
     * @param str  first string; {@code null} is treated as empty
     * @param str2 second string; {@code null} is treated as empty
     * @param z    if {@code true} both strings are lower-cased first, making
     *             the comparison case-insensitive
     * @return the number of single-character edits between the two strings
     */
    public static int editDistance(String str, String str2, boolean z) {
        String source = nullToEmpty(str);
        String target = nullToEmpty(str2);
        if (z) {
            source = source.toLowerCase(Locale.ROOT);
            target = target.toLowerCase(Locale.ROOT);
        }
        // Only the previous and the current row of the DP matrix are needed.
        int[] previous = new int[target.length() + 1];
        int[] current = new int[target.length() + 1];
        for (int j = 0; j <= target.length(); j++) {
            previous[j] = j;
        }
        for (int i = 1; i <= source.length(); i++) {
            current[0] = i;
            for (int j = 1; j <= target.length(); j++) {
                int substitution = source.charAt(i - 1) == target.charAt(j - 1) ? 0 : 1;
                current[j] = Math.min(
                        Math.min(previous[j] + 1, current[j - 1] + 1),
                        previous[j - 1] + substitution);
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        return previous[target.length()];
    }

    /**
     * Case-insensitive Levenshtein similarity:
     * {@code 1 - distance / max(length)}.
     *
     * @param str  first string; {@code null} is treated as empty
     * @param str2 second string; {@code null} is treated as empty
     * @return 1.0 for equal strings (including two empty ones), 0.0 for
     *         completely different strings
     */
    public static double editDistanceNormalised(String str, String str2) {
        return getNormalised(editDistance(str, str2, true), getMaxLength(str, str2));
    }

    /**
     * @param str  the candidate suffix
     * @param str2 the string to inspect
     * @return {@code true} if {@code str2} ends with {@code str}
     */
    public static boolean isSuffix(String str, String str2) {
        return str2.endsWith(str);
    }

    /**
     * @param str  the candidate prefix
     * @param str2 the string to inspect
     * @return {@code true} if {@code str2} starts with {@code str}
     */
    public static boolean isPrefix(String str, String str2) {
        return str2.startsWith(str);
    }

    /**
     * Computes the unrestricted Damerau-Levenshtein distance: insertions,
     * deletions, substitutions and transpositions of adjacent characters each
     * cost 1. The comparison is case-sensitive.
     *
     * @param str  first string; {@code null} is treated as empty
     * @param str2 second string; {@code null} is treated as empty
     * @return the minimal number of edit operations
     */
    public static int damerauLevenshtein(String str, String str2) {
        String source = nullToEmpty(str);
        String target = nullToEmpty(str2);
        int sourceLength = source.length();
        int targetLength = target.length();
        // Larger than any real distance; guards the artificial border row/column.
        int maxDistance = sourceLength + targetLength;

        // The matrix is shifted by one in both dimensions: index 0 is the
        // guard border, index 1 corresponds to the empty prefix.
        int[][] dist = new int[sourceLength + 2][targetLength + 2];
        dist[0][0] = maxDistance;
        for (int i = 0; i <= sourceLength; i++) {
            dist[i + 1][0] = maxDistance;
            dist[i + 1][1] = i;
        }
        for (int j = 0; j <= targetLength; j++) {
            dist[0][j + 1] = maxDistance;
            dist[1][j + 1] = j;
        }

        // For every character: last (1-based) row of source in which it occurred.
        HashMap<Character, Integer> lastRowOf = new HashMap<>();
        for (int i = 1; i <= sourceLength; i++) {
            char sourceChar = source.charAt(i - 1);
            // Last (1-based) column in this row where the characters matched.
            int lastMatchColumn = 0;
            for (int j = 1; j <= targetLength; j++) {
                char targetChar = target.charAt(j - 1);
                int transposeRow = lastRowOf.getOrDefault(targetChar, 0);
                int transposeColumn = lastMatchColumn;
                int substitution;
                if (sourceChar == targetChar) {
                    substitution = 0;
                    lastMatchColumn = j;
                } else {
                    substitution = 1;
                }
                int transposition = dist[transposeRow][transposeColumn]
                        + (i - transposeRow - 1) + 1 + (j - transposeColumn - 1);
                dist[i + 1][j + 1] = Math.min(
                        Math.min(dist[i][j] + substitution, dist[i + 1][j] + 1),
                        Math.min(dist[i][j + 1] + 1, transposition));
            }
            lastRowOf.put(sourceChar, i);
        }
        return dist[sourceLength + 1][targetLength + 1];
    }

    /**
     * Case-sensitive Damerau-Levenshtein similarity:
     * {@code 1 - distance / max(length)}.
     *
     * @param str  first string; {@code null} is treated as empty
     * @param str2 second string; {@code null} is treated as empty
     * @return 1.0 for equal strings (including two empty ones), 0.0 for
     *         completely different strings
     */
    public static double damerauLevenshteinNormalised(String str, String str2) {
        return getNormalised(damerauLevenshtein(str, str2), getMaxLength(str, str2));
    }

    /**
     * Forces a string to an exact length by truncating it or right-padding it
     * with spaces.
     *
     * @param str the text to fit (must not be {@code null})
     * @param i   the desired length; a negative value makes
     *            {@code substring} throw
     * @return a string of exactly {@code i} characters
     */
    public static String exactLength(String str, int i) {
        if (str.length() > i) {
            return str.substring(0, i);
        }
        StringBuilder padded = new StringBuilder(i);
        padded.append(str);
        while (padded.length() < i) {
            padded.append(' ');
        }
        return padded.toString();
    }

    /** Turns a distance into a similarity; a zero maximum length means both inputs were empty. */
    private static double getNormalised(double d, double d2) {
        if (d2 == 0.0d) {
            // Avoids 0/0 = NaN: two empty strings are identical.
            return 1.0d;
        }
        return 1.0d - (d / d2);
    }

    /** Length of the longer of the two strings, treating {@code null} as empty. */
    private static double getMaxLength(String str, String str2) {
        return Math.max(nullToEmpty(str).length(), nullToEmpty(str2).length());
    }

    /** Maps {@code null} to the empty string and returns everything else unchanged. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
