package com.genesys.roberta.tokenizer.logic;

import com.genesys.roberta.tokenizer.resources.RobertaTokenizerResources;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import lombok.NonNull;

/* loaded from: input_file:com/genesys/roberta/tokenizer/logic/BytePairEncoder.class */
/**
 * Splits a word into sub-word symbols by repeatedly merging adjacent symbol pairs.
 *
 * <p>The class has no fields; everything it needs is passed to {@link #encode}.
 */
public class BytePairEncoder {

    /**
     * Encodes a single word into a list of merged symbols.
     *
     * <p>The word is first split into one-character strings (one per UTF-16 code unit, as
     * produced by {@link String#chars()}). Then, in a loop, the adjacent pair with the lowest
     * rank reported by {@code robertaTokenizerResources} is merged wherever it occurs, until
     * either no pair has a rank below {@link Integer#MAX_VALUE} or only one symbol is left.
     *
     * @param str the word to encode; an empty string yields an empty list
     * @param robertaTokenizerResources source of pair ranks, queried through
     *        {@code getRankOrDefault(biGram, Integer.MAX_VALUE)}; lower rank is merged first
     * @return the symbols remaining after all applicable merges, in word order
     * @throws NullPointerException if either argument is {@code null}
     */
    public List<String> encode(@NonNull String str, @NonNull RobertaTokenizerResources robertaTokenizerResources) {
        if (str == null) {
            throw new NullPointerException("word is marked non-null but is null");
        }
        if (robertaTokenizerResources == null) {
            throw new NullPointerException("robertaTokenizerRobertaResources is marked non-null but is null");
        }
        // Character.toString(int) is the overload selected here (IntStream elements are ints).
        List<String> symbols = str.chars().mapToObj(Character::toString).collect(Collectors.toList());
        Set<BiGram> biGrams = getBiGrams(symbols);
        while (true) {
            BiGram lowestRanked = findLowestRankedBiGram(biGrams, robertaTokenizerResources);
            if (lowestRanked == null) {
                // Every remaining pair got the default rank, so nothing more can be merged.
                break;
            }
            symbols = mergeOccurrences(symbols, lowestRanked.getLeft(), lowestRanked.getRight());
            if (symbols.size() == 1) {
                // A single symbol has no adjacent pairs left to consider.
                break;
            }
            biGrams = getBiGrams(symbols);
        }
        return symbols;
    }

    /**
     * Returns the pair with the smallest rank strictly below {@link Integer#MAX_VALUE},
     * or {@code null} when no such pair exists (including when {@code biGrams} is empty).
     */
    private BiGram findLowestRankedBiGram(Set<BiGram> biGrams, RobertaTokenizerResources resources) {
        long lowestRank = Integer.MAX_VALUE;
        BiGram lowestRanked = null;
        for (BiGram candidate : biGrams) {
            long rank = resources.getRankOrDefault(candidate, Integer.MAX_VALUE).intValue();
            // Strict comparison: a pair that only has the default rank is never selected.
            if (rank < lowestRank) {
                lowestRank = rank;
                lowestRanked = candidate;
            }
        }
        return lowestRanked;
    }

    /**
     * Builds a new list in which every occurrence of {@code left} immediately followed by
     * {@code right} is replaced by the single symbol {@code left + right}. Occurrences are
     * consumed left to right and do not overlap; all other symbols are copied unchanged.
     */
    private List<String> mergeOccurrences(List<String> symbols, String left, String right) {
        List<String> merged = new ArrayList<>();
        int position = 0;
        while (position < symbols.size()) {
            int leftIndex = getIndexWithStartPosition(symbols, left, position);
            if (leftIndex == -1) {
                // No further candidate: copy the tail as is.
                merged.addAll(symbols.subList(position, symbols.size()));
                break;
            }
            merged.addAll(symbols.subList(position, leftIndex));
            // symbols.get(leftIndex) is known to equal left, so only the successor is checked.
            boolean followedByRight =
                    leftIndex < symbols.size() - 1 && symbols.get(leftIndex + 1).equals(right);
            if (followedByRight) {
                merged.add(left + right);
                position = leftIndex + 2;
            } else {
                merged.add(symbols.get(leftIndex));
                position = leftIndex + 1;
            }
        }
        return merged;
    }

    /**
     * Collects the distinct pairs of neighbouring symbols in {@code list}.
     *
     * @param list the current symbols; lists with fewer than two elements yield an empty set
     * @return a set with one {@code BiGram} per distinct adjacent pair
     * @throws NullPointerException if {@code list} is {@code null}
     */
    private Set<BiGram> getBiGrams(@NonNull List<String> list) {
        if (list == null) {
            throw new NullPointerException("wordStrChars is marked non-null but is null");
        }
        return IntStream.range(0, list.size() - 1)
                .mapToObj(i -> BiGram.of(list.get(i), list.get(i + 1)))
                .collect(Collectors.toSet());
    }

    /**
     * Finds the first index at or after {@code i} whose element equals {@code str}.
     *
     * @param list the symbols to search
     * @param str the symbol to look for
     * @param i the index at which the search starts
     * @return the matching index, or {@code -1} if there is none
     * @throws NullPointerException if {@code list} or {@code str} is {@code null}
     */
    private int getIndexWithStartPosition(@NonNull List<String> list, @NonNull String str, int i) {
        if (list == null) {
            throw new NullPointerException("wordCharsList is marked non-null but is null");
        }
        if (str == null) {
            throw new NullPointerException("word is marked non-null but is null");
        }
        for (int index = i; index < list.size(); index++) {
            if (list.get(index).equals(str)) {
                return index;
            }
        }
        return -1;
    }
}
