package edu.emory.clir.clearnlp.experiment;

import edu.emory.clir.clearnlp.collection.map.ObjectIntHashMap;
import edu.emory.clir.clearnlp.collection.pair.ObjectIntPair;
import edu.emory.clir.clearnlp.util.BinUtils;
import edu.emory.clir.clearnlp.util.CharTokenizer;
import edu.emory.clir.clearnlp.util.DSUtils;
import edu.emory.clir.clearnlp.util.FileUtils;
import edu.emory.clir.clearnlp.util.Joiner;
import edu.emory.clir.clearnlp.util.Splitter;
import edu.emory.clir.clearnlp.util.constant.StringConst;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.kohsuke.args4j.Option;

/* loaded from: input_file:edu/emory/clir/clearnlp/experiment/Kaist2CoNLL.class */
/**
 * Command-line converter that reads morphologically analysed text files and writes them
 * out in a tab-separated, CoNLL-style column format, re-encoding the text on the way
 * (by default from {@code euc-kr} to {@code utf8}).
 *
 * <p>Input layout, as read by this class: sentences are separated by blank lines; every
 * non-blank line holds a word form and its analysis separated by a tab. The analysis is a
 * {@code +}-separated list of {@code morpheme/tag} items, where a literal plus sign inside
 * a morpheme is written as {@code \+}.
 *
 * <p>For each input file {@code F} with at least one convertible sentence, the result is
 * written to {@code F.<outputExt>}. After all files are processed, the total number of
 * converted tokens and the tag frequencies are printed to standard output.
 */
public class Kaist2CoNLL {

    @Option(name = "-i", usage = "input path (required)", required = true, metaVar = "<filepath>")
    private String s_inputPath;

    @Option(name = "-oe", usage = "output file extension (required)", required = true, metaVar = "<string>")
    private String s_outputExt;

    @Option(name = "-ie", usage = "input file extension (default: .*)", required = false, metaVar = "<regex>")
    private String s_inputExt = ".*";

    @Option(name = "-ir", usage = "if set, process all files under the input path recursively.", required = false, metaVar = "<boolean>")
    private boolean b_recursive = false;

    @Option(name = "-src", usage = "encoding of source files (default: euc-kr)", required = false, metaVar = "<string>")
    private String s_source = "euc-kr";

    @Option(name = "-trg", usage = "encoding of target files (default: utf8)", required = false, metaVar = "<string>")
    private String s_target = "utf8";

    /** Placeholder that temporarily stands in for an escaped plus sign while splitting on {@code +}. */
    private static final String S_REPL = "_P_";

    /** Analysis of a bare slash token; it contains no tab, so it is special-cased. */
    private static final String S_SLASH = "\\//sp";

    /** Matches an escaped plus sign, i.e. a backslash followed by {@code +}. */
    private static final Pattern P_PLUS = Pattern.compile("\\\\\\+");

    /** Matches the {@link #S_REPL} placeholder. */
    private static final Pattern P_REPL = Pattern.compile(S_REPL);

    /** Size of the output buffer in bytes. */
    private static final int OUTPUT_BUFFER_SIZE = 65536;

    /** Splits an analysis into its {@code morpheme/tag} items. */
    private final CharTokenizer T_PLUS = new CharTokenizer('+');

    /**
     * Parses the command-line arguments into the annotated fields and immediately runs the
     * conversion. An {@link IOException} raised during conversion is reported through
     * {@link Throwable#printStackTrace()} and not rethrown.
     *
     * @param strArr the command-line arguments.
     */
    public Kaist2CoNLL(String[] strArr) {
        BinUtils.initArgs(strArr, this);
        try {
            encode(this.s_inputPath, this.s_inputExt, this.s_outputExt, this.b_recursive, this.s_source, this.s_target);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts every matching input file and prints summary statistics to standard output.
     *
     * <p>A sentence is written only if all of its lines can be converted; otherwise the whole
     * sentence is skipped. A sentence that is not followed by a blank line at the end of a
     * file is still converted and belongs to that file only.
     *
     * @param str  the input path passed to {@code FileUtils.getFileList}.
     * @param str2 the input file extension (regular expression) passed to {@code FileUtils.getFileList}.
     * @param str3 the extension appended (after a dot) to each input path to form the output path.
     * @param z    whether files are collected recursively.
     * @param str4 the charset name used to read the input files.
     * @param str5 the charset name used to write the output files.
     * @throws IOException if a file cannot be read or written, or a charset name is not supported.
     */
    public void encode(String str, String str2, String str3, boolean z, String str4, String str5) throws IOException {
        ObjectIntHashMap<String> tagCounts = new ObjectIntHashMap<>();
        int wordCount = 0;

        for (String inputFile : FileUtils.getFileList(str, str2, z)) {
            StringBuilder converted = new StringBuilder();
            // The buffer is local to the file so that a trailing sentence can never leak into the next file.
            List<String> sentence = new ArrayList<>();

            // The raw stream is its own resource so it is closed even if the charset name is rejected.
            try (FileInputStream rawIn = new FileInputStream(inputFile);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, str4))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    line = line.trim();
                    if (line.isEmpty()) {
                        wordCount += appendSentence(sentence, tagCounts, converted);
                        sentence.clear();
                    } else {
                        sentence.add(line);
                    }
                }
            }

            // Flush the last sentence when the file does not end with a blank line.
            wordCount += appendSentence(sentence, tagCounts, converted);

            if (converted.length() > 0) {
                String outputFile = inputFile + "." + str3;
                try (OutputStream rawOut = new BufferedOutputStream(new FileOutputStream(outputFile), OUTPUT_BUFFER_SIZE);
                     PrintStream out = new PrintStream(rawOut, false, str5)) {
                    out.println(converted.toString());
                    // PrintStream never throws on write failures; surface them explicitly.
                    if (out.checkError()) {
                        throw new IOException("Failed to write: " + outputFile);
                    }
                }
            } else {
                System.err.println("Empty file: " + inputFile);
            }
        }

        List<ObjectIntPair<String>> sortedCounts = tagCounts.toList();
        DSUtils.sortReverseOrder(sortedCounts);
        System.out.println("WC: " + wordCount);
        for (ObjectIntPair<String> entry : sortedCounts) {
            System.out.println(entry.o + " " + entry.i);
        }
    }

    /**
     * Converts one sentence and, on success, appends it plus a separating newline to {@code out}.
     *
     * @param sentence  the lines of the sentence; may be empty.
     * @param tagCounts the global tag counts, updated only if the sentence is converted.
     * @param out       the buffer receiving the converted sentence.
     * @return the number of lines in the sentence if it was appended, otherwise 0.
     */
    private int appendSentence(List<String> sentence, ObjectIntHashMap<String> tagCounts, StringBuilder out) {
        String block = toCoNLL(sentence, tagCounts);
        if (block == null) {
            return 0;
        }
        out.append(block);
        out.append(StringConst.NEW_LINE);
        return sentence.size();
    }

    /**
     * Converts all lines of a sentence, numbering them from 1.
     *
     * @param list             the lines of the sentence.
     * @param objectIntHashMap the global tag counts; the tags of this sentence are added only if
     *                         every line converts successfully.
     * @return the converted lines, each terminated by a newline, or {@code null} if the sentence
     *         is empty or any of its lines cannot be converted.
     */
    private String toCoNLL(List<String> list, ObjectIntHashMap<String> objectIntHashMap) {
        if (list.isEmpty()) {
            return null;
        }
        // Tags are collected locally first so that a rejected sentence does not skew the global counts.
        ObjectIntHashMap<String> localCounts = new ObjectIntHashMap<>();
        StringBuilder build = new StringBuilder();
        int size = list.size();
        for (int i = 0; i < size; i++) {
            String row = toCoNLL(localCounts, list.get(i), i + 1);
            if (row == null) {
                return null;
            }
            build.append(row);
            build.append(StringConst.NEW_LINE);
        }
        Iterator<ObjectIntPair<String>> it = localCounts.iterator();
        while (it.hasNext()) {
            ObjectIntPair<String> entry = it.next();
            objectIntHashMap.add(entry.o, entry.i);
        }
        return build.toString();
    }

    /**
     * Converts a single line into one output row: id, word form, joined morphemes, joined tags,
     * followed by five {@code _} placeholder columns, all separated by tabs.
     *
     * @param objectIntHashMap the tag counts to update with every tag found on this line.
     * @param str              the trimmed input line.
     * @param i                the 1-based position of the line within its sentence.
     * @return the converted row, or {@code null} if the line cannot be converted.
     */
    private String toCoNLL(ObjectIntHashMap<String> objectIntHashMap, String str, int i) {
        List<String> morphemes = new ArrayList<>();
        List<String> tags = new ArrayList<>();
        String[] fields = Splitter.splitTabs(str);
        if (fields.length < 2) {
            if (!str.equals(S_SLASH)) {
                System.err.println("Incomplete: " + i + " " + str);
                return null;
            }
            // A bare slash analysis has no word-form column; supply the slash as the form.
            fields = new String[]{StringConst.FW_SLASH, S_SLASH};
        }
        String form = fields[0];
        // Hide escaped plus signs so that they are not mistaken for item separators.
        String analysis = P_PLUS.matcher(fields[1]).replaceAll(S_REPL);
        for (String item : this.T_PLUS.tokenize(analysis)) {
            int slash = item.lastIndexOf('/');
            // Both the morpheme before the last slash and the tag after it must be non-empty.
            if (slash <= 0 || slash + 1 >= item.length()) {
                System.err.println("Malformed: " + i + " " + str);
                return null;
            }
            // Restore the placeholder. In a replacement string a backslash makes the next character
            // literal, so this inserts StringConst.PLUS itself (presumably "+"; confirm against StringConst).
            String morpheme = P_REPL.matcher(item.substring(0, slash)).replaceAll("\\" + StringConst.PLUS);
            String tag = item.substring(slash + 1);
            // Normalise the tag "eff" to "ef".
            if (tag.equals("eff")) {
                tag = "ef";
            }
            morphemes.add(morpheme);
            tags.add(tag);
            objectIntHashMap.add(tag);
        }
        if (morphemes.isEmpty()) {
            System.err.println("Empty: " + i + " " + str);
            return null;
        }
        return i + StringConst.TAB + form + StringConst.TAB + Joiner.join(morphemes, StringConst.PLUS) + StringConst.TAB + Joiner.join(tags, StringConst.PLUS) + StringConst.TAB + "_\t_\t_\t_\t_";
    }

    /**
     * Command-line entry point.
     *
     * @param strArr the command-line arguments; see the {@code @Option} fields for the accepted flags.
     */
    public static void main(String[] strArr) {
        new Kaist2CoNLL(strArr);
    }
}
