package umcg.genetica.io.trityper.converters;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.sf.picard.sam.AbstractAlignmentMerger;
import org.broadinstitute.variant.vcf.VCFHeader;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.WGAFileMatrixGenotype;
import umcg.genetica.io.trityper.util.BaseAnnot;
import umcg.genetica.io.trityper.util.ChrAnnotation;
import umcg.genetica.text.Strings;

/* loaded from: input_file:umcg/genetica/io/trityper/converters/VCFToTriTyper.class */
/**
 * Converts the VCF files found in a directory into TriTyper output directories.
 *
 * <p>For every {@code .vcf} / {@code .vcf.gz} file the converter writes
 * {@code Individuals.txt}, {@code PhenotypeInformation.txt}, {@code SNPs.txt},
 * {@code SNPMappings.txt} and {@code GenotypeMatrix.dat}. Each input file is read
 * three times: once to collect the sample names from the {@code #CHROM} header line,
 * once to index the bi-allelic variants, and once to write the genotypes.
 *
 * <p>Instances keep per-file state in fields and are therefore not meant to be shared
 * between concurrent conversions.
 */
public class VCFToTriTyper {
    /** Number of fixed VCF columns that must be present before a data line is parsed (the INFO column is index 7). */
    private static final int MIN_VCF_COLUMNS = 8;
    /** Index of the first sample column on header and data lines. */
    private static final int FIRST_SAMPLE_COLUMN = 9;

    /** Maps a VCF column index to the sample's index in {@link #individuals}; entries are null for unknown columns. */
    Integer[] colToIndId = null;
    private HashMap<String, Integer> individualMap;
    private ArrayList<String> individuals;
    private HashMap<String, Integer> snpMap;
    private HashMap<String, Byte> snpChrMap;
    private HashMap<String, Integer> snpChrPosMap;
    private ArrayList<String> snpList;
    private int multiallelicSNPsExcluded;
    private int snpctr;
    private int finalNrInds;
    private Pattern snppattern;
    private static final Pattern zero = Pattern.compile("0");
    private static final Pattern one = Pattern.compile("1");

    /**
     * Converts all VCF files in a directory, treating every variant as a SNP.
     *
     * @param str  directory that contains the VCF files
     * @param str2 directory below which the output directories are created
     * @throws IOException if the input directory does not exist or reading/writing fails
     */
    public void parse(String str, String str2) throws IOException {
        parse(str, str2, null);
    }

    /**
     * Converts all VCF files in a directory.
     *
     * @param str  directory that contains the VCF files
     * @param str2 directory below which the output directories are created; a trailing
     *             {@code /} is appended when missing
     * @param str3 regular expression that must fully match at least one
     *             semicolon-separated INFO entry for a variant to be kept, or
     *             {@code null} to keep every variant
     * @throws IOException if the input directory does not exist or reading/writing fails
     */
    public void parse(String str, String str2, String str3) throws IOException {
        if (str3 == null) {
            this.snppattern = null;
            System.out.println("All variants are assumed to be SNPs");
        } else {
            this.snppattern = Pattern.compile(str3);
            System.out.println("Variants are filtered for SNPs using this pattern on the INFO column: " + str3);
        }
        if (!Gpio.exists(str)) {
            throw new IOException("Error: could not find dir: " + str);
        }
        String outputRoot = str2.endsWith("/") ? str2 : str2 + "/";
        Gpio.createDir(outputRoot);

        ArrayList<String> vcfFiles = new ArrayList<>();
        for (String fileName : Gpio.getListOfFiles(str)) {
            if (fileName.endsWith(".vcf") || fileName.endsWith(".vcf.gz")) {
                vcfFiles.add(fileName);
            }
        }
        System.out.println("Found " + vcfFiles.size() + " vcf files");
        if (vcfFiles.isEmpty()) {
            // NOTE(review): terminating the JVM from here is kept for backward compatibility
            // with existing callers; consider returning instead.
            System.exit(0);
        }
        for (String fileName : vcfFiles) {
            parseFile(str, fileName, outputRoot);
        }
    }

    /**
     * Converts a single VCF file into its own output directory.
     *
     * @param inputDir   directory that contains the file
     * @param fileName   name of the VCF file inside {@code inputDir}
     * @param outputRoot output root directory, ending in {@code /}
     * @throws IOException if reading the VCF or writing any output file fails
     */
    private void parseFile(String inputDir, String fileName, String outputRoot) throws IOException {
        String vcfPath = inputDir + "/" + fileName;
        String outDir = outputRoot + chromosomeDirName(fileName) + "/";
        Gpio.createDir(outDir);
        System.out.println("Will write to " + outDir);

        this.snpMap = new HashMap<>();
        this.snpChrMap = new HashMap<>();
        this.snpChrPosMap = new HashMap<>();
        this.snpctr = 0;

        System.out.println("Parsing file: " + vcfPath);
        loadIndividuals(vcfPath);
        System.out.println("Total number of detected individuals: " + this.individuals.size());
        System.out.println("Now writing individuals to output directory");
        writeIndividuals(outDir);

        this.multiallelicSNPsExcluded = 0;
        this.snpList = new ArrayList<>();
        System.out.println("Parsing file: " + vcfPath);
        int nrLines = indexVariants(vcfPath);

        System.out.println(this.snpMap.size() + "\tsnps detected");
        System.out.println(this.multiallelicSNPsExcluded + "\tmulti allelic SNPs excluded.");
        System.out.println("Final totals: ");
        System.out.println(this.snpMap.size() + "\tsnps detected");
        System.out.println(this.multiallelicSNPsExcluded + "\tmulti allelic SNPs excluded.");
        System.out.println("Now writing snps to output directory!");
        writeSnpFiles(outDir);

        this.finalNrInds = this.individuals.size();
        writeGenotypes(vcfPath, outDir, nrLines);
    }

    /**
     * Derives the output sub-directory name from a file name: the last dot-separated part
     * that contains "chr" (case-insensitive) is reduced to its digits and prefixed with
     * "Chr". Returns the empty string when no part contains "chr".
     */
    private String chromosomeDirName(String fileName) {
        String dirName = "";
        for (String part : Strings.dot.split(fileName)) {
            String lower = part.toLowerCase();
            if (lower.contains("chr")) {
                dirName = "Chr" + lower.replaceAll("[^\\d.]", "");
            }
        }
        return dirName;
    }

    /**
     * First pass: fills {@link #individuals} and {@link #individualMap} with the sample
     * names of the first {@code #CHROM} header line, in column order.
     */
    private void loadIndividuals(String vcfPath) throws IOException {
        this.individualMap = new HashMap<>();
        this.individuals = new ArrayList<>();
        TextFile reader = new TextFile(vcfPath, false);
        try {
            String[] elems = reader.readLineElems(TextFile.tab);
            while (elems != null) {
                if (elems[0].startsWith("#CHROM")) {
                    for (int col = FIRST_SAMPLE_COLUMN; col < elems.length; col++) {
                        String sample = elems[col];
                        if (this.individualMap.containsKey(sample)) {
                            System.err.println("WARNING: duplicate sample name in VCF header: " + sample);
                        } else {
                            this.individualMap.put(sample, Integer.valueOf(this.individuals.size()));
                            this.individuals.add(sample);
                        }
                    }
                    // Stop at the header line: without this the loop never advanced past it.
                    break;
                }
                elems = reader.readLineElems(TextFile.tab);
            }
        } finally {
            reader.close();
        }
    }

    /** Writes Individuals.txt and PhenotypeInformation.txt for the collected samples. */
    private void writeIndividuals(String outDir) throws IOException {
        TextFile individualsOut = new TextFile(outDir + "Individuals.txt", true);
        try {
            TextFile phenotypeOut = new TextFile(outDir + "PhenotypeInformation.txt", true);
            try {
                for (String sample : this.individuals) {
                    individualsOut.writeln(sample);
                    phenotypeOut.writeln(sample + "\tunknown\tinclude\tunknown");
                }
            } finally {
                phenotypeOut.close();
            }
        } finally {
            individualsOut.close();
        }
    }

    /**
     * Second pass: registers every accepted variant in the SNP maps.
     *
     * @return the number of lines read from the file (used to size the progress bar)
     */
    private int indexVariants(String vcfPath) throws IOException {
        int nrLines = 0;
        this.colToIndId = null;
        TextFile reader = new TextFile(vcfPath, false);
        try {
            String[] elems = reader.readLineElemsReturnObjects(TextFile.tab);
            while (elems != null) {
                if (!elems[0].startsWith(VCFHeader.METADATA_INDICATOR)) {
                    if (elems[0].startsWith("#CHROM")) {
                        parseHeaderLine(elems);
                    } else {
                        parseVCFSNPLine(elems, true);
                    }
                }
                elems = reader.readLineElemsReturnObjects(TextFile.tab);
                nrLines++;
                // The reporting interval reuses a constant from an unrelated class; kept as in
                // the original so the progress output does not change.
                if (nrLines % AbstractAlignmentMerger.MAX_RECORDS_IN_RAM == 0) {
                    System.out.println("Parsed\t" + this.snpList.size() + "\tsnps.");
                }
            }
        } finally {
            reader.close();
        }
        return nrLines;
    }

    /** Writes SNPs.txt and SNPMappings.txt (chromosome, position, id) in registration order. */
    private void writeSnpFiles(String outDir) throws IOException {
        TextFile snpOut = new TextFile(outDir + "SNPs.txt", true);
        try {
            TextFile mappingOut = new TextFile(outDir + "SNPMappings.txt", true);
            try {
                for (String snp : this.snpList) {
                    snpOut.writeln(snp);
                    mappingOut.writeln(this.snpChrMap.get(snp) + "\t" + this.snpChrPosMap.get(snp) + "\t" + snp);
                }
            } finally {
                mappingOut.close();
            }
        } finally {
            snpOut.close();
        }
    }

    /**
     * Third pass: writes the alleles of every registered variant to GenotypeMatrix.dat.
     *
     * @param nrLines number of lines in the file, used as the progress bar maximum
     */
    private void writeGenotypes(String vcfPath, String outDir, int nrLines) throws IOException {
        WGAFileMatrixGenotype matrix = new WGAFileMatrixGenotype(this.snpctr, this.individuals.size(), new File(outDir + "GenotypeMatrix.dat"), false);
        try {
            ProgressBar progress = new ProgressBar(nrLines, "writing genotypes from file: " + vcfPath);
            TextFile reader = new TextFile(vcfPath, false);
            try {
                this.colToIndId = null;
                this.snpctr = 0;
                int lineNr = 0;
                String[] elems = reader.readLineElems(TextFile.tab);
                while (elems != null) {
                    if (!elems[0].startsWith(VCFHeader.METADATA_INDICATOR)) {
                        if (elems[0].startsWith("#CHROM")) {
                            parseHeaderLine(elems);
                        } else if (elems.length >= MIN_VCF_COLUMNS) {
                            Integer snpIndex = this.snpMap.get(snpId(elems));
                            if (snpIndex != null) {
                                Pair<byte[], byte[]> alleles = parseVCFSNPLine(elems, false);
                                if (alleles != null) {
                                    matrix.setAlleles(snpIndex.intValue(), alleles.getLeft(), alleles.getRight());
                                    this.snpctr++;
                                }
                            }
                        }
                    }
                    elems = reader.readLineElems(TextFile.tab);
                    progress.set(lineNr);
                    lineNr++;
                }
            } finally {
                progress.close();
                reader.close();
            }
            System.out.println("");
        } finally {
            matrix.close();
        }
    }

    /**
     * Returns the identifier used for a variant: the ID column, or
     * {@code <chromosome>_<position>} when the ID column is a single dot.
     * Callers must make sure the line has at least three columns.
     */
    private String snpId(String[] elems) {
        String id = elems[2];
        if (Strings.dot.matcher(id).matches()) {
            id = elems[0] + "_" + elems[1];
        }
        return id;
    }

    /** Tells whether the INFO column passes the SNP filter; always true when no filter is set. */
    private boolean passesSnpFilter(String info) {
        if (this.snppattern == null) {
            return true;
        }
        for (String entry : Strings.semicolon.split(info)) {
            if (this.snppattern.matcher(entry).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Handles one VCF data line.
     *
     * <p>With {@code z == true} the variant is only registered in the SNP maps and the
     * method always returns {@code null}. With {@code z == false} the genotypes of an
     * already registered variant are decoded.
     *
     * @param strArr tab-separated columns of the data line
     * @param z      {@code true} for the indexing pass, {@code false} for the genotype pass
     * @return the first and second allele per individual (0 meaning missing), or
     *         {@code null} when the line is skipped or {@code z} is {@code true}
     */
    private Pair<byte[], byte[]> parseVCFSNPLine(String[] strArr, boolean z) {
        if (strArr.length < MIN_VCF_COLUMNS) {
            // Blank or truncated line: the fixed columns cannot be read safely.
            boolean blank = strArr.length == 0 || (strArr.length == 1 && strArr[0].isEmpty());
            if (z && !blank) {
                System.err.println("WARNING: skipping line with fewer than " + MIN_VCF_COLUMNS + " columns:\n" + Strings.concat(strArr, Strings.tab));
            }
            return null;
        }
        if (!passesSnpFilter(strArr[7])) {
            return null;
        }
        String snp = snpId(strArr);
        String ref = strArr[3];
        String alt = strArr[4];
        if (Strings.comma.split(alt).length != 1 || Strings.comma.split(ref).length != 1) {
            this.multiallelicSNPsExcluded++;
            System.out.println("SNP " + snp + " is multi-allelic, therefore exlcuding it! " + ref + "\t" + alt);
            return null;
        }
        byte refAllele = BaseAnnot.toByte(ref);
        byte altAllele = BaseAnnot.toByte(alt);
        if (refAllele == 0 || altAllele == 0) {
            System.err.println("WARNING: could not properly parse reference or alternative allele for snp\t" + snp + "\t" + ref + "-" + alt);
            return null;
        }

        if (z) {
            // Indexing pass: remember the variant, never return genotypes.
            if (this.snpMap.containsKey(snp)) {
                System.err.println("WARNING: " + snp + " already parsed?\n" + Strings.concat(strArr, Strings.tab));
                return null;
            }
            Byte chr = Byte.valueOf(ChrAnnotation.parseChr(strArr[0]));
            Integer pos = Integer.valueOf(Integer.parseInt(strArr[1]));
            this.snpList.add(snp);
            this.snpMap.put(snp, Integer.valueOf(this.snpctr));
            this.snpChrMap.put(snp, chr);
            this.snpChrPosMap.put(snp, pos);
            this.snpctr++;
            return null;
        }
        if (!this.snpMap.containsKey(snp)) {
            return null;
        }

        byte[] allele1 = new byte[this.finalNrInds];
        byte[] allele2 = new byte[this.finalNrInds];
        for (int col = FIRST_SAMPLE_COLUMN; col < strArr.length; col++) {
            // Columns without a header entry (no header seen, or more columns than the header) are skipped.
            Integer ind = (this.colToIndId != null && col < this.colToIndId.length) ? this.colToIndId[col] : null;
            if (ind == null) {
                continue;
            }
            String genotype = Strings.colon.split(strArr[col])[0];
            String[] calls = Strings.pipe.split(genotype);
            if (calls.length == 1) {
                calls = Strings.forwardslash.split(genotype);
            }
            if (calls.length != 2) {
                System.err.println("WARNING: genotype could not be parsed for sample " + this.individuals.get(ind.intValue()) + "\t" + genotype);
                continue;
            }
            if (zero.matcher(calls[0]).matches()) {
                allele1[ind.intValue()] = refAllele;
            } else if (one.matcher(calls[0]).matches()) {
                allele1[ind.intValue()] = altAllele;
            } else if (Strings.dot.matcher(calls[0]).matches()) {
                allele1[ind.intValue()] = 0;
            } else {
                System.err.println("Could not parse allele1 of genotype for sample " + this.individuals.get(ind.intValue()) + "\t" + genotype);
            }
            if (zero.matcher(calls[1]).matches()) {
                allele2[ind.intValue()] = refAllele;
            } else if (one.matcher(calls[1]).matches()) {
                allele2[ind.intValue()] = altAllele;
            } else if (Strings.dot.matcher(calls[1]).matches()) {
                allele2[ind.intValue()] = 0;
            } else {
                System.err.println("Could not parse allele2 of genotype for sample " + this.individuals.get(ind.intValue()) + "\t" + genotype);
            }
        }
        return new Pair<>(allele1, allele2);
    }

    /**
     * Rebuilds {@link #colToIndId} from a {@code #CHROM} header line: every sample column
     * gets the index of its sample name in {@link #individualMap} (null when unknown).
     */
    private void parseHeaderLine(String[] strArr) {
        this.colToIndId = new Integer[strArr.length];
        for (int col = FIRST_SAMPLE_COLUMN; col < strArr.length; col++) {
            this.colToIndId[col] = this.individualMap.get(strArr[col]);
        }
    }
}
