package net.maizegenetics.pangenome.hapcollapse;

import com.google.common.collect.Multimap;
import java.awt.Frame;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import net.maizegenetics.analysis.distance.DistanceMatrixPlugin;
import net.maizegenetics.analysis.filter.FilterSiteBuilderPlugin;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.taxa.distance.DistanceMatrix;
import net.maizegenetics.util.DirectoryCrawler;
import net.maizegenetics.util.LoggingUtils;
import net.maizegenetics.util.Utils;

/* loaded from: input_file:net/maizegenetics/pangenome/hapcollapse/CreateHaplotypesFromFasta.class */
/**
 * Stand-alone driver that summarises per-anchor FASTA alignments.
 *
 * <p>For every {@code *.fa.gz} file found under {@link #localDirectory} the driver reads the
 * alignment, drops sites whose minor allele is rare, clusters the taxa with
 * {@code FindHaplotypeClustersPlugin.findHapClusterHighCoverage}, builds one consensus sequence per
 * cluster and appends a tab-delimited row to {@link #anchorSummaryFile}.
 *
 * <p>All input/output locations are hard-coded constants; the command-line arguments are ignored.
 */
public class CreateHaplotypesFromFasta {
    public static final String localDirectory = "/Users/edbuckler/temp/chr10fastafilesFilt/";
    public static final String localConsensusDirectoryOut = "/Users/edbuckler/temp/consensus/";
    public static final String loggingFile = "/Users/edbuckler/temp/logging.txt";
    public static final String anchorSummaryFile = "/Users/edbuckler/temp/consensusSummaryMAF02_170621_all.txt";
    private static final double maxDistance = 0.002d;
    private static final int minSites = 100;
    private static final String[] assemblies = {"W22Assembly", "B104Assembly", "CML247Assembly", "PH207Assembly", "EP1Assembly"};

    /** Extracts the anchor identifier (the digits following "Id") from a FASTA file path. */
    private static final Pattern anchorIdPattern = Pattern.compile("Id(\\d*)");
    /** Upper bound on the number of FASTA files processed in one run. */
    private static final long maxFastaFiles = 10000L;
    /** Minor allele frequency below which a site is kept by the clustering filter, and the minimum used when counting segregating sites. */
    private static final double minSegregatingMaf = 1.0E-4d;
    /** Minor allele frequency above which a site is kept by the clustering filter. */
    private static final double maxRareMaf = 0.02d;
    /** Number of largest clusters summed into the "NumTaxaInTop10" column. */
    private static final int topClusterCount = 10;
    /** Genotype byte counted by {@link #numberOfGaps}; presumably TASSEL's gap/gap diploid code (0x55) - TODO confirm. */
    private static final byte gapGenotype = 85;
    /** Size of the per-site allele tally; must exceed every value returned by {@code GenotypeTableUtils.getDiploidValues}. */
    private static final int alleleCountSlots = 20;

    /**
     * Distance statistics for one anchor, as filled in by
     * {@link CreateHaplotypesFromFasta#diversityReport(GenotypeTable)}. The three pairwise
     * distances stay {@code NaN} when either taxon of the pair is absent from the alignment.
     */
    public static class DistanceReport {
        double b73dist = Double.NaN;
        double cml247dist = Double.NaN;
        double w22dist = Double.NaN;
        int closeB73Haplotypes;
        double meanDivergence;

        /**
         * Renders the report as tab-terminated columns in the order given by {@link #headers()}.
         *
         * @return the five values, each followed by a tab; decimals always use '.' regardless of
         *     the JVM default locale so the output stays machine-readable
         */
        public String formatString() {
            return String.format(Locale.ROOT, "%.4f\t%.4f\t%.4f\t%d\t%.4f\t",
                    b73dist, cml247dist, w22dist, closeB73Haplotypes, meanDivergence);
        }

        /**
         * @return the tab-terminated column names matching {@link #formatString()}
         */
        public static String headers() {
            return "b73dist\tcml247dist\tw22dist\tcloseB73Haplotypes\tmeanDivergence\t";
        }
    }

    /**
     * Writes the anchor summary table for up to {@link #maxFastaFiles} FASTA files found under
     * {@link #localDirectory}.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        try {
            LoggingUtils.setupStdOutLogging();
            // try-with-resources so the summary is flushed and closed even if a file blows up midway.
            try (BufferedWriter summaryWriter = Files.newBufferedWriter(Paths.get(anchorSummaryFile))) {
                summaryWriter.write("AnchorID\tOriginalSites\tSegSites\tConsensusSeqSite\tFilteredTaxa\tConsesusHaplotypes\tNumTaxaInTop10\t");
                summaryWriter.write(DistanceReport.headers());
                summaryWriter.newLine();
                List<Path> fastaPaths = DirectoryCrawler.listPaths("glob:*.fa.gz", Paths.get(localDirectory));
                fastaPaths.stream().limit(maxFastaFiles).forEach(fastaPath -> {
                    try {
                        writeAnchorSummary(fastaPath, summaryWriter);
                    } catch (IOException e) {
                        // Best effort: report the failing file and carry on with the next one.
                        System.err.println("Failed to summarise " + fastaPath);
                        e.printStackTrace();
                    }
                });
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Processes one anchor FASTA file and appends its summary row to {@code summaryWriter}.
     *
     * <p>Files whose path carries no {@code Id<digits>} token, or for which the site filter yields
     * no table, are skipped without writing a row.
     *
     * @param fastaPath alignment to read
     * @param summaryWriter open writer positioned at the start of a row
     * @throws IOException if the row cannot be written
     */
    private static void writeAnchorSummary(Path fastaPath, BufferedWriter summaryWriter) throws IOException {
        String fastaFile = fastaPath.toString();
        System.out.println(fastaFile);
        Matcher idMatcher = anchorIdPattern.matcher(fastaFile);
        if (!idMatcher.find()) {
            // Matcher.group() would throw IllegalStateException here and abort the whole run.
            System.err.println("Skipping " + fastaFile + ": no anchor id of the form Id<digits> in the path");
            return;
        }
        String anchorId = idMatcher.group(1);
        GenotypeTable anchorGenotypes = ImportUtils.readFasta(fastaFile);

        // Keep sites that are either (near) monomorphic or common; sites with a rare minor allele are dropped.
        int[] retainedSites = IntStream.range(0, anchorGenotypes.numberOfSites())
                .filter(site -> {
                    double maf = anchorGenotypes.minorAlleleFrequency(site);
                    return maf < minSegregatingMaf || maf > maxRareMaf;
                })
                .toArray();
        GenotypeTable filteredGenotypes = FilterGenotypeTable.getInstance(anchorGenotypes, retainedSites);
        if (filteredGenotypes == null) {
            return;
        }
        // The rescaled distance is only reported; clustering below still uses maxDistance unchanged.
        System.out.println("OrigSites=" + anchorGenotypes.numberOfSites() + "\tMAF filter segsites=" + filteredGenotypes.numberOfSites() + "\tnew maxDistance=" + ((maxDistance * anchorGenotypes.numberOfSites()) / filteredGenotypes.numberOfSites()));

        Multimap<Taxon, Taxon> clusters = FindHaplotypeClustersPlugin.findHapClusterHighCoverage(filteredGenotypes, minSites, maxDistance);
        GenotypeTable consensusGenotypes = createConsensusGenotypeTable(anchorGenotypes, clusters);
        clusters.asMap().forEach((representative, members) ->
                System.out.println(representative.getName() + "\t" + members.size() + "\t"));
        int taxaInTopClusters = haplotypesCaptured(clusters, topClusterCount);
        System.out.println(taxaInTopClusters);

        // Assemble the complete row before writing so a failure never leaves a half-written line.
        String row = String.format(Locale.ROOT, "%s\t%d\t%d\t%d\t%d\t%d\t%d\t",
                anchorId,
                anchorGenotypes.numberOfSites(),
                getSegSites(anchorGenotypes, 0, 0, minSegregatingMaf),
                getSegSites(consensusGenotypes, 0, 0, minSegregatingMaf),
                anchorGenotypes.numberOfTaxa(),
                consensusGenotypes.numberOfTaxa(),
                taxaInTopClusters)
                + diversityReport(anchorGenotypes).formatString();
        summaryWriter.write(row);
        summaryWriter.newLine();
    }

    /**
     * Computes the distance statistics for one alignment.
     *
     * <p>Pairwise distances are only filled in when both taxa of the pair are present (index
     * {@code > -1}); otherwise they keep their {@code NaN} default. {@code closeB73Haplotypes}
     * counts taxa whose finite distance to "B73Ref" is below {@code maxDistance} (the range
     * includes B73Ref's own index), and stays 0 when "B73Ref" is absent.
     *
     * @param genotypeTable alignment to analyse
     * @return a freshly populated report
     */
    private static DistanceReport diversityReport(GenotypeTable genotypeTable) {
        DistanceMatrix distances = DistanceMatrixPlugin.getDistanceMatrix(genotypeTable);
        DistanceReport report = new DistanceReport();
        int b73Ref = genotypeTable.taxa().indexOf("B73Ref");
        int b73Caller = genotypeTable.taxa().indexOf("B73_Haplotype_Caller");
        int cml247Assembly = genotypeTable.taxa().indexOf("CML247Assembly");
        int cml247Caller = genotypeTable.taxa().indexOf("CML247_Haplotype_Caller");
        int w22Assembly = genotypeTable.taxa().indexOf("W22Assembly");
        int w22Caller = genotypeTable.taxa().indexOf("W22_Haplotype_Caller");

        // Both indices must be valid; a missing B73Ref used to be passed to getDistance as -1.
        if (b73Ref > -1 && b73Caller > -1) {
            report.b73dist = distances.getDistance(b73Ref, b73Caller);
        }
        if (cml247Assembly > -1 && cml247Caller > -1) {
            report.cml247dist = distances.getDistance(cml247Assembly, cml247Caller);
        }
        if (w22Assembly > -1 && w22Caller > -1) {
            report.w22dist = distances.getDistance(w22Assembly, w22Caller);
        }
        if (b73Ref > -1) {
            report.closeB73Haplotypes = (int) IntStream.range(0, genotypeTable.numberOfTaxa())
                    .mapToDouble(taxonIndex -> distances.getDistance(b73Ref, taxonIndex))
                    .filter(Double::isFinite)
                    .filter(distance -> distance < maxDistance)
                    .count();
        }
        report.meanDivergence = distances.meanDistance();
        return report;
    }

    /**
     * Sums the sizes of the {@code i} largest clusters.
     *
     * @param multimap clusters keyed by their representative taxon
     * @param i number of largest clusters to include; fewer are summed when fewer exist
     * @return total number of member taxa in those clusters
     */
    private static int haplotypesCaptured(Multimap<Taxon, Taxon> multimap, int i) {
        int[] ascendingSizes = multimap.asMap().values().stream()
                .mapToInt(Collection::size)
                .sorted()
                .toArray();
        int total = 0;
        // Sizes are ascending, so the largest clusters sit at the tail of the array.
        for (int k = Math.max(0, ascendingSizes.length - i); k < ascendingSizes.length; k++) {
            total += ascendingSizes[k];
        }
        return total;
    }

    /**
     * Builds a genotype table holding one consensus sequence per cluster, named after the cluster key.
     *
     * @param genotypeTable source alignment supplying positions and member genotypes
     * @param multimap clusters keyed by their representative taxon
     * @return the consensus table
     */
    private static GenotypeTable createConsensusGenotypeTable(GenotypeTable genotypeTable, Multimap<Taxon, Taxon> multimap) {
        GenotypeTableBuilder builder = GenotypeTableBuilder.getTaxaIncremental(genotypeTable.positions());
        for (Map.Entry<Taxon, Collection<Taxon>> cluster : multimap.asMap().entrySet()) {
            builder.addTaxon(cluster.getKey(), consensusGameteCalls(genotypeTable, cluster.getValue()));
        }
        return builder.build();
    }

    /**
     * Counts the sites that pass a {@code FilterSiteBuilderPlugin} run with the given minimum
     * count and minimum allele frequency.
     *
     * @param genotypeTable table to filter
     * @param i minimum site count passed to the plugin
     * @param i2 unused
     * @param d minimum allele frequency passed to the plugin
     * @return number of sites in the filtered table, or 0 when the plugin result does not hold exactly two entries
     */
    private static int getSegSites(GenotypeTable genotypeTable, int i, int i2, double d) {
        DataSet filtered = new FilterSiteBuilderPlugin((Frame) null, false)
                .siteMinCount(i)
                .siteMinAlleleFreq(d)
                .performFunction(DataSet.getDataSet(genotypeTable));
        if (filtered.getSize() != 2) {
            return 0;
        }
        return ((GenotypeTable) filtered.getData(0).getData()).numberOfSites();
    }

    /**
     * Returns the table produced by a {@code FilterSiteBuilderPlugin} run bounded by minimum and
     * maximum allele frequency.
     *
     * @param genotypeTable table to filter
     * @param i first value passed to {@code siteMinCount}
     * @param i2 second value passed to {@code siteMinCount}
     * @param d minimum allele frequency
     * @param d2 maximum allele frequency
     * @return the filtered table, or {@code null} when the plugin result does not hold exactly two entries
     */
    private static GenotypeTable getSegSitesGT(GenotypeTable genotypeTable, int i, int i2, double d, double d2) {
        // NOTE(review): siteMinCount is set twice (i, then i2); presumably the later call wins - confirm intent.
        DataSet filtered = new FilterSiteBuilderPlugin((Frame) null, false)
                .siteMinCount(i)
                .siteMinAlleleFreq(d)
                .siteMinCount(i2)
                .siteMaxAlleleFreq(d2)
                .performFunction(DataSet.getDataSet(genotypeTable));
        if (filtered.getSize() != 2) {
            return null;
        }
        return (GenotypeTable) filtered.getData(0).getData();
    }

    /**
     * Counts the sites at which taxon {@code i} carries the {@link #gapGenotype} byte.
     *
     * @param genotypeTable table to scan
     * @param i taxon index
     * @return number of matching sites
     */
    private static int numberOfGaps(GenotypeTable genotypeTable, int i) {
        int gaps = 0;
        for (int site = 0; site < genotypeTable.numberOfSites(); site++) {
            if (genotypeTable.genotype(i, site) == gapGenotype) {
                gaps++;
            }
        }
        return gaps;
    }

    /**
     * Concatenates the string form of every site's genotype for taxon {@code i}.
     *
     * @param genotypeTable table to read
     * @param i taxon index
     * @return the concatenated genotype strings in site order
     */
    private static String sequenceToString(GenotypeTable genotypeTable, int i) {
        int siteCount = genotypeTable.numberOfSites();
        StringBuilder sequence = new StringBuilder(siteCount);
        for (int site = 0; site < siteCount; site++) {
            sequence.append(genotypeTable.genotypeAsString(i, site));
        }
        return sequence.toString();
    }

    /**
     * Writes every taxon of {@code genotypeTable} as a FASTA record. When {@code optional} is
     * present the header becomes {@code >name=member1,member2,...} using the taxa mapped to that
     * name. Failures are reported on stderr and otherwise ignored.
     *
     * @param str output file name, opened through {@code Utils.getBufferedWriter}
     * @param genotypeTable sequences to write
     * @param optional optional cluster membership appended to each header
     */
    private static void writeFasta(String str, GenotypeTable genotypeTable, Optional<Multimap<Taxon, Taxon>> optional) {
        // try-with-resources closes (and flushes) the writer even when a record fails to write.
        try (BufferedWriter fastaWriter = Utils.getBufferedWriter(str)) {
            for (int taxonIndex = 0; taxonIndex < genotypeTable.numberOfTaxa(); taxonIndex++) {
                Taxon taxon = genotypeTable.taxa().get(taxonIndex);
                fastaWriter.write(">" + taxon.getName());
                if (optional.isPresent()) {
                    String members = optional.get().get(taxon).stream()
                            .map(Taxon::getName)
                            .collect(Collectors.joining(","));
                    fastaWriter.write("=" + members);
                }
                fastaWriter.newLine();
                fastaWriter.write(sequenceToString(genotypeTable, taxonIndex));
                fastaWriter.newLine();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Placeholder: not implemented.
     *
     * @return always {@code null}
     */
    private static Multimap<Taxon, Taxon> findHapClusterByUPGMA(GenotypeTable genotypeTable, int i, double d) {
        return null;
    }

    /**
     * Placeholder: not implemented.
     *
     * @return always {@code null}
     */
    private static Multimap<Taxon, Taxon> findHapClusterByCentrality(GenotypeTable genotypeTable, int i, double d) {
        return null;
    }

    /**
     * Placeholder: not implemented.
     *
     * @return always {@code null}
     */
    private static Multimap<Taxon, Taxon> findHapClusterByTerry(GenotypeTable genotypeTable, int i, double d) {
        return null;
    }

    /**
     * Tallies genotype bytes equal to -1 and, among the rest, those that
     * {@code GenotypeTableUtils.isHeterozygous} flags.
     *
     * @param bArr genotype calls
     * @return a two-element array: {@code [count of -1 calls, count of heterozygous calls]}
     */
    private static int[] countUnknown(byte[] bArr) {
        int unknownCalls = 0;
        int heterozygousCalls = 0;
        for (byte call : bArr) {
            if (call == -1) {
                unknownCalls++;
            } else if (GenotypeTableUtils.isHeterozygous(call)) {
                heterozygousCalls++;
            }
        }
        return new int[]{unknownCalls, heterozygousCalls};
    }

    /**
     * Derives a consensus call per site for a group of taxa: both alleles of every member are
     * tallied and the most frequent allele (lowest allele code on ties) is emitted as a
     * homozygous genotype. Sites with no tallied allele keep the initial value -1.
     *
     * <p>NOTE(review): whatever code {@code getDiploidValues} reports for missing data is tallied
     * like any other allele - confirm that is intended.
     *
     * @param genotypeTable alignment containing the member taxa
     * @param collection member taxa; each is looked up in {@code genotypeTable.taxa()}
     * @return one genotype byte per site
     */
    private static byte[] consensusGameteCalls(GenotypeTable genotypeTable, Collection<Taxon> collection) {
        int[] memberIndices = collection.stream()
                .mapToInt(taxon -> genotypeTable.taxa().indexOf(taxon))
                .toArray();
        int siteCount = genotypeTable.numberOfSites();
        byte[] consensus = new byte[siteCount];
        Arrays.fill(consensus, (byte) -1);
        for (int site = 0; site < siteCount; site++) {
            int[] alleleCounts = new int[alleleCountSlots];
            for (int taxonIndex : memberIndices) {
                byte[] alleles = GenotypeTableUtils.getDiploidValues(genotypeTable.genotype(taxonIndex, site));
                alleleCounts[alleles[0]]++;
                alleleCounts[alleles[1]]++;
            }
            // Strict '>' keeps the lowest allele code when counts tie.
            int majorAllele = 0;
            for (int allele = 1; allele < alleleCounts.length; allele++) {
                if (alleleCounts[allele] > alleleCounts[majorAllele]) {
                    majorAllele = allele;
                }
            }
            if (alleleCounts[majorAllele] > 0) {
                consensus[site] = GenotypeTableUtils.getDiploidValue((byte) majorAllele, (byte) majorAllele);
            }
        }
        return consensus;
    }
}
