package edu.stanford.futuredata.macrobase.ingest;

import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.datamodel.Row;
import edu.stanford.futuredata.macrobase.datamodel.Schema;
import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

/* loaded from: input_file:edu/stanford/futuredata/macrobase/ingest/CSVDataFrameLoader.class */
/**
 * Loads a CSV file with a header row into a {@link DataFrame}.
 *
 * <p>Every column named in the header becomes a schema column. Columns without an
 * explicit type (see {@link #setColumnTypes(Map)}) are treated as
 * {@code Schema.ColType.STRING}. Records containing a value that cannot be parsed as a
 * double in a {@code DOUBLE} column are skipped and counted; the count is available from
 * {@link #getBadRecords()}.
 */
public class CSVDataFrameLoader implements DataFrameLoader {
    /** Path of the CSV file to read. */
    private final String fileName;
    /** Requested column types keyed by header name; unlisted columns default to STRING. */
    private Map<String, Schema.ColType> columnTypes = new HashMap<>();
    /** Number of records skipped by the most recent {@link #load()} call. */
    private int badRecords;

    /**
     * Creates a loader for the given CSV file.
     *
     * @param str path of the CSV file to read
     */
    public CSVDataFrameLoader(String str) {
        this.fileName = str;
    }

    /**
     * Sets the types to use for named columns.
     *
     * @param map column types keyed by header name; the map is stored by reference
     * @return this loader, to allow call chaining
     */
    @Override // edu.stanford.futuredata.macrobase.ingest.DataFrameLoader
    public DataFrameLoader setColumnTypes(Map<String, Schema.ColType> map) {
        this.columnTypes = map;
        return this;
    }

    /**
     * Parses the file and builds a data frame from its records.
     *
     * <p>The file is decoded with the platform default charset and the first record is
     * used as the header. The bad-record counter is reset at the start of every call.
     *
     * @return a data frame containing one row per successfully converted record
     * @throws RuntimeException if a column is configured with a type other than
     *     {@code STRING} or {@code DOUBLE} and at least one record is read
     * @throws Exception if the file cannot be opened or parsed
     */
    @Override // edu.stanford.futuredata.macrobase.ingest.DataFrameLoader
    public DataFrame load() throws Exception {
        // try-with-resources guarantees the underlying file handle is released even when
        // parsing or type conversion throws.
        try (CSVParser parser = CSVParser.parse(
                new File(this.fileName), Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
            Map<String, Integer> headerMap = parser.getHeaderMap();
            int numColumns = headerMap.size();

            // The header map is keyed by name; invert it into position-indexed arrays so
            // that columns are added to the schema in file order.
            String[] columnNames = new String[numColumns];
            Schema.ColType[] types = new Schema.ColType[numColumns];
            for (Map.Entry<String, Integer> header : headerMap.entrySet()) {
                int position = header.getValue();
                columnNames[position] = header.getKey();
                types[position] =
                        this.columnTypes.getOrDefault(header.getKey(), Schema.ColType.STRING);
            }

            Schema schema = new Schema();
            for (int i = 0; i < numColumns; i++) {
                schema.addColumn(types[i], columnNames[i]);
            }

            this.badRecords = 0;
            ArrayList<Row> rows = new ArrayList<>();
            for (CSVRecord record : parser) {
                try {
                    ArrayList<Object> values = new ArrayList<>(numColumns);
                    for (int col = 0; col < numColumns; col++) {
                        Schema.ColType type = types[col];
                        String raw = record.get(col);
                        if (type == Schema.ColType.STRING) {
                            values.add(raw);
                        } else if (type == Schema.ColType.DOUBLE) {
                            values.add(Double.valueOf(Double.parseDouble(raw)));
                        } else {
                            throw new RuntimeException("Bad ColType");
                        }
                    }
                    rows.add(new Row(values));
                } catch (NumberFormatException e) {
                    // A non-numeric value in a DOUBLE column invalidates the whole record:
                    // drop it and keep count instead of aborting the load.
                    this.badRecords++;
                }
            }
            return new DataFrame(schema, rows);
        }
    }

    /**
     * Returns how many records the most recent {@link #load()} call skipped because a
     * value could not be parsed as a double. The value is 0 before the first load.
     *
     * @return number of skipped records
     */
    public int getBadRecords() {
        return this.badRecords;
    }
}
