package ai.catboost.spark;

import ai.catboost.CatBoostError;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.util.Arrays;
import org.apache.spark.ml.attribute.Attribute;
import org.apache.spark.ml.attribute.AttributeGroup;
import org.apache.spark.ml.attribute.NumericAttribute$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.encoders.RowEncoder$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.BinaryType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import org.apache.spark.storage.StorageLevel$;
import org.slf4j.Logger;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.EDatasetVisitorType;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.IQuantizedFeaturesDataVisitor;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.QuantizedFeaturesInfoPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TDataProviderBuilderOptions;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TDataProviderClosureForJVM;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TDataProviderPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TFeaturesLayoutPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TIntermediateDataMetaInfo;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TLocalExecutor;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TQuantizedObjectsDataProviderPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TRawObjectsDataProviderPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TVector_TDataProviderPtr;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TVector_TMaybeOwningConstArrayHolder_float;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TVector_TMaybeOwningConstArrayHolder_i32;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.TVector_TString;
import ru.yandex.catboost.spark.catboost4j_spark.core.src.native_impl.native_impl;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
import scala.collection.Iterable;
import scala.collection.IterableLike;
import scala.collection.Iterator;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.IndexedSeq;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.ArrayBuilder;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashMap$;
import scala.collection.mutable.HashSet;
import scala.math.Ordering$;
import scala.math.Ordering$Byte$;
import scala.math.Ordering$Double$;
import scala.math.Ordering$Float$;
import scala.math.Ordering$Int$;
import scala.math.Ordering$Long$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;

/* compiled from: DataHelpers.scala */
/* loaded from: input_file:ai/catboost/spark/DataHelpers$.class */
public final class DataHelpers$ {
    // Singleton instance of this helper object (the file is compiled from DataHelpers.scala).
    // The decompiler shows a null initializer; the private constructor assigns the real instance.
    public static final DataHelpers$ MODULE$ = null;

    // Creates the single instance when the class is initialized; the constructor stores it in MODULE$.
    static {
        new DataHelpers$();
    }

    /**
     * Returns fields of the given schema, optionally filtered.
     *
     * @param structType schema whose fields are returned
     * @param strArr     when {@code null}, every field of the schema is returned; otherwise the
     *                   fields are filtered with a predicate built from this array (the predicate
     *                   class is defined elsewhere; presumably it keeps fields whose names are
     *                   listed here, to be confirmed)
     * @return the selected fields as a Scala {@code Seq}
     */
    public Seq<StructField> selectSchemaFields(StructType structType, String[] strArr) {
        if (strArr == null) {
            // No selection requested: expose the whole schema.
            return structType.toSeq();
        }
        return (Seq) structType.filter(new DataHelpers$$anonfun$selectSchemaFields$1(strArr));
    }

    /**
     * Scala-compiler-generated accessor for the default value of the second parameter of
     * {@code selectSchemaFields}.
     *
     * @return {@code null}, which {@code selectSchemaFields} treats as "return all fields"
     */
    public String[] selectSchemaFields$default$2() {
        return null;
    }

    /**
     * Rebuilds the dataset after grouping its rows by the {@code "groupId"} column and
     * flat-mapping every group through a function parameterized with the index of the
     * {@code "sampleId"} column. The result keeps the original schema.
     *
     * <p>The grouping and mapping functions are defined elsewhere; judging by the method name they
     * presumably replace sample ids with per-group sample indices (to be confirmed).
     *
     * @param dataset input data; its schema must contain {@code "groupId"} and {@code "sampleId"}
     * @return a new DataFrame with the same schema as {@code dataset}
     */
    public Dataset<Row> mapSampleIdxToPerGroupSampleIdx(Dataset<Row> dataset) {
        RDD<Row> rows = dataset.rdd();

        // Group rows by the value of the "groupId" column (key class tag is Long).
        int groupIdFieldIdx = dataset.schema().fieldIndex("groupId");
        RDD groupedRows = rows.groupBy(new DataHelpers$$anonfun$4(groupIdFieldIdx), ClassTag$.MODULE$.Long());

        // Expand every group back to rows.
        int sampleIdFieldIdx = dataset.schema().fieldIndex("sampleId");
        RDD remappedRows = groupedRows.flatMap(new DataHelpers$$anonfun$5(sampleIdFieldIdx), ClassTag$.MODULE$.apply(Row.class));

        return dataset.sparkSession().createDataFrame(remappedRows, dataset.schema());
    }

    /**
     * Builds Spark ML column metadata for a features column: an {@code AttributeGroup} named
     * {@code "userFeatures"} with one attribute per entry of {@code strArr}.
     *
     * <p>Attribute names are first written into a same-sized working array by a helper function
     * that also receives an empty {@code HashSet} (defined elsewhere; presumably it is used to keep
     * names unique, to be confirmed). Each name is then turned into an attribute derived from the
     * default numeric attribute.
     *
     * @param strArr initial feature names; only its length and contents are read here
     * @return metadata produced by {@code AttributeGroup.toMetadata()}
     */
    public Metadata makeFeaturesMetadata(String[] strArr) {
        String[] attrNames = new String[strArr.length];

        // Fill attrNames index by index, for 0 <= idx < attrNames.length.
        int attrCount = Predef$.MODULE$.refArrayOps(attrNames).size();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), attrCount).foreach(new DataHelpers$$anonfun$makeFeaturesMetadata$1(strArr, attrNames, new HashSet()));

        // Map every name to an Attribute based on the default numeric attribute.
        Object mappedAttrs = Predef$.MODULE$.refArrayOps(attrNames).map(new DataHelpers$$anonfun$7(NumericAttribute$.MODULE$.defaultAttr()), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Attribute.class)));
        Attribute[] attrs = (Attribute[]) Predef$.MODULE$.refArrayOps((Object[]) mappedAttrs).toArray(ClassTag$.MODULE$.apply(Attribute.class));

        return new AttributeGroup("userFeatures", attrs).toMetadata();
    }

    /**
     * Collects the distinct values of an integer-typed label column, sorted ascending.
     *
     * <p>{@code IntegerType} columns are sorted with the Int ordering. {@code LongType} columns are
     * sorted with the Long ordering and then mapped element-wise by a helper function before being
     * stored in the {@code int[]} result (the helper is defined elsewhere).
     *
     * @param dataset data to read the column from
     * @param str     name of the label column
     * @return sorted distinct label values
     * @throws CatBoostError if the column is neither {@code IntegerType} nor {@code LongType}
     */
    public int[] getDistinctIntLabelValues(Dataset<Row> dataset, String str) {
        // The local iterator over distinct rows is requested before the type check, as in the original flow.
        Iterator distinctRows = (Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(dataset.select(str, Predef$.MODULE$.wrapRefArray(new String[0])).distinct().toLocalIterator()).asScala();
        DataType labelType = dataset.schema().apply(str).dataType();

        DataType intType = DataTypes.IntegerType;
        boolean isInt = intType == null ? labelType == null : intType.equals(labelType);
        if (isInt) {
            Object sortedInts = distinctRows.map(new DataHelpers$$anonfun$getDistinctIntLabelValues$2()).toSeq().sorted(Ordering$Int$.MODULE$);
            return (int[]) ((TraversableOnce) sortedInts).toArray(ClassTag$.MODULE$.Int());
        }

        DataType longType = DataTypes.LongType;
        boolean isLong = longType == null ? labelType == null : longType.equals(labelType);
        if (!isLong) {
            throw new CatBoostError("Unsupported data type for Integer Label");
        }

        // Sort as longs first, then map each element for the int[] result.
        Object sortedLongs = distinctRows.map(new DataHelpers$$anonfun$getDistinctIntLabelValues$3()).toSeq().sorted(Ordering$Long$.MODULE$);
        Object mappedValues = ((TraversableLike) sortedLongs).map(new DataHelpers$$anonfun$getDistinctIntLabelValues$1(), Seq$.MODULE$.canBuildFrom());
        return (int[]) ((TraversableOnce) mappedValues).toArray(ClassTag$.MODULE$.Int());
    }

    /**
     * Collects the distinct values of a floating-point label column, sorted ascending.
     *
     * <p>{@code FloatType} columns are sorted with the Float ordering. {@code DoubleType} columns
     * are sorted with the Double ordering and then mapped element-wise by a helper function before
     * being stored in the {@code float[]} result (the helper is defined elsewhere).
     *
     * @param dataset data to read the column from
     * @param str     name of the label column
     * @return sorted distinct label values
     * @throws CatBoostError if the column is neither {@code FloatType} nor {@code DoubleType}
     */
    public float[] getDistinctFloatLabelValues(Dataset<Row> dataset, String str) {
        // The local iterator over distinct rows is requested before the type check, as in the original flow.
        Iterator distinctRows = (Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(dataset.select(str, Predef$.MODULE$.wrapRefArray(new String[0])).distinct().toLocalIterator()).asScala();
        DataType labelType = dataset.schema().apply(str).dataType();

        DataType floatType = DataTypes.FloatType;
        boolean isFloat = floatType == null ? labelType == null : floatType.equals(labelType);
        if (isFloat) {
            Object sortedFloats = distinctRows.map(new DataHelpers$$anonfun$getDistinctFloatLabelValues$2()).toSeq().sorted(Ordering$Float$.MODULE$);
            return (float[]) ((TraversableOnce) sortedFloats).toArray(ClassTag$.MODULE$.Float());
        }

        DataType doubleType = DataTypes.DoubleType;
        boolean isDouble = doubleType == null ? labelType == null : doubleType.equals(labelType);
        if (!isDouble) {
            throw new CatBoostError("Unsupported data type for Float Label");
        }

        // Sort as doubles first, then map each element for the float[] result.
        Object sortedDoubles = distinctRows.map(new DataHelpers$$anonfun$getDistinctFloatLabelValues$3()).toSeq().sorted(Ordering$Double$.MODULE$);
        Object mappedValues = ((TraversableLike) sortedDoubles).map(new DataHelpers$$anonfun$getDistinctFloatLabelValues$1(), Seq$.MODULE$.canBuildFrom());
        return (float[]) ((TraversableOnce) mappedValues).toArray(ClassTag$.MODULE$.Float());
    }

    /**
     * Collects the distinct values of a string label column, sorted with the String ordering, and
     * wraps them in a native {@code TVector_TString}.
     *
     * @param dataset data to read the column from
     * @param str     name of the label column
     * @return native string vector with the sorted distinct label values
     * @throws CatBoostError if the column is not {@code StringType}
     */
    public TVector_TString getDistinctStringLabelValues(Dataset<Row> dataset, String str) {
        // The local iterator over distinct rows is requested before the type check, as in the original flow.
        Iterator distinctRows = (Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(dataset.select(str, Predef$.MODULE$.wrapRefArray(new String[0])).distinct().toLocalIterator()).asScala();
        DataType labelType = dataset.schema().apply(str).dataType();

        DataType stringType = DataTypes.StringType;
        boolean isString = stringType == null ? labelType == null : stringType.equals(labelType);
        if (!isString) {
            throw new CatBoostError("Unsupported data type for String Label");
        }

        Object sortedLabels = distinctRows.map(new DataHelpers$$anonfun$getDistinctStringLabelValues$1()).toSeq().sorted(Ordering$String$.MODULE$);
        return new TVector_TString((Iterable<String>) JavaConverters$.MODULE$.asJavaIterableConverter(((IterableLike) sortedLabels).toIterable()).asJava());
    }

    /**
     * Computes a per-feature byte array by running a per-partition function over the dataset and
     * merging the per-partition results on the driver.
     *
     * <p>The partition and merge functions are defined elsewhere; judging by the method name, each
     * byte presumably flags whether the corresponding feature contains NaN values (to be confirmed).
     *
     * <p>The intermediate dataset is persisted in memory while it is iterated and is always
     * unpersisted afterwards, including when iteration fails, so cached blocks are not left behind.
     *
     * @param dataset input data
     * @param str     name of the features column (resolved to a field index in the schema)
     * @param i       number of features, which is the length of the returned array
     * @return merged array of length {@code i}
     */
    public byte[] calcFeaturesHasNans(Dataset<Row> dataset, String str, int i) {
        Dataset persist = dataset.mapPartitions(new DataHelpers$$anonfun$8(i, dataset.schema().fieldIndex(str)), dataset.sparkSession().implicits().newByteArrayEncoder()).persist(StorageLevel$.MODULE$.MEMORY_ONLY());
        // A freshly allocated byte[] is already zero-filled, so no explicit fill is needed.
        ObjectRef create = ObjectRef.create(new byte[i]);
        try {
            ((Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(persist.toLocalIterator()).asScala()).foreach(new DataHelpers$$anonfun$calcFeaturesHasNans$1(i, create));
        } finally {
            // Release the cached partitions even if a Spark job or the merge function throws.
            persist.unpersist();
        }
        return (byte[]) create.elem;
    }

    /**
     * Consumes an iterator of rows with raw (non-quantized) features and builds a native raw
     * objects data provider from them.
     *
     * <p>One {@code ArrayBuilder} is allocated per available float feature and per available
     * categorical feature of the layout. A row-processing function (defined elsewhere) then walks
     * the iterator, and the builders' contents are handed to native vectors that feed
     * {@code native_impl.CreateRawObjectsDataProvider}.
     *
     * @param iterator           rows to process
     * @param i                  passed to the row-processing function (presumably the index of the
     *                           features column, to be confirmed)
     * @param tFeaturesLayoutPtr features layout used to determine the available features
     * @param i2                 passed through to {@code CreateRawObjectsDataProvider}
     * @param z                  passed to the row-processing function
     * @param i3                 passed to the row-processing function
     * @param tLocalExecutor     native executor passed to {@code CreateRawObjectsDataProvider}
     * @return the buffer filled by the row-processing function (its size is used as the object
     *         count) together with the created data provider
     */
    public Tuple2<ArrayBuffer<Object[]>, TRawObjectsDataProviderPtr> processDatasetWithRawFeatures(Iterator<Row> iterator, int i, TFeaturesLayoutPtr tFeaturesLayoutPtr, int i2, boolean z, int i3, TLocalExecutor tLocalExecutor) {
        ArrayBuffer rowsBuffer = new ArrayBuffer();
        int[] floatFlatIndices = native_impl.GetAvailableFeaturesFlatIndices_Float(tFeaturesLayoutPtr.__deref__()).toPrimitiveArray();
        int[] catFlatIndices = native_impl.GetAvailableFeaturesFlatIndices_Categorical(tFeaturesLayoutPtr.__deref__()).toPrimitiveArray();

        // One builder slot per available float feature, initialized index by index.
        ObjectRef floatBuilders = ObjectRef.create(new ArrayBuilder[Predef$.MODULE$.intArrayOps(floatFlatIndices).size()]);
        int floatBuilderCount = Predef$.MODULE$.refArrayOps((ArrayBuilder[]) floatBuilders.elem).size();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), floatBuilderCount).foreach$mVc$sp(new DataHelpers$$anonfun$processDatasetWithRawFeatures$1(floatBuilders));

        // One builder slot per available categorical feature, initialized index by index.
        ObjectRef catBuilders = ObjectRef.create(new ArrayBuilder[Predef$.MODULE$.intArrayOps(catFlatIndices).size()]);
        int catBuilderCount = Predef$.MODULE$.refArrayOps((ArrayBuilder[]) catBuilders.elem).size();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), catBuilderCount).foreach$mVc$sp(new DataHelpers$$anonfun$processDatasetWithRawFeatures$2(catBuilders));

        // Walk all rows; the function receives the buffer, the index arrays and both builder sets.
        iterator.foreach(new DataHelpers$$anonfun$processDatasetWithRawFeatures$3(i, z, i3, rowsBuffer, floatFlatIndices, catFlatIndices, floatBuilders, catBuilders));

        // Hand the accumulated per-feature data to native vectors.
        TVector_TMaybeOwningConstArrayHolder_float floatColumns = new TVector_TMaybeOwningConstArrayHolder_float();
        Predef$.MODULE$.refArrayOps((ArrayBuilder[]) floatBuilders.elem).foreach(new DataHelpers$$anonfun$processDatasetWithRawFeatures$4(floatColumns));
        TVector_TMaybeOwningConstArrayHolder_i32 catColumns = new TVector_TMaybeOwningConstArrayHolder_i32();
        Predef$.MODULE$.refArrayOps((ArrayBuilder[]) catBuilders.elem).foreach(new DataHelpers$$anonfun$processDatasetWithRawFeatures$5(catColumns));

        TRawObjectsDataProviderPtr rawProvider = native_impl.CreateRawObjectsDataProvider(tFeaturesLayoutPtr, rowsBuffer.size(), floatColumns, catColumns, i2, tLocalExecutor);

        // Drop the references to the builders and request a GC before returning.
        floatBuilders.elem = null;
        catBuilders.elem = null;
        System.gc();

        return new Tuple2<>(rowsBuffer, rawProvider);
    }

    /**
     * Wraps a pool into the training-dataset representation that matches its contents.
     *
     * <ul>
     *   <li>Without pairs data: the pool is repartitioned, a {@code "_datasetIdx"} byte column
     *       holding {@code b} is appended, the result is cached and returned as a
     *       {@code UsualDatasetForTraining}.</li>
     *   <li>With pairs data: main and pairs data are cogrouped (keyed using the pool's group id
     *       column), cached, and returned as a {@code DatasetForTrainingWithPairs}.</li>
     * </ul>
     *
     * @param pool source pool
     * @param b    dataset index stored with the result
     * @param i    partition count used for repartitioning / cogrouping
     * @return the prepared training dataset
     */
    public DatasetForTraining prepareDatasetForTraining(Pool pool, byte b, int i) {
        if (pool.pairsData() == null) {
            Dataset<Row> dataWithDatasetIdx = pool.repartition(i, true).data().withColumn("_datasetIdx", functions$.MODULE$.typedLit(BoxesRunTime.boxToByte(b), scala.reflect.runtime.package$.MODULE$.universe().TypeTag().Byte())).cache();
            return new UsualDatasetForTraining(pool, dataWithDatasetIdx, b);
        }

        RDD<Tuple2<Tuple2<Object, Object>, Tuple2<Iterable<Iterable<Row>>, Iterable<Iterable<Row>>>>> cogroupedData = getCogroupedMainAndPairsRDD(
            pool.data(),
            pool.data().schema().fieldIndex((String) pool.getOrDefault(pool.groupIdCol())),
            pool.pairsData(),
            b,
            new Some(BoxesRunTime.boxToInteger(i))
        ).cache();
        return new DatasetForTrainingWithPairs(pool, cogroupedData, pool.data().schema(), b);
    }

    /**
     * Creates a native data provider builder closure for quantized features and obtains its
     * quantized-features visitor.
     *
     * @param z              flag forwarded to the {@code TDataProviderClosureForJVM} constructor
     * @param tLocalExecutor native executor forwarded to the closure
     * @return the closure together with its visitor
     * @throws CatBoostError if the closure returns no visitor
     */
    public Tuple2<TDataProviderClosureForJVM, IQuantizedFeaturesDataVisitor> getDataProviderBuilderAndVisitor(boolean z, TLocalExecutor tLocalExecutor) {
        TDataProviderBuilderOptions builderOptions = new TDataProviderBuilderOptions();
        TDataProviderClosureForJVM builderClosure = new TDataProviderClosureForJVM(EDatasetVisitorType.QuantizedFeatures, builderOptions, z, tLocalExecutor);
        IQuantizedFeaturesDataVisitor visitor = builderClosure.GetQuantizedVisitor();
        if (visitor != null) {
            return new Tuple2<>(builderClosure, visitor);
        }
        throw new CatBoostError("Failure to create IQuantizedFeaturesDataVisitor");
    }

    /**
     * Finalizes a sequence of dataset loading contexts into native data providers.
     *
     * <p>Each context is visited together with its position by a function (defined elsewhere)
     * that receives {@code iArr} and the three output containers and populates them as a side
     * effect; the value produced by the mapping itself is discarded.
     *
     * @param seq  loading contexts, one per dataset
     * @param iArr per-dataset integer values forwarded to the finalizing function
     * @return two native data provider vectors and an array with one buffer slot per context
     */
    public Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> getLoadedDatasets(Seq<DatasetLoadingContext> seq, int[] iArr) {
        TVector_TDataProviderPtr firstProviders = new TVector_TDataProviderPtr();
        TVector_TDataProviderPtr secondProviders = new TVector_TDataProviderPtr();
        ArrayBuffer[] perDatasetBuffers = new ArrayBuffer[seq.size()];

        Object indexedContexts = seq.zipWithIndex(Seq$.MODULE$.canBuildFrom());
        // Executed for its side effects on the three containers above.
        ((TraversableLike) indexedContexts).map(new DataHelpers$$anonfun$getLoadedDatasets$1(iArr, firstProviders, secondProviders, perDatasetBuffers), Seq$.MODULE$.canBuildFrom());

        return new Tuple3<>(firstProviders, secondProviders, perDatasetBuffers);
    }

    /**
     * Loads {@code i} quantized datasets from a single iterator of rows.
     *
     * <p>One loading context is created per dataset, every row is passed to a row-processing
     * function (defined elsewhere), and the contexts are finally converted to native data
     * providers by {@code getLoadedDatasets}.
     *
     * @param i                         number of datasets
     * @param quantizedFeaturesInfoPtr  quantization info forwarded to each loading context
     * @param hashMap                   column name to index mapping; {@code "_datasetIdx"} is
     *                                  looked up with a fallback function
     * @param tIntermediateDataMetaInfo meta info forwarded to each loading context
     * @param structType                schema of the rows
     * @param option                    optional value forwarded to each loading context
     * @param tLocalExecutor            native executor
     * @param iterator                  rows to load
     * @param iArr                      forwarded to each loading context (default {@code null})
     * @param i2                        forwarded to each loading context (default {@code 0})
     * @return result of {@code getLoadedDatasets} for the created contexts
     */
    public Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> loadQuantizedDatasets(int i, QuantizedFeaturesInfoPtr quantizedFeaturesInfoPtr, HashMap<String, Object> hashMap, TIntermediateDataMetaInfo tIntermediateDataMetaInfo, StructType structType, Option<Object> option, TLocalExecutor tLocalExecutor, Iterator<Row> iterator, int[] iArr, int i2) {
        IndexedSeq loadingContexts = (IndexedSeq) RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), i).map(new DataHelpers$$anonfun$9(quantizedFeaturesInfoPtr, hashMap, tIntermediateDataMetaInfo, structType, option, tLocalExecutor, iArr, i2), IndexedSeq$.MODULE$.canBuildFrom());

        // Per-dataset counters, starting at zero; held in a ref so the row function can update them.
        int[] initialCounters = new int[i];
        Arrays.fill(initialCounters, 0);
        ObjectRef countersRef = ObjectRef.create(initialCounters);

        int datasetIdxColumn = BoxesRunTime.unboxToInt(hashMap.getOrElse("_datasetIdx", new DataHelpers$$anonfun$1()));
        iterator.foreach(new DataHelpers$$anonfun$loadQuantizedDatasets$1(loadingContexts, countersRef, datasetIdxColumn));

        return getLoadedDatasets(loadingContexts, (int[]) countersRef.elem);
    }

    /**
     * Scala-compiler-generated accessor for the default value of the ninth parameter of
     * {@code loadQuantizedDatasets}.
     *
     * @return {@code null}
     */
    public int[] loadQuantizedDatasets$default$9() {
        return null;
    }

    /**
     * Scala-compiler-generated accessor for the default value of the tenth parameter of
     * {@code loadQuantizedDatasets}.
     *
     * @return {@code 0}
     */
    public int loadQuantizedDatasets$default$10() {
        return 0;
    }

    /**
     * Loads {@code i2} quantized datasets, including pairs data, from an iterator over cogrouped
     * main and pairs rows.
     *
     * <p>One loading context is created per dataset, every cogrouped element is passed to a
     * processing function (defined elsewhere), and the contexts are finally converted to native
     * data providers by {@code getLoadedDatasets}.
     *
     * @param i                         forwarded to the processing function (callers in this class
     *                                  pass a dataset index)
     * @param i2                        number of datasets
     * @param quantizedFeaturesInfoPtr  quantization info forwarded to each loading context
     * @param hashMap                   column name to index mapping; must contain {@code "sampleId"}
     * @param tIntermediateDataMetaInfo meta info forwarded to each loading context
     * @param structType                schema forwarded to each loading context (callers in this
     *                                  class pass the main data schema)
     * @param structType2               schema forwarded to each loading context (callers in this
     *                                  class pass the pairs data schema)
     * @param option                    optional value forwarded to each loading context
     * @param tLocalExecutor            native executor
     * @param iterator                  cogrouped main and pairs rows keyed by a two-element tuple
     * @param iArr                      forwarded to each loading context (default {@code null})
     * @param i3                        forwarded to each loading context (default {@code 0})
     * @return result of {@code getLoadedDatasets} for the created contexts
     */
    public Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> loadQuantizedDatasetsWithPairs(int i, int i2, QuantizedFeaturesInfoPtr quantizedFeaturesInfoPtr, HashMap<String, Object> hashMap, TIntermediateDataMetaInfo tIntermediateDataMetaInfo, StructType structType, StructType structType2, Option<Object> option, TLocalExecutor tLocalExecutor, Iterator<Tuple2<Tuple2<Object, Object>, Tuple2<Iterable<Iterable<Row>>, Iterable<Iterable<Row>>>>> iterator, int[] iArr, int i3) {
        IndexedSeq indexedSeq = (IndexedSeq) RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), i2).map(new DataHelpers$$anonfun$10(quantizedFeaturesInfoPtr, hashMap, tIntermediateDataMetaInfo, structType, structType2, option, tLocalExecutor, iArr, i3), IndexedSeq$.MODULE$.canBuildFrom());
        // Two per-dataset counter arrays, both starting at zero; held in refs so the
        // processing function can update them.
        ObjectRef create = ObjectRef.create(new int[i2]);
        Arrays.fill((int[]) create.elem, 0);
        ObjectRef create2 = ObjectRef.create(new int[i2]);
        Arrays.fill((int[]) create2.elem, 0);
        iterator.foreach(new DataHelpers$$anonfun$loadQuantizedDatasetsWithPairs$1(i, indexedSeq, create, create2, BoxesRunTime.unboxToInt(hashMap.apply("sampleId"))));
        // NOTE(review): only the first counter array is handed to getLoadedDatasets; the second is
        // used solely inside the processing function — confirm this is intended.
        return getLoadedDatasets(indexedSeq, (int[]) create.elem);
    }

    /**
     * Scala-compiler-generated accessor for the default value of the eleventh parameter of
     * {@code loadQuantizedDatasetsWithPairs}.
     *
     * @return {@code null}
     */
    public int[] loadQuantizedDatasetsWithPairs$default$11() {
        return null;
    }

    /**
     * Scala-compiler-generated accessor for the default value of the twelfth parameter of
     * {@code loadQuantizedDatasetsWithPairs}.
     *
     * @return {@code 0}
     */
    public int loadQuantizedDatasetsWithPairs$default$12() {
        return 0;
    }

    /**
     * Selects columns of a pool and builds a name-to-index mapping for them.
     *
     * <p>A shared registration function (defined elsewhere) receives the mapping, the list of
     * selected column names and a running counter. It is applied for every entry of {@code seq},
     * then optionally for {@code "_estimatedFeatures"} and {@code "_datasetIdx"}. Finally every
     * entry of {@code seq2} is processed by a second function that shares the name list and the
     * counter and fills an additional integer buffer.
     *
     * @param pool source pool
     * @param seq  column parameter names to select (resolved against the pool by the helper)
     * @param z    when {@code true} and the pool's data has an {@code "_estimatedFeatures"} column,
     *             that column is selected and the estimated feature count is returned
     * @param z2   when {@code true}, the {@code "_datasetIdx"} column is selected
     * @param seq2 additional names processed after the main selection (default: empty)
     * @return the name-to-index mapping, the selected column names, the integers collected for
     *         {@code seq2}, and the estimated feature count ({@code None} if not selected)
     */
    public Tuple4<HashMap<String, Object>, String[], int[], Option<Object>> selectColumnsAndReturnIndex(Pool pool, Seq<String> seq, boolean z, boolean z2, Seq<String> seq2) {
        HashMap columnIndexMap = new HashMap();
        ObjectRef selectedNames = ObjectRef.create(new ArrayBuffer());
        IntRef counter = IntRef.create(0);
        DataHelpers$$anonfun$11 registerColumn = new DataHelpers$$anonfun$11(columnIndexMap, selectedNames, counter);

        seq.foreach(new DataHelpers$$anonfun$selectColumnsAndReturnIndex$1(pool, registerColumn));

        Option estimatedFeatureCount = None$.MODULE$;
        if (z && Predef$.MODULE$.refArrayOps(pool.data().schema().fieldNames()).contains("_estimatedFeatures")) {
            registerColumn.apply("_estimatedFeatures", "_estimatedFeatures");
            estimatedFeatureCount = new Some(BoxesRunTime.boxToInteger(pool.getEstimatedFeatureCount()));
        }

        if (z2) {
            registerColumn.apply("_datasetIdx", "_datasetIdx");
        }

        ArrayBuffer additionalInts = new ArrayBuffer();
        seq2.foreach(new DataHelpers$$anonfun$selectColumnsAndReturnIndex$2(selectedNames, counter, additionalInts));

        String[] selectedNamesArray = (String[]) ((ArrayBuffer) selectedNames.elem).toArray(ClassTag$.MODULE$.apply(String.class));
        int[] additionalIntsArray = (int[]) additionalInts.toArray(ClassTag$.MODULE$.Int());
        return new Tuple4<>(columnIndexMap, selectedNamesArray, additionalIntsArray, estimatedFeatureCount);
    }

    /**
     * Scala-compiler-generated accessor for the default value of the fourth parameter of
     * {@code selectColumnsAndReturnIndex}.
     *
     * @return {@code false}
     */
    public boolean selectColumnsAndReturnIndex$default$4() {
        return false;
    }

    /**
     * Scala-compiler-generated accessor for the default value of the fifth parameter of
     * {@code selectColumnsAndReturnIndex}.
     *
     * @return a {@code Seq} built from {@code Nil}, i.e. an empty sequence
     */
    public Seq<String> selectColumnsAndReturnIndex$default$5() {
        return Seq$.MODULE$.apply(Nil$.MODULE$);
    }

    /**
     * Narrows a training dataset to the columns needed for training and returns the resulting
     * column-name-to-index mapping.
     *
     * <p>The base column set is label, weight, groupWeight, baseline, groupId, subgroupId and
     * timestamp; further columns are added according to the flags. The actual selection is
     * delegated to {@code selectColumnsAndReturnIndex} on the dataset's source pool.
     *
     * @param datasetForTraining dataset to narrow; must be a {@code UsualDatasetForTraining} or a
     *                           {@code DatasetForTrainingWithPairs}
     * @param z                  include the {@code "features"} column
     * @param z2                 include the {@code "sampleId"} column
     * @param z3                 forwarded as the estimated-features flag of
     *                           {@code selectColumnsAndReturnIndex}
     * @param z4                 request the dataset index column; only honoured for
     *                           {@code UsualDatasetForTraining}
     * @return the narrowed dataset (same concrete kind as the input), the column index mapping and
     *         the optional value returned as the fourth element by
     *         {@code selectColumnsAndReturnIndex}
     * @throws MatchError if the dataset is of neither supported kind
     */
    public Tuple3<DatasetForTraining, HashMap<String, Object>, Option<Object>> selectColumnsForTrainingAndReturnIndex(DatasetForTraining datasetForTraining, boolean z, boolean z2, boolean z3, boolean z4) {
        DatasetForTraining datasetForTrainingWithPairs;
        // Column parameter names that are always requested.
        ArrayBuffer apply = ArrayBuffer$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"label", "weight", "groupWeight", "baseline", "groupId", "subgroupId", "timestamp"}));
        if (z) {
            apply.$plus$eq("features");
        } else {
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
        }
        if (z2) {
            apply.$plus$eq("sampleId");
        } else {
            BoxedUnit boxedUnit2 = BoxedUnit.UNIT;
        }
        Tuple4<HashMap<String, Object>, String[], int[], Option<Object>> selectColumnsAndReturnIndex = selectColumnsAndReturnIndex(datasetForTraining.srcPool(), apply.toSeq(), z3, z4 && (datasetForTraining instanceof UsualDatasetForTraining), selectColumnsAndReturnIndex$default$5());
        if (selectColumnsAndReturnIndex == null) {
            throw new MatchError(selectColumnsAndReturnIndex);
        }
        // The third element of the Tuple4 (int[]) is not needed here.
        Tuple3 tuple3 = new Tuple3((HashMap) selectColumnsAndReturnIndex._1(), (String[]) selectColumnsAndReturnIndex._2(), (Option) selectColumnsAndReturnIndex._4());
        HashMap hashMap = (HashMap) tuple3._1();
        String[] strArr = (String[]) tuple3._2();
        Option option = (Option) tuple3._3();
        if (datasetForTraining instanceof UsualDatasetForTraining) {
            // DataFrame case: a plain select of the chosen columns (head + tail form of select).
            UsualDatasetForTraining usualDatasetForTraining = (UsualDatasetForTraining) datasetForTraining;
            datasetForTrainingWithPairs = new UsualDatasetForTraining(usualDatasetForTraining.srcPool(), usualDatasetForTraining.data().select((String) Predef$.MODULE$.refArrayOps(strArr).head(), Predef$.MODULE$.wrapRefArray((Object[]) Predef$.MODULE$.refArrayOps(strArr).tail())), usualDatasetForTraining.datasetIdx());
        } else {
            if (!(datasetForTraining instanceof DatasetForTrainingWithPairs)) {
                throw new MatchError(datasetForTraining);
            }
            // Cogrouped RDD case: map the selected names through the main data schema, then
            // transform the RDD and rebuild the schema from the resulting int array.
            DatasetForTrainingWithPairs datasetForTrainingWithPairs2 = (DatasetForTrainingWithPairs) datasetForTraining;
            Pool srcPool = datasetForTrainingWithPairs2.srcPool();
            RDD<Tuple2<Tuple2<Object, Object>, Tuple2<Iterable<Iterable<Row>>, Iterable<Iterable<Row>>>>> data = datasetForTrainingWithPairs2.data();
            StructType mainDataSchema = datasetForTrainingWithPairs2.mainDataSchema();
            byte datasetIdx = datasetForTrainingWithPairs2.datasetIdx();
            int[] iArr = (int[]) Predef$.MODULE$.refArrayOps(strArr).map(new DataHelpers$$anonfun$12(mainDataSchema), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int()));
            datasetForTrainingWithPairs = new DatasetForTrainingWithPairs(srcPool, data.map(new DataHelpers$$anonfun$13(iArr), ClassTag$.MODULE$.apply(Tuple2.class)), new StructType((StructField[]) Predef$.MODULE$.intArrayOps(iArr).map(new DataHelpers$$anonfun$14(mainDataSchema), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class)))), datasetIdx);
        }
        return new Tuple3<>(datasetForTrainingWithPairs, hashMap, option);
    }

    /**
     * Groups main data rows and pairs rows by a two-element key and cogroups the two results.
     *
     * <p>The key functions are defined elsewhere. The main one is built from the group id field
     * index and the dataset index; the pairs one from the dataset index and the index of the
     * pairs {@code "groupId"} column. Keys are ordered as (Byte, Long) tuples.
     *
     * @param dataset  main data
     * @param i        index of the group id field in the main data
     * @param dataset2 pairs data; its schema must contain {@code "groupId"}
     * @param b        dataset index (default {@code 0})
     * @param option   optional number of partitions for the cogroup (default {@code None})
     * @return cogrouped RDD of main and pairs row groups
     * @throws MatchError if {@code option} is neither {@code Some} nor {@code None}
     */
    public RDD<Tuple2<Tuple2<Object, Object>, Tuple2<Iterable<Iterable<Row>>, Iterable<Iterable<Row>>>>> getCogroupedMainAndPairsRDD(Dataset<Row> dataset, int i, Dataset<Row> dataset2, byte b, Option<Object> option) {
        RDD mainByKey = dataset.rdd().groupBy(new DataHelpers$$anonfun$15(i, b), ClassTag$.MODULE$.apply(Tuple2.class));

        RDD<Row> pairRows = dataset2.rdd();
        int pairsGroupIdFieldIdx = dataset2.schema().fieldIndex("groupId");
        RDD pairsByKey = pairRows.groupBy(new DataHelpers$$anonfun$16(b, pairsGroupIdFieldIdx), ClassTag$.MODULE$.apply(Tuple2.class));

        if (option instanceof Some) {
            // Explicit partition count requested.
            return RDD$.MODULE$.rddToPairRDDFunctions(mainByKey, ClassTag$.MODULE$.apply(Tuple2.class), ClassTag$.MODULE$.apply(Iterable.class), Ordering$.MODULE$.Tuple2(Ordering$Byte$.MODULE$, Ordering$Long$.MODULE$)).cogroup(pairsByKey, BoxesRunTime.unboxToInt(((Some) option).x()));
        }
        if (None$.MODULE$.equals(option)) {
            // Let Spark choose the partitioning.
            return RDD$.MODULE$.rddToPairRDDFunctions(mainByKey, ClassTag$.MODULE$.apply(Tuple2.class), ClassTag$.MODULE$.apply(Iterable.class), Ordering$.MODULE$.Tuple2(Ordering$Byte$.MODULE$, Ordering$Long$.MODULE$)).cogroup(pairsByKey);
        }
        throw new MatchError(option);
    }

    /**
     * Scala-compiler-generated accessor for the default value of the fourth parameter of
     * {@code getCogroupedMainAndPairsRDD}.
     *
     * @return {@code 0}
     */
    public byte getCogroupedMainAndPairsRDD$default$4() {
        return (byte) 0;
    }

    /**
     * Scala-compiler-generated accessor for the default value of the fifth parameter of
     * {@code getCogroupedMainAndPairsRDD}.
     *
     * @return {@code None}, which makes the cogroup run without an explicit partition count
     */
    public Option<Object> getCogroupedMainAndPairsRDD$default$5() {
        return None$.MODULE$;
    }

    /**
     * Pulls a quantized training dataset to the local JVM through a local iterator and saves it
     * to temporary files.
     *
     * <p>Steps: select the training columns, load the rows into native data providers, then write
     * up to three temp files (all registered with {@code deleteOnExit}):
     * <ul>
     *   <li>the main quantized pool (always);</li>
     *   <li>pairs in grouped DSV format, only for {@code DatasetForTrainingWithPairs};</li>
     *   <li>a second quantized pool taken from the second provider vector, only when the optional
     *       value returned by the column selection is defined.</li>
     * </ul>
     * Progress is reported through {@code logger} at info level.
     *
     * @param datasetForTraining dataset to download
     * @param z                  include the features column in the selection
     * @param z2                 forwarded as the estimated-features flag of the column selection
     * @param tLocalExecutor     native executor used while loading
     * @param str                label inserted into the log messages
     * @param logger             logger for progress messages
     * @param str2               temp file name prefix passed to {@code Files.createTempFile}
     *                           (default {@code null})
     * @param str3               temp file name suffix passed to {@code Files.createTempFile}
     *                           (default {@code null})
     * @return paths of the main file and of the optional pairs and second-pool files
     * @throws MatchError if the dataset is of neither supported kind
     */
    public PoolFilesPaths downloadQuantizedPoolToTempFiles(DatasetForTraining datasetForTraining, boolean z, boolean z2, TLocalExecutor tLocalExecutor, String str, Logger logger, String str2, String str3) {
        Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> tuple3;
        logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"downloadQuantizedPoolToTempFiles for ", ": start"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
        // sampleId is only requested for datasets with pairs; the dataset index column is never requested.
        Tuple3<DatasetForTraining, HashMap<String, Object>, Option<Object>> selectColumnsForTrainingAndReturnIndex = selectColumnsForTrainingAndReturnIndex(datasetForTraining, z, datasetForTraining instanceof DatasetForTrainingWithPairs, z2, false);
        if (selectColumnsForTrainingAndReturnIndex == null) {
            throw new MatchError(selectColumnsForTrainingAndReturnIndex);
        }
        Tuple3 tuple32 = new Tuple3((DatasetForTraining) selectColumnsForTrainingAndReturnIndex._1(), (HashMap) selectColumnsForTrainingAndReturnIndex._2(), (Option) selectColumnsForTrainingAndReturnIndex._3());
        DatasetForTraining datasetForTraining2 = (DatasetForTraining) tuple32._1();
        HashMap<String, Object> hashMap = (HashMap) tuple32._2();
        Option<Object> option = (Option) tuple32._3();
        if (datasetForTraining2 instanceof UsualDatasetForTraining) {
            UsualDatasetForTraining usualDatasetForTraining = (UsualDatasetForTraining) datasetForTraining2;
            Pool srcPool = usualDatasetForTraining.srcPool();
            Dataset<Row> data = usualDatasetForTraining.data();
            logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"loadQuantizedDatasets for ", ": start"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
            // Load everything as a single dataset (dataset count 1).
            Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> loadQuantizedDatasets = loadQuantizedDatasets(1, srcPool.quantizedFeaturesInfo(), hashMap, srcPool.createDataMetaInfo(srcPool.createDataMetaInfo$default$1()), data.schema(), option, tLocalExecutor, (Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(data.toLocalIterator()).asScala(), loadQuantizedDatasets$default$9(), loadQuantizedDatasets$default$10());
            logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"loadQuantizedDatasets for ", ": finish"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
            tuple3 = loadQuantizedDatasets;
        } else {
            if (!(datasetForTraining2 instanceof DatasetForTrainingWithPairs)) {
                throw new MatchError(datasetForTraining2);
            }
            DatasetForTrainingWithPairs datasetForTrainingWithPairs = (DatasetForTrainingWithPairs) datasetForTraining2;
            Pool srcPool2 = datasetForTrainingWithPairs.srcPool();
            RDD<Tuple2<Tuple2<Object, Object>, Tuple2<Iterable<Iterable<Row>>, Iterable<Iterable<Row>>>>> data2 = datasetForTrainingWithPairs.data();
            StructType mainDataSchema = datasetForTrainingWithPairs.mainDataSchema();
            logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"loadQuantizedDatasetsWithPairs for ", ": start"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
            // Load everything as a single dataset (dataset count 1), passing the dataset's own index.
            Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> loadQuantizedDatasetsWithPairs = loadQuantizedDatasetsWithPairs(datasetForTraining.datasetIdx(), 1, srcPool2.quantizedFeaturesInfo(), hashMap, srcPool2.createDataMetaInfo(srcPool2.createDataMetaInfo$default$1()), mainDataSchema, srcPool2.pairsData().schema(), option, tLocalExecutor, data2.toLocalIterator(), loadQuantizedDatasetsWithPairs$default$11(), loadQuantizedDatasetsWithPairs$default$12());
            logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"loadQuantizedDatasetsWithPairs for ", ": finish"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
            tuple3 = loadQuantizedDatasetsWithPairs;
        }
        Tuple3<TVector_TDataProviderPtr, TVector_TDataProviderPtr, ArrayBuffer<Object[]>[]> tuple33 = tuple3;
        if (tuple33 == null) {
            throw new MatchError(tuple33);
        }
        // Only the two provider vectors are used below; the third tuple element is ignored.
        Tuple2 tuple2 = new Tuple2((TVector_TDataProviderPtr) tuple33._1(), (TVector_TDataProviderPtr) tuple33._2());
        TVector_TDataProviderPtr tVector_TDataProviderPtr = (TVector_TDataProviderPtr) tuple2._1();
        TVector_TDataProviderPtr tVector_TDataProviderPtr2 = (TVector_TDataProviderPtr) tuple2._2();
        logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", ": save loaded data to files: start"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
        TDataProviderPtr tDataProviderPtr = tVector_TDataProviderPtr.get(0);
        // Main quantized pool file.
        Path createTempFile = Files.createTempFile(str2, str3, new FileAttribute[0]);
        createTempFile.toFile().deleteOnExit();
        native_impl.SaveQuantizedPool(tDataProviderPtr, createTempFile.toString());
        // Pairs file, written from the same main provider, only for datasets with pairs.
        Option option2 = None$.MODULE$;
        if (datasetForTraining instanceof DatasetForTrainingWithPairs) {
            option2 = new Some(Files.createTempFile(str2, str3, new FileAttribute[0]));
            ((Path) option2.get()).toFile().deleteOnExit();
            native_impl.SavePairsInGroupedDsvFormat(tDataProviderPtr, option2.get().toString());
        }
        // Second pool file, written only when the optional value from column selection is defined.
        Option option3 = None$.MODULE$;
        if (option.isDefined()) {
            option3 = new Some(Files.createTempFile(str2, str3, new FileAttribute[0]));
            ((Path) option3.get()).toFile().deleteOnExit();
            native_impl.SaveQuantizedPool(tVector_TDataProviderPtr2.get(0), option3.get().toString());
        }
        logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", ": save loaded data to files: finish"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
        logger.info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"downloadQuantizedPoolToTempFiles for ", ": finish"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
        return new PoolFilesPaths(createTempFile, option2, option3);
    }

    /**
     * Scala-compiler-generated accessor for the default value of the seventh parameter of
     * {@code downloadQuantizedPoolToTempFiles}.
     *
     * @return {@code null}
     */
    public String downloadQuantizedPoolToTempFiles$default$7() {
        return null;
    }

    /**
     * Scala-compiler-generated accessor for the default value of the eighth parameter of
     * {@code downloadQuantizedPoolToTempFiles}.
     *
     * @return {@code null}
     */
    public String downloadQuantizedPoolToTempFiles$default$8() {
        return null;
    }

    /**
     * Downloads a subset of features of a quantized pool to the local JVM and returns them as a
     * native quantized objects data provider.
     *
     * <p>Each partition of the pool's features column is transformed by a function (defined
     * elsewhere) built from the selected features' offsets and their byte size into rows with a
     * single non-nullable binary {@code "features"} column. The rows are then loaded through a
     * local iterator as a single dataset using a features layout cloned with only the selected
     * features.
     *
     * @param pool                     source pool; must be quantized
     * @param quantizedFeaturesIndices indices used to compute the selected features' offsets
     * @param set                      selected feature indices (converted to an {@code int[]})
     * @param tLocalExecutor           native executor used while loading
     * @return quantized objects data provider of the first (and only) loaded dataset
     * @throws CatBoostError if the pool is not quantized
     */
    public TQuantizedObjectsDataProviderPtr downloadSubsetOfQuantizedFeatures(Pool pool, QuantizedFeaturesIndices quantizedFeaturesIndices, Set<Object> set, TLocalExecutor tLocalExecutor) {
        if (!pool.isQuantized()) {
            throw new CatBoostError("downloadSubsetOfQuantizedFeatures is applicable only for quantized pools");
        }
        SelectedFeaturesOffsets apply = SelectedFeaturesOffsets$.MODULE$.apply(pool.quantizedFeaturesInfo(), quantizedFeaturesIndices, set);
        int byteSize = apply.getByteSize();
        // Output schema: one non-nullable binary column named "features".
        StructType apply2 = StructType$.MODULE$.apply(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField("features", BinaryType$.MODULE$, false, StructField$.MODULE$.apply$default$4())})));
        Dataset mapPartitions = pool.data().select(pool.getFeaturesCol(), Predef$.MODULE$.wrapRefArray(new String[0])).mapPartitions(new DataHelpers$$anonfun$17(apply, byteSize), RowEncoder$.MODULE$.apply(apply2));
        // Meta info carries only a features layout restricted to the selected features.
        TIntermediateDataMetaInfo tIntermediateDataMetaInfo = new TIntermediateDataMetaInfo();
        tIntermediateDataMetaInfo.setFeaturesLayout(native_impl.CloneWithSelectedFeatures(pool.quantizedFeaturesInfo().GetFeaturesLayout().__deref__(), (int[]) set.toArray(ClassTag$.MODULE$.Int())));
        // Column index mapping has a single entry: "features" -> 0.
        return ((TVector_TDataProviderPtr) loadQuantizedDatasets(1, pool.quantizedFeaturesInfo(), (HashMap) HashMap$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("features"), BoxesRunTime.boxToInteger(0))})), tIntermediateDataMetaInfo, apply2, None$.MODULE$, tLocalExecutor, (Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(mapPartitions.toLocalIterator()).asScala(), loadQuantizedDatasets$default$9(), loadQuantizedDatasets$default$10())._1()).get(0).GetQuantizedObjectsDataProvider();
    }

    /** Private constructor: publishes the newly created instance as the {@code MODULE$} singleton. */
    private DataHelpers$() {
        MODULE$ = this;
    }
}
