package cc.factorie.app.nlp.embeddings;

import cc.factorie.la.Tensor1;
import cc.factorie.la.Tensor2;
import cc.factorie.la.Tensor3;
import cc.factorie.la.Tensor4;
import cc.factorie.model.Parameters;
import cc.factorie.model.Weights;
import cc.factorie.model.Weights1;
import cc.factorie.model.Weights2;
import cc.factorie.model.Weights3;
import cc.factorie.model.Weights4;
import cc.factorie.model.WeightsSet;
import cc.factorie.optimize.AdaGradRDA;
import cc.factorie.optimize.AdaGradRDA$;
import cc.factorie.util.Threading$;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.io.Source$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;

/* compiled from: WordEmbeddingModel.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005%h!B\u0001\u0003\u0003\u0003i!AE,pe\u0012,UNY3eI&tw-T8eK2T!a\u0001\u0003\u0002\u0015\u0015l'-\u001a3eS:<7O\u0003\u0002\u0006\r\u0005\u0019a\u000e\u001c9\u000b\u0005\u001dA\u0011aA1qa*\u0011\u0011BC\u0001\tM\u0006\u001cGo\u001c:jK*\t1\"\u0001\u0002dG\u000e\u00011c\u0001\u0001\u000f)A\u0011qBE\u0007\u0002!)\t\u0011#A\u0003tG\u0006d\u0017-\u0003\u0002\u0014!\t1\u0011I\\=SK\u001a\u0004\"!\u0006\r\u000e\u0003YQ!a\u0006\u0005\u0002\u000b5|G-\u001a7\n\u0005e1\"A\u0003)be\u0006lW\r^3sg\"A1\u0004\u0001BC\u0002\u0013\u0005A$\u0001\u0003paR\u001cX#A\u000f\u0011\u0005yyR\"\u0001\u0002\n\u0005\u0001\u0012!!D#nE\u0016$G-\u001b8h\u001fB$8\u000f\u0003\u0005#\u0001\t\u0005\t\u0015!\u0003\u001e\u0003\u0015y\u0007\u000f^:!\u0011\u0015!\u0003\u0001\"\u0001&\u0003\u0019a\u0014N\\5u}Q\u0011ae\n\t\u0003=\u0001AQaG\u0012A\u0002uAq!\u000b\u0001C\u0002\u0013\u0005!&A\u0001E+\u0005Y\u0003CA\b-\u0013\ti\u0003CA\u0002J]RDaa\f\u0001!\u0002\u0013Y\u0013A\u0001#!\u0011\u001d\t\u0004\u00011A\u0005\u0002)\n\u0011A\u0016\u0005\bg\u0001\u0001\r\u0011\"\u00015\u0003\u00151v\fJ3r)\t)\u0004\b\u0005\u0002\u0010m%\u0011q\u0007\u0005\u0002\u0005+:LG\u000fC\u0004:e\u0005\u0005\t\u0019A\u0016\u0002\u0007a$\u0013\u0007\u0003\u0004<\u0001\u0001\u0006KaK\u0001\u0003-\u0002Bq!\u0010\u0001C\u0002\u0013E!&A\u0004uQJ,\u0017\rZ:\t\r}\u0002\u0001\u0015!\u0003,\u0003!!\bN]3bIN\u0004\u0003bB!\u0001\u0005\u0004%\tBQ\u0001\rC\u0012\fwI]1e\t\u0016dG/Y\u000b\u0002\u0007B\u0011q\u0002R\u0005\u0003\u000bB\u0011a\u0001R8vE2,\u0007BB$\u0001A\u0003%1)A\u0007bI\u0006<%/\u00193EK2$\u0018\r\t\u0005\b\u0013\u0002\u0011\r\u0011\"\u0005C\u0003-\tG-Y$sC\u0012\u0014\u0016\r^3\t\r-\u0003\u0001\u0015!\u0003D\u00031\tG-Y$sC\u0012\u0014\u0016\r^3!\u0011\u001di\u0005A1A\u0005\u0012)\n\u0001\"\\5o\u0007>,h\u000e\u001e\u0005\u0007\u001f\u0002\u0001\u000b\u0011B\u0016\u0002\u00135LgnQ8v]R\u0004\u0003bB)\u0001\u0005\u0004%\tBK\u0001\u0010S\u001etwN]3Ti>\u0004xk\u001c:eg\"11\u000b\u0001Q\u0001\n-\n\u0001
#[4o_J,7\u000b^8q/>\u0014Hm\u001d\u0011\t\u000fU\u0003!\u0019!C\tU\u0005iao\\2bE\"\u000b7\u000f[*ju\u0016Daa\u0016\u0001!\u0002\u0013Y\u0013A\u0004<pG\u0006\u0014\u0007*Y:i'&TX\r\t\u0005\b3\u0002\u0011\r\u0011\"\u0005+\u0003E\u0019\u0018-\u001c9mS:<G+\u00192mKNK'0\u001a\u0005\u00077\u0002\u0001\u000b\u0011B\u0016\u0002%M\fW\u000e\u001d7j]\u001e$\u0016M\u00197f'&TX\r\t\u0005\b;\u0002\u0011\r\u0011\"\u0005+\u00031i\u0017\r\u001f,pG\u0006\u00147+\u001b>f\u0011\u0019y\u0006\u0001)A\u0005W\u0005iQ.\u0019=W_\u000e\f'mU5{K\u0002Bq!\u0019\u0001C\u0002\u0013\u0005!-\u0001\u0004d_J\u0004Xo]\u000b\u0002GB\u0011A-[\u0007\u0002K*\u0011amZ\u0001\u0005Y\u0006twMC\u0001i\u0003\u0011Q\u0017M^1\n\u0005),'AB*ue&tw\r\u0003\u0004m\u0001\u0001\u0006IaY\u0001\bG>\u0014\b/^:!\u0011\u001dq\u0007A1A\u0005\u0012\t\fab\\;uaV$h)\u001b7f]\u0006lW\r\u0003\u0004q\u0001\u0001\u0006IaY\u0001\u0010_V$\b/\u001e;GS2,g.Y7fA!9!\u000f\u0001b\u0001\n\u0013Q\u0013!D:u_J,\u0017J\u001c\"j]\u0006\u0014\u0018\u0010\u0003\u0004u\u0001\u0001\u0006IaK\u0001\u000fgR|'/Z%o\u0005&t\u0017M]=!\u0011\u001d1\bA1A\u0005\n\t\f\u0011\u0003\\8bIZ{7-\u00192GS2,g.Y7f\u0011\u0019A\b\u0001)A\u0005G\u0006\u0011Bn\\1e->\u001c\u0017M\u0019$jY\u0016t\u0017-\\3!\u0011\u001dQ\bA1A\u0005\n\t\f\u0011c]1wKZ{7-\u00192GS2,g.Y7f\u0011\u0019a\b\u0001)A\u0005G\u0006\u00112/\u0019<f->\u001c\u0017M\u0019$jY\u0016t\u0017-\\3!\u0011\u001dq\bA1A\u0005\n\t\f\u0001\"\u001a8d_\u0012Lgn\u001a\u0005\b\u0003\u0003\u0001\u0001\u0015!\u0003d\u0003%)gnY8eS:<\u0007\u0005C\u0005\u0002\u0006\u0001\u0001\r\u0011\"\u0005\u0002\b\u0005)ao\\2bEV\u0011\u0011\u0011\u0002\t\u0004=\u0005-\u0011bAA\u0007\u0005\taak\\2bE\n+\u0018\u000e\u001c3fe\"I\u0011\u0011\u0003\u0001A\u0002\u0013E\u00111C\u0001\nm>\u001c\u0017MY0%KF$2!NA\u000b\u0011%I\u0014qBA\u0001\u0002\u0004\tI\u0001\u0003\u0005\u0002\u001a\u0001\u0001\u000b\u0015BA\u0005\u0003\u00191xnY1cA!I\u0011Q\u0004\u0001A\u0002\u0013E\u0011qD\u0001\biJ\f\u0017N\\3s+\t\t\t\u0003E\u0002\u001f\u0003GI1!!\n\u0003\u0005Ia\u001
5\u000e^3I_\u001e<\u0018\u000e\u001c3Ue\u0006Lg.\u001a:\t\u0013\u0005%\u0002\u00011A\u0005\u0012\u0005-\u0012a\u0003;sC&tWM]0%KF$2!NA\u0017\u0011%I\u0014qEA\u0001\u0002\u0004\t\t\u0003\u0003\u0005\u00022\u0001\u0001\u000b\u0015BA\u0011\u0003!!(/Y5oKJ\u0004\u0003\"CA\u001b\u0001\u0001\u0007I\u0011CA\u001c\u0003%y\u0007\u000f^5nSj,'/\u0006\u0002\u0002:A!\u00111HA!\u001b\t\tiDC\u0002\u0002@!\t\u0001b\u001c9uS6L'0Z\u0005\u0005\u0003\u0007\niD\u0001\u0006BI\u0006<%/\u00193S\t\u0006C\u0011\"a\u0012\u0001\u0001\u0004%\t\"!\u0013\u0002\u001b=\u0004H/[7ju\u0016\u0014x\fJ3r)\r)\u00141\n\u0005\ns\u0005\u0015\u0013\u0011!a\u0001\u0003sA\u0001\"a\u0014\u0001A\u0003&\u0011\u0011H\u0001\u000b_B$\u0018.\\5{KJ\u0004\u0003\"CA*\u0001\u0001\u0007I\u0011AA+\u0003\u001d9X-[4iiN,\"!a\u0016\u0011\r\u0005e\u0013\u0011NA8\u001d\u0011\tY&!\u001a\u000f\t\u0005u\u00131M\u0007\u0003\u0003?R1!!\u0019\r\u0003\u0019a$o\\8u}%\t\u0011#C\u0002\u0002hA\tq\u0001]1dW\u0006<W-\u0003\u0003\u0002l\u00055$aA*fc*\u0019\u0011q\r\t\u0011\u0007U\t\t(C\u0002\u0002tY\u0011qaV3jO\"$8\u000fC\u0005\u0002x\u0001\u0001\r\u0011\"\u0001\u0002z\u0005Yq/Z5hQR\u001cx\fJ3r)\r)\u00141\u0010\u0005\ns\u0005U\u0014\u0011!a\u0001\u0003/B\u0001\"a 
\u0001A\u0003&\u0011qK\u0001\to\u0016Lw\r\u001b;tA!I\u00111\u0011\u0001A\u0002\u0013%\u0011QQ\u0001\fiJ\f\u0017N\\0x_J$7/\u0006\u0002\u0002\bB\u0019q\"!#\n\u0007\u0005-\u0005C\u0001\u0003M_:<\u0007\"CAH\u0001\u0001\u0007I\u0011BAI\u0003=!(/Y5o?^|'\u000fZ:`I\u0015\fHcA\u001b\u0002\u0014\"I\u0011(!$\u0002\u0002\u0003\u0007\u0011q\u0011\u0005\t\u0003/\u0003\u0001\u0015)\u0003\u0002\b\u0006aAO]1j]~;xN\u001d3tA!9\u00111\u0014\u0001\u0005\u0002\u0005u\u0015A\u00032vS2$gk\\2bER\tQ\u0007C\u0004\u0002\"\u0002!\t!!(\u0002\u001f1,\u0017M\u001d8F[\n,G\rZ5oONDq!!*\u0001\t\u0003\ti*A\u0003ti>\u0014X\rC\u0004\u0002*\u0002!\t\"a+\u0002\u0019]|'o[3s)\"\u0014X-\u00193\u0015\u000fU\ni+!-\u00026\"9\u0011qVAT\u0001\u0004Y\u0013AA5e\u0011!\t\u0019,a*A\u0002\u0005\u001d\u0015a\u00024jY\u0016dUM\u001c\u0005\u000b\u0003o\u000b9\u000b%AA\u0002\u0005\u001d\u0015A\u00049sS:$\u0018I\u001a;fe:#un\u0019\u0005\b\u0003w\u0003a\u0011CA_\u0003\u001d\u0001(o\\2fgN$2aKA`\u0011!\t\t-!/A\u0002\u0005\r\u0017a\u00013pGB!\u0011QYAf\u001d\ry\u0011qY\u0005\u0004\u0003\u0013\u0004\u0012A\u0002)sK\u0012,g-C\u0002k\u0003\u001bT1!!3\u0011\u0011%\t\t\u000eAI\u0001\n#\t\u0019.\u0001\fx_J\\WM\u001d+ie\u0016\fG\r\n3fM\u0006,H\u000e\u001e\u00134+\t\t)N\u000b\u0003\u0002\b\u0006]7FAAm!\u0011\tY.!:\u000e\u0005\u0005u'\u0002BAp\u0003C\f\u0011\"\u001e8dQ\u0016\u001c7.\u001a3\u000b\u0007\u0005\r\b#\u0001\u0006b]:|G/\u0019;j_:LA!a:\u0002^\n\tRO\\2iK\u000e\\W\r\u001a,be&\fgnY3")
/* loaded from: input_file:cc/factorie/app/nlp/embeddings/WordEmbeddingModel.class */
/*
 * Shared base for word-embedding trainers. It builds (or loads) the vocabulary, creates the
 * AdaGradRDA optimizer and LiteHogwildTrainer, fans training out over several threads and finally
 * writes the embeddings to disk. Subclasses supply process(String), which is handed one item read
 * from the corpus and returns a count that is accumulated against the per-thread word budget.
 *
 * NOTE(review): this source is decompiled from Scala, which has no checked exceptions; the I/O
 * methods below therefore let IOException propagate without declaring it.
 */
public abstract class WordEmbeddingModel implements Parameters {
    /** Option holder that every setting below is read from. */
    private final EmbeddingOpts opts;
    /** Value of the "dimension" option; written as the second number of the output header. */
    private final int D;
    /** Vocabulary size; 0 until buildVocab() has run. */
    private int V;
    /** Number of worker threads used for training. */
    private final int threads;
    /** First constructor argument handed to AdaGradRDA (the "delta" option). */
    private final double adaGradDelta;
    /** Second constructor argument handed to AdaGradRDA (the "rate" option). */
    private final double adaGradRate;
    /** Passed to VocabBuilder.sortVocab (the "minCount" option). */
    private final int minCount;
    /** 1 when the boolean "ignoreStopWords" option is set, otherwise 0. */
    private final int ignoreStopWords;
    private final int vocabHashSize;
    private final int samplingTableSize;
    private final int maxVocabSize;
    /** Path of the training corpus; a ".gz" suffix selects gzip decoding in buildVocab(). */
    private final String corpus;
    /** Path the embeddings are written to by store(). */
    private final String outputFilename;
    /** 1 when the boolean "binary" option is set (gzip-compressed output), otherwise 0. */
    private final int storeInBinary;
    /** Vocabulary file to load; an empty string means "build the vocabulary from the corpus". */
    private final String loadVocabFilename;
    /** Vocabulary file to write; an empty string means "do not save". */
    private final String saveVocabFilename;
    /** Character encoding used for every file this class reads or writes. */
    private final String encoding;
    private VocabBuilder vocab;
    private LiteHogwildTrainer trainer;
    private AdaGradRDA optimizer;
    private Seq<Weights> weights;
    /** Effective number of corpus words as reported by the vocabulary; set by buildVocab(). */
    private long train_words;
    // Not final: the trait setter below assigns it, which Java rejects for a final field.
    private WeightsSet parameters;

    /**
     * Creates the model and copies every setting out of the option holder.
     *
     * @param embeddingOpts source of all configuration values
     */
    public WordEmbeddingModel(EmbeddingOpts embeddingOpts) {
        this.opts = embeddingOpts;
        cc$factorie$model$Parameters$_setter_$parameters_$eq(new WeightsSet());
        this.D = BoxesRunTime.unboxToInt(embeddingOpts.dimension().value());
        this.V = 0;
        this.threads = BoxesRunTime.unboxToInt(embeddingOpts.threads().value());
        this.adaGradDelta = BoxesRunTime.unboxToDouble(embeddingOpts.delta().value());
        this.adaGradRate = BoxesRunTime.unboxToDouble(embeddingOpts.rate().value());
        this.minCount = BoxesRunTime.unboxToInt(embeddingOpts.minCount().value());
        this.ignoreStopWords = BoxesRunTime.unboxToBoolean(embeddingOpts.ignoreStopWords().value()) ? 1 : 0;
        this.vocabHashSize = BoxesRunTime.unboxToInt(embeddingOpts.vocabHashSize().value());
        this.samplingTableSize = BoxesRunTime.unboxToInt(embeddingOpts.samplingTableSize().value());
        this.maxVocabSize = BoxesRunTime.unboxToInt(embeddingOpts.vocabSize().value());
        this.corpus = embeddingOpts.corpus().value();
        this.outputFilename = embeddingOpts.output().value();
        this.storeInBinary = BoxesRunTime.unboxToBoolean(embeddingOpts.binary().value()) ? 1 : 0;
        this.loadVocabFilename = embeddingOpts.loadVocabFile().value();
        this.saveVocabFilename = embeddingOpts.saveVocabFile().value();
        this.encoding = embeddingOpts.encoding().value();
        this.vocab = null;
        this.trainer = null;
        this.optimizer = null;
        this.weights = null;
        this.train_words = 0L;
    }

    /** @return the weights set that backs this model */
    @Override // cc.factorie.model.Parameters
    public WeightsSet parameters() {
        return this.parameters;
    }

    /** Trait initialiser hook; called once from the constructor to install the weights set. */
    @Override // cc.factorie.model.Parameters
    public void cc$factorie$model$Parameters$_setter_$parameters_$eq(WeightsSet weightsSet) {
        this.parameters = weightsSet;
    }

    /** Forwards to the Parameters trait implementation for rank-1 tensors. */
    @Override // cc.factorie.model.Parameters
    public Weights1 Weights(Function0<Tensor1> function0) {
        return Parameters.Cclass.Weights((Parameters) this, (Function0) function0);
    }

    /** Forwards to the Parameters trait implementation for rank-2 tensors. */
    @Override // cc.factorie.model.Parameters
    /* renamed from: Weights */
    public Weights2 mo133Weights(Function0<Tensor2> function0) {
        return Parameters.Cclass.m1634Weights((Parameters) this, (Function0) function0);
    }

    /** Forwards to the Parameters trait implementation for rank-3 tensors. */
    @Override // cc.factorie.model.Parameters
    /* renamed from: Weights */
    public Weights3 mo134Weights(Function0<Tensor3> function0) {
        return Parameters.Cclass.m1635Weights((Parameters) this, (Function0) function0);
    }

    /** Forwards to the Parameters trait implementation for rank-4 tensors. */
    @Override // cc.factorie.model.Parameters
    /* renamed from: Weights */
    public Weights4 mo135Weights(Function0<Tensor4> function0) {
        return Parameters.Cclass.m1636Weights((Parameters) this, (Function0) function0);
    }

    /** @return the option holder this model was constructed with */
    public EmbeddingOpts opts() {
        return this.opts;
    }

    /** @return the configured "dimension" option */
    public int D() {
        return this.D;
    }

    /** @return the vocabulary size (0 before buildVocab() has run) */
    public int V() {
        return this.V;
    }

    /** Sets the vocabulary size. */
    public void V_$eq(int i) {
        this.V = i;
    }

    /** @return the number of worker threads */
    public int threads() {
        return this.threads;
    }

    /** @return the "delta" option passed to AdaGradRDA */
    public double adaGradDelta() {
        return this.adaGradDelta;
    }

    /** @return the "rate" option passed to AdaGradRDA */
    public double adaGradRate() {
        return this.adaGradRate;
    }

    /** @return the "minCount" option passed to VocabBuilder.sortVocab */
    public int minCount() {
        return this.minCount;
    }

    /** @return 1 if the "ignoreStopWords" option is set, otherwise 0 */
    public int ignoreStopWords() {
        return this.ignoreStopWords;
    }

    /** @return the "vocabHashSize" option passed to the VocabBuilder constructor */
    public int vocabHashSize() {
        return this.vocabHashSize;
    }

    /** @return the "samplingTableSize" option passed to the VocabBuilder constructor */
    public int samplingTableSize() {
        return this.samplingTableSize;
    }

    /** @return the "vocabSize" option passed to VocabBuilder.sortVocab */
    public int maxVocabSize() {
        return this.maxVocabSize;
    }

    /** @return the corpus path */
    public String corpus() {
        return this.corpus;
    }

    /** @return the path the embeddings are written to */
    public String outputFilename() {
        return this.outputFilename;
    }

    private int storeInBinary() {
        return this.storeInBinary;
    }

    private String loadVocabFilename() {
        return this.loadVocabFilename;
    }

    private String saveVocabFilename() {
        return this.saveVocabFilename;
    }

    private String encoding() {
        return this.encoding;
    }

    /** @return the vocabulary builder, or null before buildVocab() has run */
    public VocabBuilder vocab() {
        return this.vocab;
    }

    public void vocab_$eq(VocabBuilder vocabBuilder) {
        this.vocab = vocabBuilder;
    }

    /** @return the trainer, or null before learnEmbeddings() has run */
    public LiteHogwildTrainer trainer() {
        return this.trainer;
    }

    public void trainer_$eq(LiteHogwildTrainer liteHogwildTrainer) {
        this.trainer = liteHogwildTrainer;
    }

    /** @return the optimizer, or null before learnEmbeddings() has run */
    public AdaGradRDA optimizer() {
        return this.optimizer;
    }

    public void optimizer_$eq(AdaGradRDA adaGradRDA) {
        this.optimizer = adaGradRDA;
    }

    /** @return the per-vocabulary-index weights, or null before learnEmbeddings() has run */
    public Seq<Weights> weights() {
        return this.weights;
    }

    public void weights_$eq(Seq<Weights> seq) {
        this.weights = seq;
    }

    private long train_words() {
        return this.train_words;
    }

    private void train_words_$eq(long j) {
        this.train_words = j;
    }

    /**
     * Creates the vocabulary, either by scanning the corpus or by loading a saved vocabulary
     * file, then sorts it, builds the sampling tables, records the vocabulary size and word
     * count, and optionally saves the vocabulary.
     */
    public void buildVocab() {
        // NOTE(review): 0.7 is presumably the hash-table load factor - confirm against VocabBuilder.
        vocab_$eq(new VocabBuilder(vocabHashSize(), samplingTableSize(), 0.7d));
        if (loadVocabFilename().isEmpty()) {
            Predef$.MODULE$.println("Building Vocab");
            scanCorpusIntoVocab();
        } else {
            Predef$.MODULE$.println("Loading Vocab");
            vocab().loadVocab(loadVocabFilename(), encoding());
        }
        vocab().sortVocab(minCount(), ignoreStopWords(), maxVocabSize());
        vocab().buildSamplingTable();
        vocab().buildSubSamplingTable(BoxesRunTime.unboxToDouble(opts().sample().value()));
        V_$eq(vocab().size());
        train_words_$eq(vocab().trainWords());
        Predef$.MODULE$.println("Corpus Stat - Vocab Size :" + V() + " Total words (effective) in corpus : " + train_words());
        if (!saveVocabFilename().isEmpty()) {
            Predef$.MODULE$.println("Saving Vocab into " + saveVocabFilename());
            vocab().saveVocab(saveVocabFilename(), storeInBinary(), encoding());
            Predef$.MODULE$.println("Done Saving Vocab");
        }
    }

    /**
     * Reads the corpus line by line (through gzip when the path ends in ".gz") and hands every
     * space-separated token to the generated buildVocab closure. The stream is always closed,
     * including when reading or decoding fails.
     */
    private void scanCorpusIntoVocab() {
        boolean gzipped = corpus().endsWith(".gz");
        FileInputStream rawIn = new FileInputStream(corpus());
        java.io.InputStream in = rawIn;
        try {
            if (gzipped) {
                // If this constructor throws, "in" still points at rawIn, so finally closes the file.
                in = new GZIPInputStream(rawIn);
            }
            Iterator<String> lines = Source$.MODULE$.fromInputStream(in, encoding()).getLines();
            while (lines.hasNext()) {
                String line = new StringOps(Predef$.MODULE$.augmentString(lines.next())).stripLineEnd();
                String[] tokens = new StringOps(Predef$.MODULE$.augmentString(line)).split(' ');
                Predef$.MODULE$.refArrayOps(tokens).foreach(new WordEmbeddingModel$$anonfun$buildVocab$1(this));
            }
        } finally {
            in.close();
        }
    }

    /**
     * Sets up the optimizer, one weights entry per vocabulary index and the trainer, then runs
     * the generated training closure once per thread id via Threading.parForeach.
     */
    public void learnEmbeddings() {
        Predef$.MODULE$.println("Learning Embeddings");
        // Only delta and rate are configured; the remaining constructor arguments keep their defaults.
        optimizer_$eq(new AdaGradRDA(adaGradDelta(), adaGradRate(), AdaGradRDA$.MODULE$.$lessinit$greater$default$3(), AdaGradRDA$.MODULE$.$lessinit$greater$default$4(), AdaGradRDA$.MODULE$.$lessinit$greater$default$5()));
        weights_$eq((Seq) RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), V()).map(new WordEmbeddingModel$$anonfun$learnEmbeddings$2(this), IndexedSeq$.MODULE$.canBuildFrom()));
        optimizer().initializeWeights(parameters());
        trainer_$eq(new LiteHogwildTrainer(parameters(), optimizer(), threads(), Integer.MAX_VALUE));
        Iterable threadIds = (Iterable) RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), threads()).map(new WordEmbeddingModel$$anonfun$1(this), IndexedSeq$.MODULE$.canBuildFrom());
        // The corpus length is measured once here and shared with every worker closure.
        long corpusBytes = new File(corpus()).length();
        Threading$.MODULE$.parForeach(threadIds, threads(), (Function1) new WordEmbeddingModel$$anonfun$learnEmbeddings$1(this, corpusBytes));
        Predef$.MODULE$.println("Done learning embeddings. ");
    }

    /**
     * Writes the embeddings to outputFilename(): a "V D" header line followed by whatever the
     * generated store closure emits for each vocabulary index. The writer is closed even when
     * writing fails.
     */
    public void store() {
        Predef$.MODULE$.println("Now, storing the embeddings .... ");
        Writer writer = openEmbeddingWriter();
        try {
            writer.write(String.format("%d %d\n", V(), D()));
            RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), V()).foreach$mVc$sp(new WordEmbeddingModel$$anonfun$store$1(this, writer));
        } finally {
            writer.close();
        }
        Predef$.MODULE$.println("Done storing embeddings");
    }

    /** Opens the output file as plain text, or gzip-compressed when the binary flag is 1. */
    private Writer openEmbeddingWriter() {
        // storeInBinary is derived from a boolean in the constructor, so it is always 0 or 1.
        if (storeInBinary() == 1) {
            return new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(outputFilename()))), encoding());
        }
        return new PrintWriter(outputFilename(), encoding());
    }

    /**
     * Body of one training worker. Starts reading the corpus at this worker's byte offset and
     * feeds items to process(String) until the reader is exhausted or the accumulated count
     * exceeds this worker's share of the training words.
     *
     * @param i  worker id; also selects the starting offset, and only worker 1 prints progress
     * @param j  corpus length in bytes
     * @param j2 print progress every j2 processed items; 0 disables progress output
     */
    public void workerThread(int i, long j, long j2) {
        long startOffset = (j / threads()) * i;
        FastLineReader reader = new FastLineReader(corpus(), startOffset, encoding());
        long wordBudget = train_words() / threads();
        long wordsDone = 0;
        int itemsDone = 0;
        while (reader.hasNext()) {
            wordsDone += process(reader.m425next());
            itemsDone++;
            // j2 != 0 avoids a division by zero in the modulo.
            if (i == 1 && j2 != 0 && itemsDone % j2 == 0) {
                // Divide as double: long division would truncate the ratio and throw when the budget is 0.
                double percent = ((double) wordsDone / wordBudget) * 100;
                Predef$.MODULE$.println("Progress : " + percent + " %");
            }
            if (wordsDone > wordBudget) {
                break;
            }
        }
    }

    /** @return the default progress interval for workerThread (100 items) */
    public long workerThread$default$3() {
        return 100L;
    }

    /**
     * Trains on one item read from the corpus.
     *
     * @param str the text handed over by the corpus reader
     * @return a count that workerThread adds to its running word total
     */
    public abstract int process(String str);
}
