package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.dpoperator;

import burlap.behavior.functionapproximation.FunctionGradient;
import burlap.behavior.singleagent.learnfromdemo.mlirl.support.BoltzmannPolicyGradient;
import burlap.behavior.singleagent.planning.stochastic.dpoperator.SoftmaxOperator;

/* loaded from: input_file:burlap/behavior/singleagent/learnfromdemo/mlirl/differentiableplanners/dpoperator/DifferentiableSoftmaxOperator.class */
/**
 * A {@link SoftmaxOperator} that additionally implements {@link DifferentiableDPOperator},
 * i.e. it can report a gradient for the value it backs up.
 */
public class DifferentiableSoftmaxOperator extends SoftmaxOperator implements DifferentiableDPOperator {

    /** Creates the operator through the no-argument {@link SoftmaxOperator} constructor. */
    public DifferentiableSoftmaxOperator() {
        super();
    }

    /**
     * Creates the operator, forwarding {@code d} unchanged to the {@link SoftmaxOperator}
     * constructor.
     *
     * @param d value handed to the superclass constructor (presumably the softmax
     *          {@code beta} parameter -- confirm against {@code SoftmaxOperator})
     */
    public DifferentiableSoftmaxOperator(double d) {
        super(d);
    }

    /**
     * Builds a sparse gradient from the given values and their per-entry gradients.
     *
     * <p>For each index {@code i}, a weight {@code w_i = exp(beta * dArr[i] - logSum)} is
     * computed and the policy gradient for {@code i} is obtained from
     * {@link BoltzmannPolicyGradient}. Then, for every parameter {@code p} listed among that
     * policy gradient's non-zero partial derivatives, the result entry for {@code p} is
     * increased by {@code w_i * functionGradientArr[i][p] + dArr[i] * policyGradient_i[p]}.
     *
     * <p>Only parameters reported by the policy gradient for an index receive a contribution
     * from that index.
     *
     * <p>NOTE(review): {@code dArr} presumably holds Q-values and {@code w_i} the Boltzmann
     * probability of entry {@code i}; this depends on the {@code BoltzmannPolicyGradient}
     * helpers -- confirm there.
     *
     * @param dArr                the values, one per entry
     * @param functionGradientArr the gradient of each value, indexed like {@code dArr}
     * @return the accumulated sparse gradient
     */
    @Override
    public FunctionGradient gradient(double[] dArr, FunctionGradient[] functionGradientArr) {
        // Readable aliases; the parameter names themselves are left untouched.
        final double[] qs = dArr;
        final FunctionGradient[] qGradients = functionGradientArr;

        final FunctionGradient.SparseGradient vGradient = new FunctionGradient.SparseGradient();
        final double maxBetaScaled = BoltzmannPolicyGradient.maxBetaScaled(qs, this.beta);
        final double logSum = BoltzmannPolicyGradient.logSum(qs, maxBetaScaled, this.beta);

        for (int action = 0; action < qs.length; action++) {
            final double weight = Math.exp((this.beta * qs[action]) - logSum);

            for (FunctionGradient.PartialDerivative pd
                    : BoltzmannPolicyGradient
                            .computePolicyGradient(this.beta, qs, maxBetaScaled, logSum, qGradients, action)
                            .getNonZeroPartialDerivatives()) {
                // Terms are added strictly left to right: running total, then the weighted
                // value-gradient term, then the value-times-policy-gradient term.
                final double runningTotal = vGradient.getPartialDerivative(pd.parameterId);
                final double weightedValueGradient =
                        weight * qGradients[action].getPartialDerivative(pd.parameterId);
                final double valueTimesPolicyGradient = qs[action] * pd.value;

                vGradient.put(
                        pd.parameterId,
                        runningTotal + weightedValueGradient + valueTimesPolicyGradient);
            }
        }
        return vGradient;
    }
}
