package burlap.behavior.policy;

import burlap.behavior.policy.support.ActionProb;
import burlap.behavior.singleagent.MDPSolverInterface;
import burlap.behavior.valuefunction.QProvider;
import burlap.behavior.valuefunction.QValue;
import burlap.debugtools.RandomFactory;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import javax.management.RuntimeErrorException;

/* loaded from: input_file:burlap/behavior/policy/EpsilonGreedy.class */
/**
 * Epsilon-greedy policy derived from a {@link QProvider}.
 *
 * <p>When an action is requested, the policy draws a random number; with probability
 * {@code epsilon} it returns an action chosen uniformly from all Q-values reported for the
 * state, otherwise it returns an action whose Q-value is maximal, breaking ties uniformly at
 * random.
 *
 * <p>The Q-value source may be supplied at construction time or later through
 * {@link #setSolver(MDPSolverInterface)}. Until one is set, {@link #action(State)} and
 * {@link #policyDistribution(State)} cannot be used (they dereference the source directly).
 */
public class EpsilonGreedy implements SolverDerivedPolicy, EnumerablePolicy {
    /** Source of Q-values; {@code null} until provided by a constructor or {@link #setSolver}. */
    protected QProvider qplanner;

    /** Probability of taking a uniformly random action instead of a greedy one. */
    protected double epsilon;

    /** Random number generator, obtained from {@code RandomFactory.getMapped(0)}. */
    protected Random rand;

    /**
     * Creates a policy with no Q-value source; one must be set with
     * {@link #setSolver(MDPSolverInterface)} before the policy is queried.
     *
     * @param d the epsilon value (probability of a random action)
     */
    public EpsilonGreedy(double d) {
        this.qplanner = null;
        this.epsilon = d;
        this.rand = RandomFactory.getMapped(0);
    }

    /**
     * Creates a policy backed by the given Q-value source.
     *
     * @param qProvider the source of Q-values
     * @param d the epsilon value (probability of a random action)
     */
    public EpsilonGreedy(QProvider qProvider, double d) {
        this.qplanner = qProvider;
        this.epsilon = d;
        this.rand = RandomFactory.getMapped(0);
    }

    /**
     * Returns the current epsilon value.
     *
     * @return the probability of a random action
     */
    public double getEpsilon() {
        return this.epsilon;
    }

    /**
     * Sets the epsilon value.
     *
     * @param d the new probability of a random action
     */
    public void setEpsilon(double d) {
        this.epsilon = d;
    }

    /**
     * Sets the Q-value source used by this policy.
     *
     * @param mDPSolverInterface the solver to use; must also implement {@link QProvider}
     * @throws RuntimeErrorException if the solver does not implement {@link QProvider}
     */
    @Override // burlap.behavior.policy.SolverDerivedPolicy
    public void setSolver(MDPSolverInterface mDPSolverInterface) {
        if (!(mDPSolverInterface instanceof QProvider)) {
            throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
        }
        this.qplanner = (QProvider) mDPSolverInterface;
    }

    /**
     * Samples an action for the given state.
     *
     * @param state the state in which to act
     * @return a uniformly random action with probability epsilon, otherwise an action with the
     *         maximal Q-value (ties broken uniformly at random)
     */
    @Override // burlap.behavior.policy.Policy
    public Action action(State state) {
        List<QValue> qValues = this.qplanner.qValues(state);

        // nextDouble() lies in [0, 1), so a strict comparison makes the exploration branch fire
        // with probability epsilon and never when epsilon is 0, matching policyDistribution.
        if (this.rand.nextDouble() < this.epsilon) {
            return qValues.get(this.rand.nextInt(qValues.size())).a;
        }

        // Greedy branch: collect every entry that ties for the maximal Q-value.
        List<QValue> maxActions = new ArrayList<QValue>();
        maxActions.add(qValues.get(0));
        double maxQ = qValues.get(0).q;
        for (int i = 1; i < qValues.size(); i++) {
            QValue candidate = qValues.get(i);
            if (candidate.q == maxQ) {
                maxActions.add(candidate);
            } else if (candidate.q > maxQ) {
                // Strictly better value found: previous ties are no longer maximal.
                maxActions.clear();
                maxActions.add(candidate);
                maxQ = candidate.q;
            }
        }
        return maxActions.get(this.rand.nextInt(maxActions.size())).a;
    }

    /**
     * Returns the probability of selecting the given action in the given state, computed from
     * {@link #policyDistribution(State)} via {@code PolicyUtils.actionProbFromEnum}.
     *
     * @param state the state of interest
     * @param action the action of interest
     * @return the selection probability reported by {@code PolicyUtils.actionProbFromEnum}
     */
    @Override // burlap.behavior.policy.Policy
    public double actionProb(State state, Action action) {
        return PolicyUtils.actionProbFromEnum(this, state, action);
    }

    /**
     * Enumerates the action selection probabilities for the given state.
     *
     * <p>Every action receives {@code epsilon / n}, where {@code n} is the number of Q-values
     * reported for the state; each action whose Q-value equals the maximum additionally
     * receives an equal share of {@code 1 - epsilon}.
     *
     * @param state the state of interest
     * @return one {@link ActionProb} per reported Q-value, in the same order
     */
    @Override // burlap.behavior.policy.EnumerablePolicy
    public List<ActionProb> policyDistribution(State state) {
        List<QValue> qValues = this.qplanner.qValues(state);
        List<ActionProb> distribution = new ArrayList<ActionProb>(qValues.size());
        double maxQ = Double.NEGATIVE_INFINITY;
        int numMax = 0;

        // First pass: assign the uniform exploration mass and count the maximal entries.
        for (QValue qValue : qValues) {
            if (qValue.q > maxQ) {
                maxQ = qValue.q;
                numMax = 1;
            } else if (qValue.q == maxQ) {
                numMax++;
            }
            distribution.add(new ActionProb(qValue.a, this.epsilon * (1.0d / qValues.size())));
        }

        // Second pass: split the greedy mass evenly among the maximal entries. The two lists
        // are index-aligned because distribution was filled in qValues order.
        for (int i = 0; i < distribution.size(); i++) {
            if (qValues.get(i).q == maxQ) {
                distribution.get(i).pSelection += (1.0d - this.epsilon) / numMax;
            }
        }
        return distribution;
    }

    /**
     * Reports whether this policy is defined for the given state.
     *
     * @param state the state of interest (not inspected)
     * @return always {@code true}
     */
    @Override // burlap.behavior.policy.Policy
    public boolean definedFor(State state) {
        return true;
    }
}
