package burlap.behavior.policy;

import burlap.behavior.policy.support.ActionProb;
import burlap.behavior.singleagent.MDPSolverInterface;
import burlap.behavior.valuefunction.QProvider;
import burlap.behavior.valuefunction.QValue;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import java.util.List;
import javax.management.RuntimeErrorException;

/* loaded from: input_file:burlap/behavior/policy/GreedyDeterministicQPolicy.class */
/**
 * A deterministic greedy policy backed by a {@link QProvider}: for a given state it selects the
 * action whose Q-value is largest. When several actions share the largest Q-value, the one that
 * appears first in the collection returned by {@link QProvider#qValues(State)} is chosen, so the
 * result is deterministic for a fixed iteration order.
 *
 * <p>The Q-value source may be supplied at construction time or later through
 * {@link #setSolver(MDPSolverInterface)}.
 */
public class GreedyDeterministicQPolicy implements SolverDerivedPolicy, EnumerablePolicy {

    /** Source of Q-values; {@code null} until supplied by a constructor or {@link #setSolver}. */
    protected QProvider qplanner;

    /**
     * Creates a policy with no Q-value source. One must be supplied with
     * {@link #setSolver(MDPSolverInterface)} before actions are queried.
     */
    public GreedyDeterministicQPolicy() {
        this.qplanner = null;
    }

    /**
     * Creates a policy that selects actions greedily with respect to the given Q-value source.
     *
     * @param qProvider the object whose Q-values drive action selection
     */
    public GreedyDeterministicQPolicy(QProvider qProvider) {
        this.qplanner = qProvider;
    }

    /**
     * Sets the Q-value source used by this policy.
     *
     * @param mDPSolverInterface the solver to use; must also implement {@link QProvider}
     * @throws RuntimeErrorException if the solver does not implement {@link QProvider}
     */
    @Override
    public void setSolver(MDPSolverInterface mDPSolverInterface) {
        if (!(mDPSolverInterface instanceof QProvider)) {
            throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
        }
        this.qplanner = (QProvider) mDPSolverInterface;
    }

    /**
     * Returns the action with the largest Q-value in the given state.
     *
     * <p>The first Q-value encountered is always kept as a fallback, so an action is still returned
     * when no Q-value compares strictly greater than negative infinity (for example when every
     * value is {@code -Infinity} or {@code NaN}). Ties are resolved in favour of the earliest entry.
     *
     * @param state the state in which to choose an action
     * @return the action of the selected Q-value
     * @throws IllegalStateException if no Q-value source has been set, or if the source returns no
     *         Q-values for the state
     */
    @Override
    public Action action(State state) {
        if (this.qplanner == null) {
            throw new IllegalStateException(
                    "No QProvider has been set; pass one to the constructor or call setSolver first");
        }

        double bestQ = Double.NEGATIVE_INFINITY;
        QValue best = null;
        for (QValue candidate : this.qplanner.qValues(state)) {
            if (candidate.q > bestQ) {
                bestQ = candidate.q;
                best = candidate;
            } else if (best == null) {
                // The strict comparison above never succeeds for -Infinity or NaN, so remember
                // the first entry to guarantee a result whenever at least one Q-value exists.
                best = candidate;
            }
        }

        if (best == null) {
            throw new IllegalStateException(
                    "The QProvider returned no Q-values for the queried state; cannot select an action");
        }
        return best.a;
    }

    /**
     * Returns the probability of this policy choosing the given action in the given state.
     *
     * @param state the state being queried
     * @param action the action being queried
     * @return {@code 1.0} if {@link #action(State)} returns an action equal to {@code action},
     *         otherwise {@code 0.0}
     */
    @Override
    public double actionProb(State state, Action action) {
        return action(state).equals(action) ? 1.0d : 0.0d;
    }

    /**
     * Returns the action distribution for the given state by delegating to
     * {@link PolicyUtils#deterministicPolicyDistribution}.
     *
     * @param state the state being queried
     * @return the distribution produced by the delegate for this policy and state
     */
    @Override
    public List<ActionProb> policyDistribution(State state) {
        return PolicyUtils.deterministicPolicyDistribution(this, state);
    }

    /**
     * Reports whether this policy is defined for the given state.
     *
     * @param state the state being queried (not inspected)
     * @return always {@code true}
     */
    @Override
    public boolean definedFor(State state) {
        return true;
    }
}
