package burlap.behavior.singleagent.pomdp.qmdp;

import burlap.behavior.policy.GreedyQPolicy;
import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.MDPSolver;
import burlap.behavior.singleagent.auxiliary.StateEnumerator;
import burlap.behavior.singleagent.planning.Planner;
import burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration;
import burlap.behavior.valuefunction.QProvider;
import burlap.behavior.valuefunction.QValue;
import burlap.mdp.core.TerminalFunction;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.model.RewardFunction;
import burlap.mdp.singleagent.pomdp.PODomain;
import burlap.mdp.singleagent.pomdp.beliefstate.BeliefState;
import burlap.mdp.singleagent.pomdp.beliefstate.EnumerableBeliefState;
import burlap.statehashing.HashableStateFactory;
import java.util.ArrayList;
import java.util.List;

/**
 * Q-value provider and planner for POMDP belief states that scores an action by the
 * belief-weighted sum of the Q-values an underlying MDP Q-source assigns to each state
 * with non-zero belief (see {@link #qForBeliefList(List, Action)}).
 *
 * <p>Q-value queries only accept states that are both {@link BeliefState} and
 * {@link EnumerableBeliefState} instances; anything else is rejected with a
 * {@link RuntimeException}.
 */
public class QMDP extends MDPSolver implements Planner, QProvider {
    /** Source of the Q-values for the underlying MDP states; also cast to {@link Planner} where planning/reset is needed. */
    protected QProvider mdpQSource;

    /**
     * Creates a QMDP instance on top of an existing MDP Q-value source.
     *
     * @param pODomain  the POMDP domain this solver is initialised with
     * @param qProvider the MDP Q-value source; it is cast to {@link Planner} to read its gamma and
     *                  hashing factory, so a non-{@code Planner} argument fails with a {@link ClassCastException}
     */
    public QMDP(PODomain pODomain, QProvider qProvider) {
        this.mdpQSource = qProvider;
        Planner planner = (Planner) this.mdpQSource;
        solverInit(pODomain, planner.getGamma(), planner.getHashingFactory());
    }

    /**
     * Creates a QMDP instance that uses a freshly constructed {@link ValueIteration} as its MDP Q-value source.
     *
     * @param pODomain            the POMDP domain, passed to both {@link ValueIteration} and the solver initialisation
     * @param rewardFunction      not used by this constructor
     * @param terminalFunction    not used by this constructor
     * @param d                   discount factor (gamma) handed to {@link ValueIteration} and to the solver initialisation
     * @param hashableStateFactory state hashing factory handed to {@link ValueIteration} and to the solver initialisation
     * @param d2                  fourth {@link ValueIteration} constructor argument (presumably the max value-change
     *                            threshold -- confirm against ValueIteration)
     * @param i                   fifth {@link ValueIteration} constructor argument (presumably the max number of
     *                            iterations -- confirm against ValueIteration)
     */
    public QMDP(PODomain pODomain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, HashableStateFactory hashableStateFactory, double d2, int i) {
        this.domain = pODomain;
        this.mdpQSource = new ValueIteration(pODomain, d, hashableStateFactory, d2, i);
        solverInit(pODomain, d, hashableStateFactory);
    }

    /**
     * Runs the underlying MDP planner from every state known to the domain's {@link StateEnumerator}.
     *
     * @throws RuntimeException if the domain reports that it provides no state enumerator, or if the
     *                          enumerator it returns is {@code null}
     */
    public void forceMDPPlanningFromAllStates() {
        PODomain poDomain = (PODomain) this.domain;
        if (!poDomain.providesStateEnumerator()) {
            throw new RuntimeException("QMDP cannot apply method forceMDPPlanningFromAllStates because the domain does not provide a StateEnumerator.");
        }
        Planner mdpPlanner = (Planner) this.mdpQSource;
        StateEnumerator enumerator = poDomain.getStateEnumerator();
        if (enumerator == null) {
            throw new RuntimeException("QMDP cannot plan from all states because the StateEnumerator for the POMDP domain was never specified.");
        }
        // The bound is re-read every iteration on purpose: the enumerated count is not assumed
        // to stay fixed while the planner runs.
        for (int id = 0; id < enumerator.numStatesEnumerated(); id++) {
            mdpPlanner.planFromState(enumerator.getStateForEnumerationId(id));
        }
    }

    /**
     * Returns one Q-value per action applicable in a state sampled from the given belief state.
     *
     * @param state the belief state to evaluate; must be an {@link EnumerableBeliefState}
     * @return the Q-values for the belief state, one per applicable action
     * @throws RuntimeException if {@code state} is {@code null} or not an enumerable belief state
     */
    @Override // burlap.behavior.valuefunction.QProvider
    public List<QValue> qValues(State state) {
        EnumerableBeliefState enumerableBelief = requireEnumerableBelief(state);
        BeliefState beliefState = (BeliefState) state;
        // The applicable actions are taken from a single MDP state sampled from the belief.
        List<Action> actions = applicableActions(beliefState.sample());
        List<QValue> result = new ArrayList<QValue>(actions.size());
        List<EnumerableBeliefState.StateBelief> nonZeroBeliefs = enumerableBelief.nonZeroBeliefs();
        for (Action action : actions) {
            result.add(new QValue(state, action, qForBeliefList(nonZeroBeliefs, action)));
        }
        return result;
    }

    /**
     * Returns the Q-value of taking an action in the given belief state.
     *
     * @param state  the belief state to evaluate; must be an {@link EnumerableBeliefState}
     * @param action the action to evaluate
     * @return the belief-weighted Q-value
     * @throws RuntimeException if {@code state} is {@code null} or not an enumerable belief state
     */
    @Override // burlap.behavior.valuefunction.QFunction
    public double qValue(State state, Action action) {
        return qForBelief(requireEnumerableBelief(state), action);
    }

    /**
     * Returns the value of the given state as computed by {@code QProvider.Helper.maxQ} over this provider.
     *
     * @param state the state to evaluate
     * @return the result of {@code QProvider.Helper.maxQ(this, state)}
     */
    @Override // burlap.behavior.valuefunction.ValueFunction
    public double value(State state) {
        return QProvider.Helper.maxQ(this, state);
    }

    /**
     * Computes the Q-value of an action for an enumerable belief state from its non-zero beliefs.
     *
     * @param enumerableBeliefState the belief state whose non-zero beliefs are used
     * @param action                the action to evaluate
     * @return the belief-weighted Q-value
     */
    public double qForBelief(EnumerableBeliefState enumerableBeliefState, Action action) {
        return qForBeliefList(enumerableBeliefState.nonZeroBeliefs(), action);
    }

    /**
     * Sums, over the given state/belief pairs, the belief multiplied by the MDP Q-value of the
     * action in that state.
     *
     * @param list   the state/belief pairs to aggregate over
     * @param action the action to evaluate
     * @return the weighted sum; {@code 0.0} for an empty list
     */
    protected double qForBeliefList(List<EnumerableBeliefState.StateBelief> list, Action action) {
        double weightedSum = 0.0d;
        for (EnumerableBeliefState.StateBelief entry : list) {
            weightedSum += entry.belief * this.mdpQSource.qValue(entry.s, action);
        }
        return weightedSum;
    }

    /**
     * Plans the underlying MDP from all enumerated states and returns a greedy policy over this
     * provider's Q-values. The {@code state} argument is not consulted.
     *
     * @param state ignored
     * @return a {@link GreedyQPolicy} backed by this instance
     * @throws RuntimeException under the same conditions as {@link #forceMDPPlanningFromAllStates()}
     */
    @Override // burlap.behavior.singleagent.planning.Planner
    public Policy planFromState(State state) {
        forceMDPPlanningFromAllStates();
        return new GreedyQPolicy(this);
    }

    /** Resets the underlying MDP Q-source by delegating to its {@link Planner#resetSolver()}. */
    @Override // burlap.behavior.singleagent.MDPSolver, burlap.behavior.singleagent.MDPSolverInterface
    public void resetSolver() {
        ((Planner) this.mdpQSource).resetSolver();
    }

    /**
     * Validates that a state can be used for belief-based Q-value queries.
     *
     * @param state the state to check; may be {@code null}
     * @return the same state viewed as an {@link EnumerableBeliefState}
     * @throws RuntimeException if the state is {@code null} or is not both a {@link BeliefState}
     *                          and an {@link EnumerableBeliefState}
     */
    private static EnumerableBeliefState requireEnumerableBelief(State state) {
        if ((state instanceof BeliefState) && (state instanceof EnumerableBeliefState)) {
            return (EnumerableBeliefState) state;
        }
        // Guard the class lookup so a null state yields the descriptive error rather than a bare NPE.
        String actualType = state == null ? "null" : state.getClass().getName();
        throw new RuntimeException("QMDP cannot return the Q-values for the given state, because the given state is not a EnumerableBeliefState instance. It is a " + actualType);
    }
}
