package burlap.behavior.singleagent.pomdp.wrappedmdpalgs;

import burlap.behavior.policy.GreedyQPolicy;
import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.MDPSolver;
import burlap.behavior.singleagent.planning.Planner;
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling;
import burlap.behavior.singleagent.pomdp.BeliefPolicyAgent;
import burlap.behavior.valuefunction.QProvider;
import burlap.behavior.valuefunction.QValue;
import burlap.domain.singleagent.pomdp.tiger.TigerDomain;
import burlap.domain.singleagent.pomdp.tiger.TigerState;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.pomdp.BeliefMDPGenerator;
import burlap.mdp.singleagent.pomdp.PODomain;
import burlap.mdp.singleagent.pomdp.SimulatedPOEnvironment;
import burlap.mdp.singleagent.pomdp.beliefstate.TabularBeliefState;
import burlap.statehashing.HashableStateFactory;
import burlap.statehashing.ReflectiveHashableStateFactory;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/pomdp/wrappedmdpalgs/BeliefSparseSampling.class */
/**
 * Planner for a partially observable domain that works by building the
 * belief MDP of that domain and handing it to a {@link SparseSampling}
 * instance. All Q-value and planning requests made to this object are
 * forwarded to that wrapped planner.
 */
public class BeliefSparseSampling extends MDPSolver implements Planner, QProvider {
    /** Belief MDP produced from the partially observable domain given to the constructor. */
    protected SADomain beliefMDP;

    /** Sparse sampling planner that operates on {@link #beliefMDP}. */
    protected SparseSampling mdpPlanner;

    /**
     * Initializes this solver, generates the belief MDP for the supplied
     * domain, and creates the wrapped sparse sampling planner over it.
     *
     * @param pODomain the partially observable domain to plan in
     * @param d value forwarded to both {@code solverInit} and the wrapped planner
     *          (presumably the discount factor; confirm against {@code MDPSolver})
     * @param hashableStateFactory state hashing factory forwarded to both
     *          {@code solverInit} and the wrapped planner
     * @param i first integer setting forwarded unchanged to {@link SparseSampling}
     *          (presumably the planning height; confirm against its constructor)
     * @param i2 second integer setting for {@link SparseSampling}; any value
     *          below 1 is replaced by 1 and additionally turns on the wrapped
     *          planner's exact value function computation
     */
    public BeliefSparseSampling(PODomain pODomain, double d, HashableStateFactory hashableStateFactory, int i, int i2) {
        solverInit(pODomain, d, hashableStateFactory);

        BeliefMDPGenerator generator = new BeliefMDPGenerator(pODomain);
        beliefMDP = generator.generateDomain();

        // A value below 1 requests exact computation; the wrapped planner is then
        // constructed with 1 in place of that value.
        boolean exactRequested = i2 < 1;
        int effectiveSetting = exactRequested ? 1 : i2;

        mdpPlanner = new SparseSampling(beliefMDP, d, hashableStateFactory, i, effectiveSetting);
        if (exactRequested) {
            mdpPlanner.setComputeExactValueFunction(true);
        }
    }

    /**
     * Returns the belief MDP generated in the constructor.
     *
     * @return the belief MDP domain
     */
    public SADomain getBeliefMDP() {
        return beliefMDP;
    }

    /**
     * Returns the wrapped sparse sampling planner.
     *
     * @return the {@link SparseSampling} instance that performs the planning
     */
    public SparseSampling getSparseSamplingPlanner() {
        return mdpPlanner;
    }

    /**
     * Forwards to the wrapped planner's {@code qValues}.
     *
     * @param state the state to query
     * @return the Q-values reported by the wrapped planner for {@code state}
     */
    @Override
    public List<QValue> qValues(State state) {
        List<QValue> fromDelegate = mdpPlanner.qValues(state);
        return fromDelegate;
    }

    /**
     * Forwards to the wrapped planner's {@code qValue}.
     *
     * @param state the state to query
     * @param action the action to query
     * @return the Q-value reported by the wrapped planner for the pair
     */
    @Override
    public double qValue(State state, Action action) {
        double fromDelegate = mdpPlanner.qValue(state, action);
        return fromDelegate;
    }

    /**
     * Runs the wrapped planner from the given state and returns a greedy
     * policy backed by this object's Q-values. The policy returned by the
     * wrapped planner itself is discarded.
     *
     * @param state the state to plan from
     * @return a new {@link GreedyQPolicy} over this object
     */
    @Override
    public Policy planFromState(State state) {
        mdpPlanner.planFromState(state);
        Policy greedy = new GreedyQPolicy(this);
        return greedy;
    }

    /**
     * Resets the wrapped planner.
     */
    @Override
    public void resetSolver() {
        mdpPlanner.resetSolver();
    }

    /**
     * Returns the maximum Q-value for the given state, as computed by
     * {@code QProvider.Helper.maxQ} over this object.
     *
     * @param state the state to evaluate
     * @return the maximum Q-value for {@code state}
     */
    @Override
    public double value(State state) {
        double best = QProvider.Helper.maxQ(this, state);
        return best;
    }

    /**
     * Demo entry point: builds a tiger domain, runs a greedy policy over a
     * {@code BeliefSparseSampling} planner in a simulated environment via
     * {@code actUntilTerminalOrMaxSteps(30)}, and prints one line per action
     * containing the action and the reward stored at the following time step.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        TigerDomain tigerGenerator = new TigerDomain(true);
        PODomain domain = (PODomain) tigerGenerator.generateDomain();
        TabularBeliefState startingBelief = TigerDomain.getInitialBeliefState(domain);

        // The -1 argument is below 1, so the constructor enables exact value function computation.
        BeliefSparseSampling planner =
                new BeliefSparseSampling(domain, 0.99d, new ReflectiveHashableStateFactory(), 10, -1);
        Policy policy = new GreedyQPolicy(planner);

        SimulatedPOEnvironment environment = new SimulatedPOEnvironment(domain);
        environment.setCurStateTo(new TigerState(TigerDomain.VAL_LEFT));

        BeliefPolicyAgent agent = new BeliefPolicyAgent(domain, environment, policy);
        agent.setBeliefState(startingBelief);
        agent.setEnvironment(environment);

        Episode episode = agent.actUntilTerminalOrMaxSteps(30);

        // step is the reward index; the action that produced it sits one index earlier.
        for (int step = 1; step < episode.numTimeSteps(); step++) {
            System.out.println(episode.action(step - 1) + " " + episode.reward(step));
        }
    }
}
