package burlap.behavior.singleagent.learnfromdemo;

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling;
import burlap.behavior.valuefunction.QProvider;
import burlap.behavior.valuefunction.QValue;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.ActionUtils;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.model.RewardFunction;
import burlap.mdp.singleagent.model.SampleModel;
import burlap.statehashing.simple.SimpleHashableStateFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learnfromdemo/RewardValueProjection.class */
/**
 * Presents a {@link RewardFunction} as a {@link QProvider}, so that the reward itself is reported as
 * the value / Q-value of a state.
 *
 * <p>How the queried state (and action) are fed into
 * {@code RewardFunction.reward(source, action, destination)} is selected by a
 * {@link RewardProjectionType}:
 * <ul>
 *   <li>{@code DESTINATIONSTATE}: the queried state is passed as the destination (third) argument;</li>
 *   <li>{@code SOURCESTATE}: the queried state is passed as the source (first) argument;</li>
 *   <li>{@code STATEACTION}: the queried state and action are passed as the first two arguments;</li>
 *   <li>{@code ONESTEP}: the query is delegated to an internal {@link SparseSampling} planner that is
 *       built from the domain's model, which is why this mode requires a domain.</li>
 * </ul>
 */
public class RewardValueProjection implements QProvider {
    /** Reward function whose output is reported as the value. */
    protected RewardFunction rf;

    /** How queried states/actions are mapped onto the reward function's arguments. */
    protected RewardProjectionType projectionType = RewardProjectionType.DESTINATIONSTATE;

    /** Planner answering queries in {@code ONESTEP} mode; only created by the three-argument constructor. */
    protected SparseSampling oneStepBellmanPlanner;

    /** Domain used to enumerate applicable actions in {@link #qValues(State)}; may be {@code null}. */
    protected SADomain domain;

    /**
     * A {@link CustomRewardModel} that never reports termination: {@link #terminal(State)} always
     * returns {@code false} and every outcome has its {@code terminated} flag cleared before the
     * parent class processes it.
     */
    /* loaded from: input_file:burlap/behavior/singleagent/learnfromdemo/RewardValueProjection$CustomRewardNoTermModel.class */
    public static class CustomRewardNoTermModel extends CustomRewardModel {
        /**
         * @param sampleModel    the model handed to the parent {@link CustomRewardModel}
         * @param rewardFunction the reward function handed to the parent {@link CustomRewardModel}
         */
        public CustomRewardNoTermModel(SampleModel sampleModel, RewardFunction rewardFunction) {
            super(sampleModel, rewardFunction);
        }

        /**
         * @param state the state being queried (ignored)
         * @return always {@code false}
         */
        @Override // burlap.behavior.singleagent.learnfromdemo.CustomRewardModel, burlap.mdp.singleagent.model.SampleModel
        public boolean terminal(State state) {
            return false;
        }

        /**
         * Clears the termination flag of the outcome and then defers to the parent implementation.
         *
         * @param environmentOutcome the outcome to adjust; its {@code terminated} field is overwritten
         * @return whatever the parent {@code modifyOutcome} returns for the adjusted outcome
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // burlap.behavior.singleagent.learnfromdemo.CustomRewardModel
        public EnvironmentOutcome modifyOutcome(EnvironmentOutcome environmentOutcome) {
            environmentOutcome.terminated = false;
            return super.modifyOutcome(environmentOutcome);
        }
    }

    /** Selects which arguments of the reward function receive the queried state and action. */
    /* loaded from: input_file:burlap/behavior/singleagent/learnfromdemo/RewardValueProjection$RewardProjectionType.class */
    public enum RewardProjectionType {
        SOURCESTATE,
        DESTINATIONSTATE,
        STATEACTION,
        ONESTEP
    }

    /**
     * Creates a projection that uses the default {@code DESTINATIONSTATE} mode and no domain.
     *
     * @param rewardFunction the reward function to project
     */
    public RewardValueProjection(RewardFunction rewardFunction) {
        this.rf = rewardFunction;
    }

    /**
     * Creates a projection with an explicit mode and no domain.
     *
     * @param rewardFunction       the reward function to project
     * @param rewardProjectionType the projection mode; must not be {@code ONESTEP}
     * @throws RuntimeException if {@code rewardProjectionType} is {@code ONESTEP}, because that mode
     *                          needs a domain
     */
    public RewardValueProjection(RewardFunction rewardFunction, RewardProjectionType rewardProjectionType) {
        this.rf = rewardFunction;
        this.projectionType = rewardProjectionType;
        if (rewardProjectionType == RewardProjectionType.ONESTEP) {
            throw new RuntimeException("If the reward function depends on a 1 step transition (e.g., from a source state to a target state) then to project the value the Domain is needed evaluate the transition dynamics. Use the RewardValueProjection(RewardFunction, RewardProjectionType, Domain) constructor instead to specify.");
        }
    }

    /**
     * Creates a projection with an explicit mode and a domain. In {@code ONESTEP} mode this also
     * builds the internal planner over a {@link CustomRewardNoTermModel} wrapping the domain's model.
     *
     * @param rewardFunction       the reward function to project
     * @param rewardProjectionType the projection mode
     * @param sADomain             the domain used to enumerate actions and, in {@code ONESTEP} mode,
     *                             to supply the model
     */
    public RewardValueProjection(RewardFunction rewardFunction, RewardProjectionType rewardProjectionType, SADomain sADomain) {
        this.rf = rewardFunction;
        this.projectionType = rewardProjectionType;
        this.domain = sADomain;
        if (this.projectionType == RewardProjectionType.ONESTEP) {
            // NOTE(review): arguments are presumably discount 1.0, planning depth 1 and -1 for
            // "no sampling limit" -- confirm against the SparseSampling constructor documentation.
            this.oneStepBellmanPlanner = new SparseSampling(sADomain, 1.0d, new SimpleHashableStateFactory(), 1, -1);
            this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(sADomain.getModel(), rewardFunction));
            this.oneStepBellmanPlanner.toggleDebugPrinting(false);
            this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
        }
    }

    /**
     * Returns the Q-values of {@code state}.
     *
     * <p>When a domain is available, one {@link QValue} is produced per applicable action using
     * {@link #qValue(State, Action)}. Without a domain, the state-only modes return a single
     * {@link QValue} whose action is {@code null}.
     *
     * @param state the state to evaluate
     * @return the Q-values for {@code state}
     * @throws RuntimeException if no domain was provided and the mode is not one of the state-only
     *                          modes ({@code DESTINATIONSTATE}, {@code SOURCESTATE})
     */
    @Override // burlap.behavior.valuefunction.QProvider
    public List<QValue> qValues(State state) {
        if (this.domain != null) {
            List<Action> applicableActions = ActionUtils.allApplicableActionsForTypes(this.domain.getActionTypes(), state);
            List<QValue> qs = new ArrayList<QValue>(applicableActions.size());
            for (Action action : applicableActions) {
                qs.add(new QValue(state, action, qValue(state, action)));
            }
            return qs;
        }
        if (this.projectionType == RewardProjectionType.DESTINATIONSTATE) {
            return Arrays.asList(new QValue(state, null, this.rf.reward(null, null, state)));
        }
        if (this.projectionType == RewardProjectionType.SOURCESTATE) {
            // The queried state is the SOURCE argument here, matching qValue(State, Action) and
            // value(State); passing it as the destination would hand source-based reward
            // functions a null state.
            return Arrays.asList(new QValue(state, null, this.rf.reward(state, null, null)));
        }
        if (this.projectionType == RewardProjectionType.STATEACTION) {
            throw new RuntimeException("RewardValueProjection cannot generate all state-action Q-values because it was notprovided the Domain to enumerate the actions. Use the RewardValueProjection(RewardFunction, RewardProjectionType, Domain) constructor to specify it.");
        }
        throw new RuntimeException("Unknown RewardProjectionType... this shouldn't happen.");
    }

    /**
     * Returns the Q-value of taking {@code action} in {@code state} under the configured mode.
     *
     * @param state  the state to evaluate
     * @param action the action to evaluate
     * @return the projected reward, or the internal planner's Q-value in {@code ONESTEP} mode
     */
    @Override // burlap.behavior.valuefunction.QFunction
    public double qValue(State state, Action action) {
        switch (this.projectionType) {
            case DESTINATIONSTATE:
                return this.rf.reward(null, action, state);
            case SOURCESTATE:
            case STATEACTION:
                return this.rf.reward(state, action, null);
            case ONESTEP:
                return this.oneStepBellmanPlanner.qValue(state, action);
            default:
                throw new RuntimeException("Unknown RewardProjectionType... this shouldn't happen.");
        }
    }

    /**
     * Returns the value of {@code state} under the configured mode.
     *
     * @param state the state to evaluate
     * @return the projected reward for the state-only modes, the maximum Q-value in
     *         {@code STATEACTION} mode, or the internal planner's value in {@code ONESTEP} mode
     */
    @Override // burlap.behavior.valuefunction.ValueFunction
    public double value(State state) {
        switch (this.projectionType) {
            case DESTINATIONSTATE:
                return this.rf.reward(null, null, state);
            case SOURCESTATE:
                return this.rf.reward(state, null, null);
            case STATEACTION:
                return QProvider.Helper.maxQ(this, state);
            case ONESTEP:
                return this.oneStepBellmanPlanner.value(state);
            default:
                throw new RuntimeException("Unknown RewardProjectionType... this shouldn't happen.");
        }
    }
}
