package burlap.behavior.singleagent.learning.modellearning.rmax;

import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.MDPSolver;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.behavior.singleagent.learning.modellearning.KWIKModel;
import burlap.behavior.singleagent.learning.modellearning.ModelLearningPlanner;
import burlap.behavior.singleagent.learning.modellearning.modelplanners.VIModelLearningPlanner;
import burlap.behavior.singleagent.learning.modellearning.models.TabularModel;
import burlap.behavior.singleagent.shaping.potential.PotentialFunction;
import burlap.mdp.core.TerminalFunction;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.model.RewardFunction;
import burlap.statehashing.HashableStateFactory;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/rmax/PotentialShapedRMax.class */
/**
 * Learning agent that wraps a KWIK model in an {@link RMaxModel} (built with a
 * {@link PotentialFunction}) and acts according to the policy produced by a
 * {@link ModelLearningPlanner} over that model.
 *
 * <p>A bounded history of the most recent learning episodes is retained; its size is
 * controlled with {@link #setNumEpisodesToStore(int)} and defaults to one episode.
 */
public class PotentialShapedRMax extends MDPSolver implements LearningAgent {
    /** The RMax model that is updated from observed environment outcomes and planned over. */
    protected RMaxModel model;

    /** Returned by {@link #getModeledRewardFunction()}; never assigned within this class. */
    protected RewardFunction modeledRewardFunction;

    /** Returned by {@link #getModeledTerminalFunction()}; never assigned within this class. */
    protected TerminalFunction modeledTerminalFunction;

    /** Planner that is notified of model changes and supplies the planned policy. */
    protected ModelLearningPlanner modelPlanner;

    /** Not read within this class; retained for subclasses. */
    protected int maxNumSteps = Integer.MAX_VALUE;

    /** Stored learning episodes, oldest first. */
    protected LinkedList<Episode> episodeHistory = new LinkedList<>();

    /** Maximum number of episodes kept in {@link #episodeHistory}. */
    protected int numEpisodesToStore = 1;

    /**
     * Potential function that returns the same constant value for every state.
     */
    public static class RMaxPotential implements PotentialFunction {
        /** The constant potential returned for every state. */
        double vmax;

        /**
         * Sets the constant potential to {@code d / (1 - d2)}.
         *
         * <p>NOTE(review): with {@code d2 == 1.0} the division yields an infinite (or NaN)
         * potential; no guard is applied here.
         *
         * @param d  numerator of the potential (presumably the maximum reward - confirm)
         * @param d2 value subtracted from one in the denominator (presumably the discount
         *           factor - confirm)
         */
        public RMaxPotential(double d, double d2) {
            this.vmax = d / (1.0d - d2);
        }

        /**
         * Sets the constant potential directly.
         *
         * @param d the value returned for every state
         */
        public RMaxPotential(double d) {
            this.vmax = d;
        }

        /**
         * Returns the constant potential, ignoring the given state.
         *
         * @param state ignored
         * @return the constant potential configured at construction
         */
        @Override
        public double potentialValue(State state) {
            return this.vmax;
        }
    }

    /**
     * Builds the agent with a {@link TabularModel} and a {@link VIModelLearningPlanner}.
     * The potential is an {@link RMaxPotential} with constant value {@code d2 / (1 - d)}.
     *
     * @param sADomain             the domain to learn in
     * @param d                    passed to {@code solverInit}, the RMax model and the planner
     *                             (presumably the discount factor - confirm)
     * @param hashableStateFactory state hashing factory for the model and planner
     * @param d2                   numerator of the constant potential
     * @param i                    third argument of the {@link TabularModel} constructor
     * @param d3                   fifth argument of the {@link VIModelLearningPlanner} constructor
     * @param i2                   sixth argument of the {@link VIModelLearningPlanner} constructor
     */
    public PotentialShapedRMax(SADomain sADomain, double d, HashableStateFactory hashableStateFactory, double d2, int i, double d3, int i2) {
        solverInit(sADomain, d, hashableStateFactory);
        this.model = new RMaxModel(new TabularModel(sADomain, hashableStateFactory, i), new RMaxPotential(d2, d), d, sADomain.getActionTypes());
        this.modelPlanner = new VIModelLearningPlanner(sADomain, this.model, d, hashableStateFactory, d3, i2);
    }

    /**
     * Builds the agent with a {@link TabularModel}, a {@link VIModelLearningPlanner} and a
     * caller-supplied potential function.
     *
     * @param sADomain             the domain to learn in
     * @param d                    passed to {@code solverInit}, the RMax model and the planner
     *                             (presumably the discount factor - confirm)
     * @param hashableStateFactory state hashing factory for the model and planner
     * @param potentialFunction    potential function handed to the {@link RMaxModel}
     * @param i                    third argument of the {@link TabularModel} constructor
     * @param d2                   fifth argument of the {@link VIModelLearningPlanner} constructor
     * @param i2                   sixth argument of the {@link VIModelLearningPlanner} constructor
     */
    public PotentialShapedRMax(SADomain sADomain, double d, HashableStateFactory hashableStateFactory, PotentialFunction potentialFunction, int i, double d2, int i2) {
        solverInit(sADomain, d, hashableStateFactory);
        this.model = new RMaxModel(new TabularModel(sADomain, hashableStateFactory, i), potentialFunction, d, sADomain.getActionTypes());
        this.modelPlanner = new VIModelLearningPlanner(sADomain, this.model, d, hashableStateFactory, d2, i2);
    }

    /**
     * Builds the agent around a caller-supplied KWIK model and planner. The planner's model
     * is replaced with the {@link RMaxModel} created here.
     *
     * <p>NOTE(review): this constructor takes no discount argument; it forwards whatever
     * value the inherited {@code gamma} field holds at this point to {@code solverInit} and
     * to the {@link RMaxModel}. Confirm that value is the intended one.
     *
     * @param sADomain             the domain to learn in
     * @param hashableStateFactory state hashing factory passed to {@code solverInit}
     * @param potentialFunction    potential function handed to the {@link RMaxModel}
     * @param kWIKModel            the model wrapped by the {@link RMaxModel}
     * @param modelLearningPlanner the planner to use; its model is set to the RMax model
     */
    public PotentialShapedRMax(SADomain sADomain, HashableStateFactory hashableStateFactory, PotentialFunction potentialFunction, KWIKModel kWIKModel, ModelLearningPlanner modelLearningPlanner) {
        solverInit(sADomain, this.gamma, hashableStateFactory);
        this.model = new RMaxModel(kWIKModel, potentialFunction, this.gamma, sADomain.getActionTypes());
        this.modelPlanner = modelLearningPlanner;
        this.modelPlanner.setModel(this.model);
    }

    /**
     * @return the RMax model this agent learns and plans with
     */
    @Override
    public RMaxModel getModel() {
        return this.model;
    }

    /**
     * @return the planner used to plan over the learned model
     */
    public ModelLearningPlanner getModelPlanner() {
        return this.modelPlanner;
    }

    /**
     * @return the {@link #modeledRewardFunction} field; {@code null} unless a subclass assigns it
     */
    public RewardFunction getModeledRewardFunction() {
        return this.modeledRewardFunction;
    }

    /**
     * @return the {@link #modeledTerminalFunction} field; {@code null} unless a subclass assigns it
     */
    public TerminalFunction getModeledTerminalFunction() {
        return this.modeledTerminalFunction;
    }

    /**
     * Runs a learning episode with no step limit.
     *
     * @param environment the environment to act in
     * @return the recorded episode
     */
    @Override
    public Episode runLearningEpisode(Environment environment) {
        return runLearningEpisode(environment, -1);
    }

    /**
     * Runs a learning episode, updating the model from each observed outcome and asking the
     * planner to replan whenever the model changes in a way that matters.
     *
     * @param environment the environment to act in
     * @param i           maximum number of steps to take, or {@code -1} for no limit
     * @return the recorded episode, which is also appended to the stored history
     */
    @Override
    public Episode runLearningEpisode(Environment environment, int i) {
        State initialState = environment.currentObservation();
        this.modelPlanner.initializePlannerIn(initialState);
        Episode episode = new Episode(initialState);
        Policy policy = createUnmodeledFavoredPolicy();
        State curState = initialState;
        int steps = 0;
        while (!environment.isInTerminalState() && (steps < i || i == -1)) {
            Action action = policy.action(curState);
            EnvironmentOutcome outcome = environment.executeAction(action);
            episode.transition(action, outcome.op, outcome.r);

            // Captured before the model update so a change in modeled terminality can be detected.
            boolean modeledTerminal = this.model.terminal(outcome.op);

            // Update only if the executed transition is not yet modeled, or the resulting state
            // is not modeled as terminal and the helper reports its transitions as not modeled.
            if (!this.model.transitionIsModeled(curState, action)
                    || (!KWIKModel.Helper.stateTransitionsModeled(this.model, getActionTypes(), outcome.op) && !modeledTerminal)) {
                this.model.updateModel(outcome);
                // Replan once the transition is modeled, or when the update changed the modeled
                // terminality of the resulting state.
                if (this.model.transitionIsModeled(curState, action)
                        || (outcome.terminated != modeledTerminal && modeledTerminal != this.model.terminal(outcome.op))) {
                    this.modelPlanner.modelChanged(curState);
                    policy = createUnmodeledFavoredPolicy();
                }
            }
            curState = environment.currentObservation();
            steps++;
        }

        // Make room for the new episode; drops as many old episodes as needed so the limit
        // holds even if the history was previously larger than the limit.
        trimHistoryTo(this.numEpisodesToStore - 1);
        this.episodeHistory.offer(episode);
        return episode;
    }

    /**
     * Wraps the planner's current policy in an {@link UnmodeledFavoredPolicy} built with this
     * agent's model and action types.
     *
     * @return the policy used to select actions during learning
     */
    protected Policy createUnmodeledFavoredPolicy() {
        return new UnmodeledFavoredPolicy(this.modelPlanner.modelPlannedPolicy(), this.model, getActionTypes());
    }

    /**
     * Returns the most recently stored learning episode.
     *
     * @return the last episode in the history
     * @throws java.util.NoSuchElementException if no episode has been stored yet
     */
    public Episode getLastLearningEpisode() {
        return this.episodeHistory.getLast();
    }

    /**
     * Sets how many learning episodes are retained. Non-positive values are treated as 1.
     * If more episodes than the new limit are currently stored, the oldest are discarded
     * immediately.
     *
     * @param i the number of episodes to retain
     */
    public void setNumEpisodesToStore(int i) {
        this.numEpisodesToStore = i > 0 ? i : 1;
        trimHistoryTo(this.numEpisodesToStore);
    }

    /**
     * Returns the stored learning episodes, oldest first.
     *
     * <p>The returned list is the agent's internal history, not a copy; changes made to it
     * affect the agent.
     *
     * @return the stored episodes
     */
    public List<Episode> getAllStoredLearningEpisodes() {
        return this.episodeHistory;
    }

    /**
     * Resets the model and the planner and clears the stored episode history.
     */
    @Override
    public void resetSolver() {
        this.model.resetModel();
        this.modelPlanner.resetSolver();
        this.episodeHistory.clear();
    }

    /**
     * Discards the oldest stored episodes until at most {@code limit} remain.
     * Negative limits are treated as zero.
     */
    private void trimHistoryTo(int limit) {
        int bound = Math.max(limit, 0);
        while (this.episodeHistory.size() > bound) {
            this.episodeHistory.poll();
        }
    }
}
