package burlap.behavior.singleagent.learning.actorcritic;

import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.MDPSolver;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.ActionType;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learning/actorcritic/ActorCritic.class */
/**
 * Learning agent that pairs an {@link Actor}, which selects actions, with a {@link Critic},
 * which evaluates each observed environment outcome. After every step the critic's critique
 * of the outcome is passed to the actor so it can update itself.
 *
 * <p>A bounded history of the most recent learning episodes is retained; by default only the
 * latest episode is kept.
 */
public class ActorCritic extends MDPSolver implements LearningAgent {
    /** Component that chooses actions and is updated from the critic's critiques. */
    protected Actor actor;
    /** Component that critiques each environment outcome. */
    protected Critic critic;
    /** Step limit applied to each episode run by {@link #planFromState(State)}. */
    protected int maxEpisodeSize;
    /** Number of episodes run by each call to {@link #planFromState(State)}. */
    protected int numEpisodesForPlanning;
    /** Most recent learning episodes, oldest first. */
    protected LinkedList<Episode> episodeHistory;
    /** Upper bound on how many episodes are kept in {@link #episodeHistory}. */
    protected int numEpisodesToStore;

    /**
     * Creates an agent whose planning episodes have no practical step limit
     * ({@code Integer.MAX_VALUE}).
     *
     * @param sADomain the domain, forwarded to {@code solverInit}
     * @param d forwarded to {@code solverInit} as its second argument
     *          (presumably the discount factor; confirm against {@code MDPSolver})
     * @param actor the action-selecting component
     * @param critic the outcome-critiquing component
     */
    public ActorCritic(SADomain sADomain, double d, Actor actor, Critic critic) {
        this(sADomain, d, actor, critic, Integer.MAX_VALUE);
    }

    /**
     * Creates an agent with an explicit step limit for planning episodes.
     *
     * @param sADomain the domain, forwarded to {@code solverInit}
     * @param d forwarded to {@code solverInit} as its second argument
     *          (presumably the discount factor; confirm against {@code MDPSolver})
     * @param actor the action-selecting component
     * @param critic the outcome-critiquing component
     * @param i maximum number of steps per episode run by {@link #planFromState(State)}
     */
    public ActorCritic(SADomain sADomain, double d, Actor actor, Critic critic, int i) {
        this.actor = actor;
        this.critic = critic;
        this.maxEpisodeSize = i;
        this.numEpisodesForPlanning = 1;
        this.episodeHistory = new LinkedList<>();
        this.numEpisodesToStore = 1;
        solverInit(sADomain, d, null);
    }

    /**
     * Sets how many episodes each call to {@link #planFromState(State)} runs.
     *
     * @param i the number of planning episodes
     */
    public void initializeForPlanning(int i) {
        this.numEpisodesForPlanning = i;
    }

    /**
     * Registers an action type with this solver and with both the actor and the critic.
     *
     * @param actionType the action type to add
     */
    @Override // burlap.behavior.singleagent.MDPSolver, burlap.behavior.singleagent.MDPSolverInterface
    public void addActionType(ActionType actionType) {
        super.addActionType(actionType);
        this.actor.addNonDomainReferencedAction(actionType);
        this.critic.addActionType(actionType);
    }

    /**
     * Runs one learning episode with no step limit.
     *
     * @param environment the environment to act in
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public Episode runLearningEpisode(Environment environment) {
        return runLearningEpisode(environment, -1);
    }

    /**
     * Runs one learning episode, starting from the environment's current observation, until
     * the environment reports a terminal state or the step limit is reached. The finished
     * episode is added to the stored history.
     *
     * @param environment the environment to act in
     * @param i maximum number of steps to take; {@code -1} means no limit
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public Episode runLearningEpisode(Environment environment, int i) {
        State state = environment.currentObservation();
        Episode episode = new Episode(state);
        this.critic.initializeEpisode(state);

        int steps = 0;
        while (!environment.isInTerminalState() && (steps < i || i == -1)) {
            Action action = this.actor.action(state);
            EnvironmentOutcome outcome = environment.executeAction(action);
            episode.transition(action, outcome.op, outcome.r);
            // The critic evaluates the outcome and the actor learns from that critique.
            this.actor.updateFromCritique(this.critic.critiqueAndUpdate(outcome));
            state = environment.currentObservation();
            steps++;
        }

        this.critic.endEpisode();
        recordEpisode(episode);
        return episode;
    }

    /**
     * Appends an episode to the history, first evicting the oldest entries so that the history
     * holds at most {@link #numEpisodesToStore} episodes afterwards. The newest episode is
     * always kept, even when the bound is zero or negative.
     */
    private void recordEpisode(Episode episode) {
        // Loop rather than evict once: the bound may have been lowered since the last episode.
        while (!this.episodeHistory.isEmpty()
                && this.episodeHistory.size() >= this.numEpisodesToStore) {
            this.episodeHistory.poll();
        }
        this.episodeHistory.offer(episode);
    }

    /**
     * Returns the most recently recorded learning episode.
     *
     * @return the latest episode
     * @throws java.util.NoSuchElementException if no episode has been recorded yet
     */
    public Episode getLastLearningEpisode() {
        return this.episodeHistory.getLast();
    }

    /**
     * Sets the maximum number of episodes kept in the history. A lower bound takes effect the
     * next time an episode is recorded.
     *
     * @param i the maximum number of episodes to keep
     */
    public void setNumEpisodesToStore(int i) {
        this.numEpisodesToStore = i;
    }

    /**
     * Returns the stored learning episodes, oldest first.
     *
     * @return the internal history list itself (not a copy); changes made by this agent are
     *         visible through it
     */
    public List<Episode> getAllStoredLearningEpisodes() {
        return this.episodeHistory;
    }

    /**
     * Runs {@link #numEpisodesForPlanning} learning episodes in simulation, each starting from
     * the given state and limited to {@link #maxEpisodeSize} steps.
     *
     * @param state the state every planning episode starts from
     * @throws RuntimeException if no model is available
     */
    public void planFromState(State state) {
        if (this.model == null) {
            throw new RuntimeException("Planning requires a model, but none is provided.");
        }
        for (int episodeIndex = 0; episodeIndex < this.numEpisodesForPlanning; episodeIndex++) {
            // A fresh environment per episode: reusing one would start later episodes from
            // wherever the previous episode ended instead of from the requested state.
            runLearningEpisode(new SimulatedEnvironment(this.model, state), this.maxEpisodeSize);
        }
    }

    /** Clears the stored episode history and resets the actor's and the critic's data. */
    @Override // burlap.behavior.singleagent.MDPSolver, burlap.behavior.singleagent.MDPSolverInterface
    public void resetSolver() {
        this.episodeHistory.clear();
        this.actor.resetData();
        this.critic.resetData();
    }

    /**
     * Returns the policy followed by this agent, which is the actor itself.
     *
     * @return the actor
     */
    public Policy getPolicy() {
        return this.actor;
    }
}
