package burlap.behavior.stochasticgames.agents.interfacing.singleagent;

import burlap.behavior.singleagent.MDPSolver;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.ActionType;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.stochasticgames.JointAction;
import burlap.mdp.stochasticgames.SGDomain;
import burlap.mdp.stochasticgames.agent.SGAgentBase;
import burlap.mdp.stochasticgames.agent.SGAgentType;
import burlap.mdp.stochasticgames.world.World;
import java.util.Iterator;

/* loaded from: input_file:burlap/behavior/stochasticgames/agents/interfacing/singleagent/LearningAgentToSGAgentInterface.class */
/**
 * Adapter that lets a single-agent {@link LearningAgent} act as an agent in a stochastic game.
 *
 * <p>The class plays two roles at once. As a stochastic-game agent it receives {@link #action(State)},
 * {@link #observeOutcome(State, JointAction, double[], State, boolean)} and {@link #gameTerminated()}
 * callbacks. As an {@link Environment} it is handed to the wrapped learner's
 * {@code runLearningEpisode} call, which runs on a separate thread ({@link #saThread}) started by the
 * first {@link #action(State)} call of a game.
 *
 * <p>The two threads hand values to each other through two one-slot mailboxes guarded by their own
 * monitors: {@link #nextAction} (learner thread to game thread) and {@link #nextState} (game thread
 * to learner thread). A {@code null} slot means "nothing pending".
 */
public class LearningAgentToSGAgentInterface extends SGAgentBase implements Environment {
    /** The single-agent learner whose decisions are forwarded to the stochastic game. */
    protected LearningAgent learningAgent;

    /** This agent's entry of the reward array most recently passed to {@code observeOutcome}. */
    protected double lastReward;

    /** The most recent state received from the game; also what {@link #currentObservation()} returns. */
    protected State currentState;

    /** Thread running the learner's episode, or {@code null} when no episode is in progress. */
    protected Thread saThread;

    /** Index of this agent in the reward array given to {@code observeOutcome}. */
    protected int agentNum;

    /** Set to {@code true} by {@link #gameTerminated()} and reset to {@code false} by {@link #action(State)}. */
    protected boolean curStateIsTerminal = false;

    /** Mailbox (and monitor) through which the learner thread hands its chosen action to the game thread. */
    protected ActionReference nextAction = new ActionReference();

    /** Mailbox (and monitor) through which the game thread tells the learner thread that a step has finished. */
    protected StateReference nextState = new StateReference();

    /** Mutable holder for an {@link Action}; instances are also used as the lock for the hand-off. */
    protected static class ActionReference {
        protected Action val;

        protected ActionReference() {
        }
    }

    /** Mutable holder for a {@link State}; instances are also used as the lock for the hand-off. */
    protected static class StateReference {
        protected State val = null;

        protected StateReference() {
        }
    }

    /**
     * Creates the adapter around a learning agent.
     *
     * <p>If the learning agent is an {@link MDPSolver}, a fresh {@link SADomain} containing every
     * action type of {@code sGAgentType} is created and set as the solver's domain.
     *
     * @param sGDomain     the stochastic-game domain, forwarded to {@code init}
     * @param learningAgent the single-agent learner to wrap
     * @param str          forwarded to {@code init} (presumably the agent name)
     * @param sGAgentType  the agent type whose action types are exposed to the learner
     */
    public LearningAgentToSGAgentInterface(SGDomain sGDomain, LearningAgent learningAgent, String str, SGAgentType sGAgentType) {
        init(sGDomain, str, sGAgentType);
        this.learningAgent = learningAgent;
        if (this.learningAgent instanceof MDPSolver) {
            SADomain sADomain = new SADomain();
            for (ActionType actionType : sGAgentType.actions) {
                sADomain.addActionType(actionType);
            }
            ((MDPSolver) this.learningAgent).setDomain(sADomain);
        }
    }

    /**
     * Records the world this agent is playing in and the agent's index.
     *
     * @param world the world of the game that is starting
     * @param i     this agent's number; later used to index the reward array in {@code observeOutcome}
     */
    @Override
    public void gameStarting(World world, int i) {
        this.world = world;
        this.agentNum = i;
    }

    /**
     * Returns the learner's action for the given state, blocking until the learner thread supplies one.
     *
     * <p>On the first call of a game (no learner thread yet) this starts the thread that runs
     * {@code runLearningEpisode}. On later calls it first wakes the learner thread, which is blocked
     * in {@link #executeAction(Action)} waiting for the result of its previous action.
     *
     * <p>The wait is not abandoned on interruption; the caller's interrupt status is restored before
     * returning.
     *
     * @param state the current game state
     * @return the action chosen by the learner for this step
     */
    @Override
    public Action action(State state) {
        synchronized (this.nextState) {
            this.currentState = state;
            this.curStateIsTerminal = false;
            if (this.saThread == null) {
                // No episode is running, so nobody is blocked in executeAction waiting for this state.
                // Publishing it would make the learner's first executeAction return immediately with
                // a fabricated transition, so only drop any stale signal left from an earlier game.
                this.nextState.val = null;
            } else {
                this.nextState.val = state;
                this.nextState.notifyAll();
            }
        }
        if (this.saThread == null) {
            this.saThread = new Thread(new Runnable() {
                @Override
                public void run() {
                    LearningAgentToSGAgentInterface.this.learningAgent.runLearningEpisode(LearningAgentToSGAgentInterface.this);
                }
            });
            this.saThread.start();
        }
        Action chosen;
        boolean interrupted = false;
        synchronized (this.nextAction) {
            while (this.nextAction.val == null) {
                try {
                    this.nextAction.wait();
                } catch (InterruptedException e) {
                    // Keep waiting (an action is still required) but remember the interrupt;
                    // re-setting the flag here would make wait() throw again immediately.
                    interrupted = true;
                }
            }
            chosen = this.nextAction.val;
            this.nextAction.val = null;
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        return chosen;
    }

    /**
     * Stores this agent's reward and the resulting state of the step that was just played.
     *
     * <p>The values are not handed to the learner thread here; that happens on the next
     * {@link #action(State)} or {@link #gameTerminated()} call.
     *
     * @param state       the state before the joint action (unused)
     * @param jointAction the joint action taken (unused)
     * @param dArr        rewards indexed by agent number
     * @param state2      the resulting state
     * @param z           terminal flag supplied by the caller (unused; terminal status is set by {@link #gameTerminated()})
     */
    @Override
    public void observeOutcome(State state, JointAction jointAction, double[] dArr, State state2, boolean z) {
        this.lastReward = dArr[this.agentNum];
        this.currentState = state2;
    }

    /**
     * Marks the current state as terminal, wakes the learner thread so it can observe the final
     * outcome, and waits for the learner's episode to finish.
     *
     * <p>If no learner thread was ever started for this game (no {@link #action(State)} call was made),
     * only the terminal flag is set. If the calling thread is interrupted while joining, the join is
     * abandoned and the interrupt status is restored.
     */
    @Override
    public void gameTerminated() {
        Thread learner = this.saThread;
        synchronized (this.nextState) {
            this.curStateIsTerminal = true;
            if (learner != null) {
                this.nextState.val = this.currentState;
                this.nextState.notifyAll();
            }
        }
        if (learner == null) {
            // Nothing to wake or join; signalling here would only leave a stale value behind.
            return;
        }
        try {
            learner.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        this.saThread = null;
    }

    /**
     * @return the most recent state received from the game
     */
    @Override
    public State currentObservation() {
        return this.currentState;
    }

    /**
     * Hands the learner's action to the game thread and blocks until the game reports the result.
     *
     * <p>Called on the learner thread. The returned outcome is built from the state observed before
     * the action and from the state, reward and terminal flag that are current once the game thread
     * signals {@link #nextState}. The wait is not abandoned on interruption; the interrupt status is
     * restored before returning.
     *
     * @param action the action selected by the learner
     * @return the transition as seen by this agent
     */
    @Override
    public EnvironmentOutcome executeAction(Action action) {
        State previous = this.currentState;
        synchronized (this.nextAction) {
            this.nextAction.val = action;
            this.nextAction.notifyAll();
        }
        State resulting;
        double reward;
        boolean terminal;
        boolean interrupted = false;
        synchronized (this.nextState) {
            while (this.nextState.val == null) {
                try {
                    this.nextState.wait();
                } catch (InterruptedException e) {
                    // Keep waiting for the step result; the interrupt is re-asserted after the loop.
                    interrupted = true;
                }
            }
            this.nextState.val = null;
            // Snapshot the step result while still holding the monitor that published it.
            resulting = this.currentState;
            reward = this.lastReward;
            terminal = this.curStateIsTerminal;
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        return new EnvironmentOutcome(previous, action, resulting, reward, terminal);
    }

    /**
     * @return this agent's reward from the most recently observed outcome
     */
    @Override
    public double lastReward() {
        return this.lastReward;
    }

    /**
     * @return {@code true} once {@link #gameTerminated()} has been called and no new
     *         {@link #action(State)} request has arrived since
     */
    @Override
    public boolean isInTerminalState() {
        return this.curStateIsTerminal;
    }

    /**
     * Does nothing; this adapter does not reset any state itself.
     */
    @Override
    public void resetEnvironment() {
    }
}
