package burlap.behavior.stochasticgames.agents.naiveq.history;

import burlap.behavior.stochasticgames.agents.naiveq.SGNaiveQLAgent;
import burlap.behavior.valuefunction.QValue;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.stochasticgames.JointAction;
import burlap.mdp.stochasticgames.SGDomain;
import burlap.mdp.stochasticgames.agent.SGAgentType;
import burlap.mdp.stochasticgames.world.World;
import burlap.statehashing.HashableStateFactory;

/* loaded from: input_file:burlap/behavior/stochasticgames/agents/naiveq/history/SGQWActionHistory.class */
/**
 * Variant of {@link SGNaiveQLAgent} that looks up and updates its Q-values on a
 * {@link HistoryState} instead of on the raw {@link State} handed to it by the caller.
 *
 * <p>The history state is created lazily from the first state seen in a game, advanced after
 * every observed outcome, and discarded whenever a new game starts.
 */
public class SGQWActionHistory extends SGNaiveQLAgent {
    /** History length handed to every {@link HistoryState} this agent creates. */
    protected int historySize;

    /** History state the agent currently acts from; {@code null} until the first step of a game. */
    protected HistoryState curHState;

    /**
     * Creates the agent.
     *
     * @param sGDomain forwarded unchanged to the superclass constructor
     * @param d forwarded unchanged to the superclass constructor (presumably the discount factor;
     *     confirm against {@code SGNaiveQLAgent})
     * @param d2 forwarded unchanged to the superclass constructor (presumably the learning rate;
     *     confirm against {@code SGNaiveQLAgent})
     * @param hashableStateFactory forwarded unchanged to the superclass constructor
     * @param i value stored in {@link #historySize}
     */
    public SGQWActionHistory(SGDomain sGDomain, double d, double d2, HashableStateFactory hashableStateFactory, int i) {
        super(sGDomain, d, d2, hashableStateFactory);
        this.historySize = i;
    }

    /**
     * Delegates to the superclass and returns this agent with its concrete type so calls can be
     * chained.
     *
     * @param str forwarded to the superclass
     * @param sGAgentType forwarded to the superclass
     * @return this agent
     */
    @Override
    public SGQWActionHistory setAgentDetails(String str, SGAgentType sGAgentType) {
        super.setAgentDetails(str, sGAgentType);
        return this;
    }

    /**
     * Delegates to the superclass and drops the history state of the previous game, so the next
     * step builds a fresh one.
     *
     * @param world forwarded to the superclass
     * @param i forwarded to the superclass
     */
    @Override
    public void gameStarting(World world, int i) {
        super.gameStarting(world, i);
        this.curHState = null;
    }

    /**
     * Applies one Q-learning update for this agent's own action and advances the history state.
     *
     * @param state state the joint action was taken in
     * @param jointAction joint action that was executed; this agent's own action is read from it
     * @param dArr per-agent rewards, indexed by agent number; replaced by the internal reward
     *     function's output when one is set
     * @param state2 state passed, together with {@code jointAction}, to build the successor
     *     history state
     * @param z when {@code true} the successor's value is taken as zero instead of its maximum
     *     Q-value (presumably the terminal flag)
     */
    @Override
    public void observeOutcome(State state, JointAction jointAction, double[] dArr, State state2, boolean z) {
        // Guard against an outcome being observed before any action was requested in this game.
        HistoryState previous = ensureHistoryState(state);

        Action ownAction = jointAction.action(this.agentNum);
        QValue entry = storedQ(previous, ownAction);
        HistoryState successor = previous.incrementWithChange(state2, jointAction);

        double[] rewards = dArr;
        if (this.internalRewardFunction != null) {
            rewards = this.internalRewardFunction.reward(state, jointAction, state2);
        }
        double reward = rewards[this.agentNum];
        double successorValue = z ? 0.0d : getMaxQValue(successor);

        double stepSize = this.learningRate.pollLearningRate(this.totalNumberOfSteps, state, ownAction);
        entry.q += stepSize * ((reward + (this.discount * successorValue)) - entry.q);
        this.totalNumberOfSteps++;

        // action() ignores its argument once a history state exists, so the successor has to be
        // stored here; otherwise every later step would keep using the first history state.
        this.curHState = successor;
    }

    /**
     * Selects an action through the superclass, using the current history state in place of the
     * given state. The given state is only used to create the history state when none exists yet.
     *
     * @param state state used to seed the history state on the first step of a game
     * @return the action chosen by the superclass for the current history state
     */
    @Override
    public Action action(State state) {
        return super.action(ensureHistoryState(state));
    }

    /** Returns the current history state, creating it from {@code state} if none exists yet. */
    private HistoryState ensureHistoryState(State state) {
        if (this.curHState == null) {
            this.curHState = new HistoryState(state, this.historySize);
        }
        return this.curHState;
    }
}
