package burlap.behavior.stochasticgames.madynamicprogramming;

import burlap.behavior.stochasticgames.madynamicprogramming.AgentQSourceMap;
import burlap.behavior.valuefunction.ValueFunction;
import burlap.mdp.core.StateTransitionProb;
import burlap.mdp.core.TerminalFunction;
import burlap.mdp.core.state.State;
import burlap.mdp.stochasticgames.JointAction;
import burlap.mdp.stochasticgames.SGDomain;
import burlap.mdp.stochasticgames.agent.SGAgentType;
import burlap.mdp.stochasticgames.model.FullJointModel;
import burlap.mdp.stochasticgames.model.JointModel;
import burlap.mdp.stochasticgames.model.JointRewardFunction;
import burlap.statehashing.HashableState;
import burlap.statehashing.HashableStateFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:burlap/behavior/stochasticgames/madynamicprogramming/MADynamicProgramming.class */
/**
 * Abstract base for multi-agent dynamic programming over a stochastic game.
 *
 * <p>The class stores the problem definition (domain, joint model, joint reward function,
 * terminal function, discount), a state hashing factory, a value-function initializer and a
 * backup operator. For every agent definition it keeps one {@link BackupBasedQSource}: a tabular
 * state-value function from which joint-action Q-values are derived on demand.
 *
 * <p>Subclasses supply the actual planning loop through {@link #planFromState(State)}.
 */
public abstract class MADynamicProgramming implements MultiAgentQSourceProvider {
    /** The stochastic game domain; its joint action model is read in {@link #initMAVF}. */
    protected SGDomain domain;

    /** The agent types. The index of an entry is the agent number used to key {@link #qSources}. */
    protected List<SGAgentType> agentDefinitions;

    /**
     * The joint model taken from the domain. {@link JointActionTransitions} casts it to
     * {@link FullJointModel}, so it must be one for Q-values to be computable.
     */
    protected JointModel jointModel;

    /** Produces one reward per agent (as a {@code double[]}) for a state/joint-action/next-state triple. */
    protected JointRewardFunction jointRewardFunction;

    /** Identifies terminal states; their value and Q-values are always treated as 0. */
    protected TerminalFunction terminalFunction;

    /** Discount factor applied to the value of successor states. */
    protected double discount;

    /** Hashes states so they can be used as keys of the per-agent value tables. */
    protected HashableStateFactory hashingFactory;

    /** Supplies the initial value of a non-terminal state the first time it is queried. */
    protected ValueFunction vInit;

    /** Operator used by {@link #backupAllValueFunctions(State)} to compute an agent's new state value. */
    protected SGBackupOperator backupOperator;

    /** One Q-source per agent, keyed by agent number; rebuilt by {@link #setAgentDefinitions(List)}. */
    protected AgentQSourceMap.HashMapAgentQSourceMap qSources;

    /**
     * Guards {@link #setAgentDefinitions(List)}. This class never sets it to {@code true} itself;
     * presumably subclasses do so when planning begins (confirm against the concrete planners).
     */
    protected boolean planningStarted = false;

    /**
     * A single agent's Q-source backed by a tabular state-value function. Q-values are not stored;
     * they are computed from the stored state values with a one-step expectation over the joint model.
     *
     * <p>Note: the decompiler reported that the original access modifier of this class was
     * {@code protected}; it is kept {@code public} here to match the decompiled artifact.
     */
    public class BackupBasedQSource implements QSourceForSingleAgent {
        /** Index of the agent whose rewards and values this source tracks. */
        protected int agentNum;

        /** Tabular state values for this agent, keyed by hashed state. */
        protected Map<HashableState, Double> valueFunction = new HashMap<HashableState, Double>();

        /**
         * @param i the agent number; used to select this agent's entry from each joint reward array
         */
        public BackupBasedQSource(int i) {
            this.agentNum = i;
        }

        /**
         * Computes this agent's Q-value for a joint action as the probability-weighted sum, over all
         * transitions, of (this agent's reward + discount * value of the next state).
         *
         * @param state the state in which the joint action is taken
         * @param jointAction the joint action to evaluate
         * @return the Q-value wrapped with its state and joint action; the value is 0 for terminal states
         */
        @Override // burlap.behavior.stochasticgames.madynamicprogramming.QSourceForSingleAgent
        public JAQValue getQValueFor(State state, JointAction jointAction) {
            double q = 0.0d;
            if (!MADynamicProgramming.this.terminalFunction.isTerminal(state)) {
                // Transitions are only enumerated for non-terminal states; a terminal state's
                // Q-value is 0 regardless, so querying the model for it would be wasted work.
                JointActionTransitions transitions = new JointActionTransitions(state, jointAction);
                for (int i = 0; i < transitions.tps.size(); i++) {
                    StateTransitionProb transition = transitions.tps.get(i);
                    double reward = transitions.jrs.get(i)[this.agentNum];
                    double nextValue = getValue(MADynamicProgramming.this.hashingFactory.hashState(transition.s));
                    q += transition.p * (reward + (MADynamicProgramming.this.discount * nextValue));
                }
            }
            return new JAQValue(state, jointAction, q);
        }

        /**
         * Returns the stored value of a state, lazily initializing it on first access.
         *
         * <p>An unseen terminal state is initialized to 0; an unseen non-terminal state is initialized
         * from {@link MADynamicProgramming#vInit}. The initial value is stored so later calls hit the table.
         *
         * @param hashableState the hashed state to look up
         * @return the state's current value for this agent
         */
        public double getValue(HashableState hashableState) {
            Double stored = this.valueFunction.get(hashableState);
            if (stored != null) {
                return stored.doubleValue();
            }
            double initial = 0.0d;
            if (!MADynamicProgramming.this.terminalFunction.isTerminal(hashableState.s())) {
                initial = MADynamicProgramming.this.vInit.value(hashableState.s());
            }
            this.valueFunction.put(hashableState, Double.valueOf(initial));
            return initial;
        }

        /**
         * Overwrites the stored value of a state.
         *
         * @param hashableState the hashed state whose value is set
         * @param d the new value
         */
        public void setValue(HashableState hashableState, double d) {
            this.valueFunction.put(hashableState, Double.valueOf(d));
        }
    }

    /**
     * The outcomes of taking one joint action in one state: the transition list from the joint model
     * and, in parallel order, the joint reward array for each transition.
     */
    public class JointActionTransitions {
        /** The joint action these transitions belong to. */
        public JointAction ja;

        /** The state transitions (next state and probability) returned by the joint model. */
        public List<StateTransitionProb> tps;

        /** Joint rewards; {@code jrs.get(i)} is the reward array for transition {@code tps.get(i)}. */
        public List<double[]> jrs;

        /**
         * Enumerates the transitions and their joint rewards for the given state and joint action.
         *
         * @param state the source state
         * @param jointAction the joint action taken in {@code state}
         * @throws ClassCastException if the planner's joint model is not a {@link FullJointModel}
         */
        public JointActionTransitions(State state, JointAction jointAction) {
            // Full transition enumeration is required, hence the cast to FullJointModel.
            FullJointModel fullJointModel = (FullJointModel) MADynamicProgramming.this.jointModel;
            this.ja = jointAction;
            this.tps = fullJointModel.stateTransitions(state, jointAction);
            this.jrs = new ArrayList<double[]>(this.tps.size());
            for (StateTransitionProb transition : this.tps) {
                this.jrs.add(MADynamicProgramming.this.jointRewardFunction.reward(state, jointAction, transition.s));
            }
        }
    }

    /**
     * Initializes all planner fields and builds the per-agent Q-sources.
     *
     * @param sGDomain the domain; its joint action model becomes {@link #jointModel}
     * @param list the agent definitions (passed to {@link #setAgentDefinitions(List)}; may be {@code null},
     *             in which case no Q-sources are created)
     * @param jointRewardFunction the joint reward function
     * @param terminalFunction the terminal state function
     * @param d the discount factor
     * @param hashableStateFactory the state hashing factory
     * @param valueFunction the initializer for values of previously unseen non-terminal states
     * @param sGBackupOperator the backup operator used when updating state values
     * @throws RuntimeException if agent definitions are changed after planning has started
     */
    public void initMAVF(SGDomain sGDomain, List<SGAgentType> list, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction, double d, HashableStateFactory hashableStateFactory, ValueFunction valueFunction, SGBackupOperator sGBackupOperator) {
        this.domain = sGDomain;
        this.jointModel = sGDomain.getJointActionModel();
        this.jointRewardFunction = jointRewardFunction;
        this.terminalFunction = terminalFunction;
        this.discount = d;
        this.hashingFactory = hashableStateFactory;
        this.vInit = valueFunction;
        this.backupOperator = sGBackupOperator;
        setAgentDefinitions(list);
    }

    /**
     * @return the current value of the {@link #planningStarted} flag
     */
    public boolean hasStartedPlanning() {
        return this.planningStarted;
    }

    /**
     * Sets the agent definitions and creates a fresh {@link BackupBasedQSource} for each of them,
     * discarding any previously held Q-sources.
     *
     * <p>A {@code null} list, or the very same list instance already in use, is ignored.
     *
     * @param list the agent definitions; list index becomes the agent number
     * @throws RuntimeException if planning has already started
     */
    public void setAgentDefinitions(List<SGAgentType> list) {
        if (this.planningStarted) {
            throw new RuntimeException("Cannot reset the agent definitions after planning has already started.");
        }
        if (list == null || this.agentDefinitions == list) {
            return;
        }
        this.agentDefinitions = list;
        Map<Integer, QSourceForSingleAgent> sourcesByAgent = new HashMap<Integer, QSourceForSingleAgent>();
        for (int i = 0; i < this.agentDefinitions.size(); i++) {
            sourcesByAgent.put(Integer.valueOf(i), new BackupBasedQSource(i));
        }
        this.qSources = new AgentQSourceMap.HashMapAgentQSourceMap(sourcesByAgent);
    }

    /**
     * Runs planning from the given state. The algorithm is defined by the subclass.
     *
     * @param state the state to plan from
     */
    public abstract void planFromState(State state);

    /**
     * @return the per-agent Q-sources, or {@code null} if agent definitions have not been set
     */
    @Override // burlap.behavior.stochasticgames.madynamicprogramming.MultiAgentQSourceProvider
    public AgentQSourceMap getQSources() {
        return this.qSources;
    }

    /**
     * Backs up the value of one state for every agent and reports the largest change.
     *
     * <p>Agents are processed in index order and each agent's new value is stored before the next
     * agent's backup is computed.
     *
     * @param state the state whose value is backed up
     * @return the maximum absolute difference between an agent's old and new value for the state;
     *         {@link Double#NEGATIVE_INFINITY} if there are no agent definitions
     */
    public double backupAllValueFunctions(State state) {
        HashableState hashedState = this.hashingFactory.hashState(state);
        double maxChange = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < this.agentDefinitions.size(); i++) {
            BackupBasedQSource agentSource = (BackupBasedQSource) this.qSources.agentQSource(i);
            double oldValue = agentSource.getValue(hashedState);
            double newValue = this.backupOperator.performBackup(state, i, this.agentDefinitions, this.qSources);
            maxChange = Math.max(maxChange, Math.abs(newValue - oldValue));
            agentSource.setValue(hashedState, newValue);
        }
        return maxChange;
    }
}
