package burlap.behavior.stochasticgames.madynamicprogramming.policies;

import burlap.behavior.policy.EnumerablePolicy;
import burlap.behavior.policy.PolicyUtils;
import burlap.behavior.policy.support.ActionProb;
import burlap.behavior.stochasticgames.JointPolicy;
import burlap.behavior.stochasticgames.madynamicprogramming.AgentQSourceMap;
import burlap.behavior.stochasticgames.madynamicprogramming.MAQSourcePolicy;
import burlap.behavior.stochasticgames.madynamicprogramming.MultiAgentQSourceProvider;
import burlap.behavior.stochasticgames.madynamicprogramming.QSourceForSingleAgent;
import burlap.behavior.stochasticgames.solvers.CorrelatedEquilibriumSolver;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.ActionUtils;
import burlap.mdp.core.state.State;
import burlap.mdp.stochasticgames.JointAction;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/stochasticgames/madynamicprogramming/policies/ECorrelatedQJointPolicy.class */
/**
 * A joint policy that mixes a correlated-equilibrium joint strategy with a uniform
 * distribution over joint actions.
 *
 * <p>For a given state, the Q-values of the agents at index 0 and index 1 are arranged into two
 * payoff matrices, {@link CorrelatedEquilibriumSolver#getCorrelatedEQJointStrategy} is asked for a
 * joint strategy under the configured objective, and every joint action is then assigned
 * {@code epsilon / (n0 * n1) + (1 - epsilon) * strategy[i][j]}, where {@code n0} and {@code n1}
 * are the numbers of applicable actions of the two agents.
 *
 * <p>Only the agents at index 0 and 1 are consulted, so this policy only handles two agents.
 */
public class ECorrelatedQJointPolicy extends MAQSourcePolicy implements EnumerablePolicy {

    /** Supplies the per-agent Q-value sources read by {@link #policyDistribution(State)}. */
    protected MultiAgentQSourceProvider qSourceProvider;

    /** Weight given to the uniform component of the returned distribution. */
    protected double epsilon;

    /** Objective handed to the correlated-equilibrium solver. */
    protected CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective objectiveType;

    /**
     * Creates a policy that uses the {@code UTILITARIAN} correlated-equilibrium objective.
     *
     * @param epsilon weight of the uniform component of the joint-action distribution
     */
    public ECorrelatedQJointPolicy(double epsilon) {
        this(CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective.UTILITARIAN, epsilon);
    }

    /**
     * Creates a policy with an explicit correlated-equilibrium objective.
     *
     * @param objectiveType objective passed to the correlated-equilibrium solver
     * @param epsilon weight of the uniform component of the joint-action distribution
     */
    public ECorrelatedQJointPolicy(
            CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective objectiveType, double epsilon) {
        this.objectiveType = objectiveType;
        this.epsilon = epsilon;
    }

    /**
     * Replaces the objective passed to the correlated-equilibrium solver.
     *
     * @param objectiveType the new objective
     */
    public void setCorrelatedQObjective(
            CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective objectiveType) {
        this.objectiveType = objectiveType;
    }

    /**
     * Does nothing; this policy keeps no target-agent state.
     *
     * @param agentNum ignored
     */
    @Override // burlap.behavior.stochasticgames.JointPolicy
    public void setTargetAgent(int agentNum) {
        // Intentionally empty: the distribution is computed from agents 0 and 1 regardless.
    }

    /**
     * Creates a new policy with the same objective and epsilon as this one.
     *
     * <p>The copy is given this instance's Q-source provider reference (not a clone of it) and this
     * instance's {@code agentsInJointPolicy} via {@code setAgentTypesInJointPolicy}.
     * NOTE(review): whether that setter copies or shares the collection is not visible here.
     *
     * @return the newly created policy
     */
    @Override // burlap.behavior.stochasticgames.JointPolicy
    public JointPolicy copy() {
        ECorrelatedQJointPolicy duplicate = new ECorrelatedQJointPolicy(this.objectiveType, this.epsilon);
        duplicate.setQSourceProvider(this.qSourceProvider);
        duplicate.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
        return duplicate;
    }

    /**
     * Selects a joint action by delegating to
     * {@link PolicyUtils#sampleFromActionDistribution(EnumerablePolicy, State)}.
     *
     * @param state the state in which to act
     * @return the action produced by the sampling utility
     */
    @Override // burlap.behavior.policy.Policy
    public Action action(State state) {
        return PolicyUtils.sampleFromActionDistribution(this, state);
    }

    /**
     * Looks up the probability of an action by delegating to
     * {@link PolicyUtils#actionProbFromEnum(EnumerablePolicy, State, Action)}.
     *
     * @param state the state in which the action would be taken
     * @param action the action whose probability is requested
     * @return the probability reported by the utility
     */
    @Override // burlap.behavior.policy.Policy
    public double actionProb(State state, Action action) {
        return PolicyUtils.actionProbFromEnum(this, state, action);
    }

    /**
     * Builds the probability of every joint action of agents 0 and 1 in the given state.
     *
     * <p>Entries are listed with agent 0's action index as the outer order and agent 1's action
     * index as the inner order.
     *
     * @param state the state for which to compute the distribution
     * @return one {@link ActionProb} per pair of applicable actions
     */
    @Override // burlap.behavior.policy.EnumerablePolicy
    public List<ActionProb> policyDistribution(State state) {
        AgentQSourceMap qSources = this.qSourceProvider.getQSources();
        QSourceForSingleAgent firstAgentQ = qSources.agentQSource(0);
        QSourceForSingleAgent secondAgentQ = qSources.agentQSource(1);

        List<Action> firstActions =
                ActionUtils.allApplicableActionsForTypes(this.agentsInJointPolicy.get(0).actions, state);
        List<Action> secondActions =
                ActionUtils.allApplicableActionsForTypes(this.agentsInJointPolicy.get(1).actions, state);
        int numFirst = firstActions.size();
        int numSecond = secondActions.size();

        // Payoff matrices: rows follow agent 0's actions, columns follow agent 1's actions.
        double[][] firstPayoff = new double[numFirst][numSecond];
        double[][] secondPayoff = new double[numFirst][numSecond];
        for (int row = 0; row < numFirst; row++) {
            for (int col = 0; col < numSecond; col++) {
                JointAction joint = jointActionOf(firstActions.get(row), secondActions.get(col));
                // Agent 0's Q source is queried before agent 1's for every joint action.
                firstPayoff[row][col] = firstAgentQ.getQValueFor(state, joint).q;
                secondPayoff[row][col] = secondAgentQ.getQValueFor(state, joint).q;
            }
        }

        double[][] jointStrategy =
                CorrelatedEquilibriumSolver.getCorrelatedEQJointStrategy(
                        this.objectiveType, firstPayoff, secondPayoff);

        // Epsilon is spread evenly over all joint actions; the rest follows the solver's strategy.
        double uniformShare = this.epsilon / (numFirst * numSecond);
        double strategyWeight = 1.0d - this.epsilon;

        List<ActionProb> distribution = new ArrayList<ActionProb>(numFirst * numSecond);
        for (int row = 0; row < numFirst; row++) {
            for (int col = 0; col < numSecond; col++) {
                // A fresh JointAction is created for each entry rather than reusing the ones above.
                JointAction joint = jointActionOf(firstActions.get(row), secondActions.get(col));
                distribution.add(
                        new ActionProb(joint, uniformShare + (strategyWeight * jointStrategy[row][col])));
            }
        }
        return distribution;
    }

    /**
     * Reports that this policy is defined for the given state.
     *
     * @param state ignored
     * @return always {@code true}
     */
    @Override // burlap.behavior.policy.Policy
    public boolean definedFor(State state) {
        return true;
    }

    /**
     * Sets the provider from which per-agent Q sources are obtained.
     *
     * @param provider the Q-source provider to use
     */
    @Override // burlap.behavior.stochasticgames.madynamicprogramming.MAQSourcePolicy
    public void setQSourceProvider(MultiAgentQSourceProvider provider) {
        this.qSourceProvider = provider;
    }

    /** Creates a new joint action holding the two given actions, in that order. */
    private static JointAction jointActionOf(Action first, Action second) {
        JointAction joint = new JointAction();
        joint.addAction(first);
        joint.addAction(second);
        return joint;
    }
}
