package burlap.behavior.singleagent.planning.stochastic.policyiteration;

import burlap.behavior.policy.EnumerablePolicy;
import burlap.behavior.singleagent.planning.stochastic.DynamicProgramming;
import burlap.debugtools.DPrint;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.model.FullModel;
import burlap.mdp.singleagent.model.TransitionProb;
import burlap.statehashing.HashableState;
import burlap.statehashing.HashableStateFactory;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

/* loaded from: input_file:burlap/behavior/singleagent/planning/stochastic/policyiteration/PolicyEvaluation.class */
/**
 * Iteratively evaluates a fixed policy over the states stored in the inherited value function.
 *
 * <p>Each evaluation sweep applies {@code performFixedPolicyBellmanUpdateOn} to every state key of
 * the value function. Sweeping stops when the largest per-state change in a sweep falls below
 * {@link #maxEvalDelta} or when {@link #maxEvalIterations} sweeps have been performed. States are
 * added to the value function by {@link #performReachabilityFrom(State)}.
 */
public class PolicyEvaluation extends DynamicProgramming {
    /** A sweep whose largest absolute value change is below this threshold ends the evaluation. */
    protected double maxEvalDelta;

    /** Upper bound on the number of sweeps performed by one call to {@code evaluatePolicy}. */
    protected double maxEvalIterations;

    /**
     * Initializes the planner through {@code DPPInit} and stores the evaluation stopping criteria.
     *
     * @param sADomain domain forwarded to {@code DPPInit}
     * @param d second argument forwarded to {@code DPPInit} (presumably the discount factor;
     *     confirm against {@code DynamicProgramming})
     * @param hashableStateFactory state hashing factory forwarded to {@code DPPInit}
     * @param d2 value stored as {@link #maxEvalDelta}
     * @param d3 value stored as {@link #maxEvalIterations}
     */
    public PolicyEvaluation(SADomain sADomain, double d, HashableStateFactory hashableStateFactory, double d2, double d3) {
        DPPInit(sADomain, d, hashableStateFactory);
        this.maxEvalDelta = d2;
        this.maxEvalIterations = d3;
    }

    /**
     * Runs reachability analysis from {@code state} and then evaluates the policy over every state
     * held in the value function.
     *
     * @param enumerablePolicy policy to evaluate
     * @param state seed state for the reachability analysis
     * @throws RuntimeException if the value function is still empty after the reachability analysis
     */
    public void evaluatePolicy(EnumerablePolicy enumerablePolicy, State state) {
        performReachabilityFrom(state);
        evaluatePolicy(enumerablePolicy);
    }

    /**
     * Evaluates the policy over every state currently held in the value function.
     *
     * @param enumerablePolicy policy to evaluate
     * @throws RuntimeException if the value function contains no states
     */
    public void evaluatePolicy(EnumerablePolicy enumerablePolicy) {
        if (this.valueFunction.size() == 0) {
            throw new RuntimeException("Cannot evaluate policy, because no states have been expanded. Use the performStateReachability methodor call the evaluatePolicy method that takes a seed initial state as input.");
        }
        Set<HashableState> states = this.valueFunction.keySet();
        for (int sweep = 0; sweep < this.maxEvalIterations; sweep++) {
            double maxChange = 0.0d;
            for (HashableState sh : states) {
                // Read the stored value BEFORE applying the update. Operands are evaluated left to
                // right, so if the update helper stores its result in the value function (as its
                // name suggests; confirm in DynamicProgramming), reading afterwards would always
                // yield a zero difference and end the evaluation after a single sweep.
                double previousValue = value(sh);
                double updatedValue = performFixedPolicyBellmanUpdateOn(sh, enumerablePolicy);
                maxChange = Math.max(Math.abs(updatedValue - previousValue), maxChange);
            }
            if (maxChange < this.maxEvalDelta) {
                // Values changed by less than the threshold everywhere; stop sweeping.
                return;
            }
        }
    }

    /**
     * Expands the states reachable from {@code state} and registers each non-terminal one in the
     * value function with the value given by {@code valueInitializer}.
     *
     * <p>Nothing is done when the hashed seed state is already a key of the value function.
     * Terminal states are neither stored nor expanded. The model is cast to {@link FullModel} to
     * enumerate transitions.
     *
     * @param state seed state from which to expand
     * @return {@code false} if the seed state was already in the value function (no analysis
     *     performed), {@code true} otherwise
     */
    public boolean performReachabilityFrom(State state) {
        HashableState seed = stateHash(state);
        if (this.valueFunction.containsKey(seed)) {
            return false;
        }
        DPrint.cl(this.debugCode, "Starting reachability analysis");
        LinkedList<HashableState> openList = new LinkedList<HashableState>();
        Set<HashableState> discovered = new HashSet<HashableState>();
        openList.offer(seed);
        discovered.add(seed);
        while (!openList.isEmpty()) {
            HashableState current = openList.poll();
            // Skip states that were already registered as well as terminal states.
            if (this.valueFunction.containsKey(current) || this.model.terminal(current.s())) {
                continue;
            }
            this.valueFunction.put(current, this.valueInitializer.value(current.s()));
            for (Action action : applicableActions(current.s())) {
                for (TransitionProb tp : ((FullModel) this.model).transitions(current.s(), action)) {
                    HashableState successor = stateHash(tp.eo.op);
                    if (!discovered.contains(successor) && !this.valueFunction.containsKey(successor)) {
                        discovered.add(successor);
                        openList.offer(successor);
                    }
                }
            }
        }
        DPrint.cl(this.debugCode, "Finished reachability analysis; # states: " + this.valueFunction.size());
        return true;
    }
}
