package burlap.behavior.valuefunction;

import burlap.behavior.policy.EnumerablePolicy;
import burlap.behavior.policy.support.ActionProb;
import burlap.mdp.core.state.State;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:burlap/behavior/valuefunction/QProvider.class */
/**
 * A {@link QFunction} that can additionally return a list of {@link QValue} objects for a
 * given state via {@link #qValues(State)}.
 */
public interface QProvider extends QFunction {

    /**
     * Returns the {@link QValue} entries this provider supplies for the given state.
     * NOTE(review): presumably one entry per action applicable in the state; confirm
     * against the implementing classes.
     *
     * @param state the state to query
     * @return the list of Q-values for {@code state}
     */
    List<QValue> qValues(State state);

    /**
     * Static utility methods that operate on a {@link QProvider}. The constructor is private,
     * so the class is only used through its static methods.
     */
    public static class Helper {

        private Helper() {
            // Static-only holder.
        }

        /**
         * Returns the largest {@code q} field among the entries of
         * {@code qProvider.qValues(state)}.
         *
         * @param qProvider the provider whose Q-values are inspected
         * @param state the state to query
         * @return the maximum {@code q} value, or {@code 0.0} when the provider returns an
         *         empty list
         */
        public static double maxQ(QProvider qProvider, State state) {
            final List<QValue> candidates = qProvider.qValues(state);
            if (candidates.isEmpty()) {
                // An empty list is reported as 0 rather than negative infinity.
                return 0.0d;
            }

            double best = Double.NEGATIVE_INFINITY;
            for (QValue candidate : candidates) {
                best = Math.max(candidate.q, best);
            }
            return best;
        }

        /**
         * Sums, over every entry of {@code enumerablePolicy.policyDistribution(state)}, the
         * product of {@code qProvider.qValue(state, entry.ga)} and {@code entry.pSelection}.
         * Assuming {@code pSelection} holds the policy's selection probability for the action,
         * this is the probability-weighted Q-value of the state under the policy.
         *
         * @param qProvider the source of Q-values
         * @param state the state to evaluate
         * @param enumerablePolicy the policy whose distribution supplies the weights
         * @return the weighted sum, or {@code 0.0} when the distribution is empty
         */
        public static double policyValue(QProvider qProvider, State state, EnumerablePolicy enumerablePolicy) {
            final List<ActionProb> distribution = enumerablePolicy.policyDistribution(state);
            if (distribution.isEmpty()) {
                return 0.0d;
            }

            double weightedSum = 0.0d;
            for (ActionProb entry : distribution) {
                final double q = qProvider.qValue(state, entry.ga);
                weightedSum += q * entry.pSelection;
            }
            return weightedSum;
        }
    }
}
