package burlap.behavior.singleagent.options;

import burlap.behavior.policy.support.ActionProb;
import burlap.behavior.policy.support.AnnotatedAction;
import burlap.behavior.singleagent.Episode;
import burlap.debugtools.RandomFactory;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import java.util.List;
import java.util.Random;

/* loaded from: input_file:burlap/behavior/singleagent/options/Option.class */
/**
 * An {@link Action} that carries its own policy and termination condition, so that it can be
 * executed over several consecutive environment steps.
 *
 * <p>{@link Helper#control(Option, Environment, double)} shows how the members fit together:
 * {@link #policy(State, Episode)} is asked for the next action, that action is executed, and
 * {@link #probabilityOfTermination(State, Episode)} is then consulted to decide whether to stop.
 */
public interface Option extends Action {

    /**
     * Static support code for running an {@link Option} inside an {@link Environment}.
     */
    public static class Helper {

        /**
         * Executes {@code option} in {@code environment} until the option stochastically
         * terminates or the environment reports a terminal state. At least one action is
         * always executed.
         *
         * <p>On every step the option's policy chooses an action from the current state and the
         * history of this execution, the action is executed in the environment, and the step
         * reward, weighted by {@code d} raised to the number of previously executed steps, is
         * added to the cumulative return.
         *
         * @param option the option to execute
         * @param environment the environment in which the option's actions are executed
         * @param d the per-step discount factor applied to successive step rewards
         * @return an outcome built from the initial observation, the option, the last resulting
         *     state, the cumulative discounted return, the environment's terminal flag,
         *     {@code d}, and an episode whose actions are annotated with the option's string
         *     form and the step number
         */
        public static EnvironmentOptionOutcome control(Option option, Environment environment, double d) {
            Random rng = RandomFactory.getMapped(0);
            State initialState = environment.currentObservation();
            State currentState = initialState;

            // annotatedEpisode is handed back to the caller; history is the plain record
            // that is passed to the option's own policy and termination queries.
            Episode annotatedEpisode = new Episode(initialState);
            Episode history = new Episode(initialState);

            int stepCount = 0;
            double discountedReturn = 0.0;
            double stepDiscount = 1.0;
            boolean optionTerminated;
            do {
                Action action = option.policy(currentState, history);
                EnvironmentOutcome outcome = environment.executeAction(action);
                stepCount++;
                discountedReturn += stepDiscount * outcome.r;
                currentState = outcome.op;
                stepDiscount *= d;

                history.transition(action, outcome.op, outcome.r);

                // NOTE(review): the annotated episode records the cumulative discounted return
                // so far as the reward of each step, not the individual step reward.
                AnnotatedAction annotated = new AnnotatedAction(action, option.toString() + "(" + stepCount + ")");
                annotatedEpisode.transition(annotated, outcome.op, discountedReturn);

                // Draw the roll before querying the option so the random stream is consumed in
                // the same order as before. nextDouble() lies in [0, 1), so a strict comparison
                // terminates with probability exactly p and never when p is 0.
                double roll = rng.nextDouble();
                double terminationProbability = option.probabilityOfTermination(outcome.op, history);
                optionTerminated = roll < terminationProbability;
            } while (!optionTerminated && !environment.isInTerminalState());

            return new EnvironmentOptionOutcome(
                    initialState,
                    option,
                    currentState,
                    discountedReturn,
                    environment.isInTerminalState(),
                    d,
                    annotatedEpisode);
        }
    }

    /**
     * Reports whether the given state belongs to this option's initiation set.
     * Presumably this means the option may be started in {@code state}; confirm against
     * implementations.
     *
     * @param state the state to test
     * @return {@code true} if {@code state} is in the initiation set
     */
    boolean inInitiationSet(State state);

    /**
     * Selects the action this option takes next.
     *
     * @param state the state from which the action is chosen
     * @param episode the history of the current execution of this option
     * @return the action to execute
     */
    Action policy(State state, Episode episode);

    /**
     * Returns action/probability pairs for this option's policy. Presumably this is the
     * distribution from which {@link #policy(State, Episode)} selects; confirm against
     * implementations.
     *
     * @param state the state in which the policy is queried
     * @param episode the history of the current execution of this option
     * @return the list of action probabilities
     */
    List<ActionProb> policyDistribution(State state, Episode episode);

    /**
     * Returns the probability that this option terminates.
     *
     * @param state the state in which termination is evaluated
     * @param episode the history of the current execution of this option
     * @return the termination probability; {@link Helper#control(Option, Environment, double)}
     *     compares it against a uniform draw from [0, 1)
     */
    double probabilityOfTermination(State state, Episode episode);

    /**
     * Executes this option in the given environment. Presumably implementations may delegate
     * to {@link Helper#control(Option, Environment, double)}; confirm against implementations.
     *
     * @param environment the environment in which to execute
     * @param d presumably the per-step discount factor, as in
     *     {@link Helper#control(Option, Environment, double)}
     * @return the outcome of executing the option
     */
    EnvironmentOptionOutcome control(Environment environment, double d);

    /**
     * Reports the value of this option's "markov" property. Presumably {@code true} means the
     * policy and termination condition depend only on the current state and not on the
     * execution history; confirm against implementations.
     *
     * @return the markov flag
     */
    boolean markov();
}
