package burlap.behavior.singleagent.options.model;

import burlap.behavior.policy.support.ActionProb;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.options.Option;
import burlap.behavior.singleagent.options.model.BFSMarkovOptionModel;
import burlap.datastructures.HashedAggregator;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.model.FullModel;
import burlap.mdp.singleagent.model.SampleModel;
import burlap.mdp.singleagent.model.TransitionProb;
import burlap.statehashing.HashableState;
import burlap.statehashing.HashableStateFactory;
import java.util.LinkedList;

/**
 * Option model that computes an option's termination distribution and expected return
 * with a breadth-first expansion, where every search node carries the {@link Episode}
 * that led to it. That episode is handed to
 * {@code option.probabilityOfTermination(...)} and {@code option.policyDistribution(...)},
 * so the option's behavior may depend on the path taken rather than only on the current state.
 * The constructor sets {@code requireMarkov} to {@code false}.
 */
public class BFSNonMarkovOptionModel extends BFSMarkovOptionModel {

    /**
     * Search node that, in addition to the data kept by
     * {@link BFSMarkovOptionModel.OptionScanNode}, records the episode from the search
     * root to this node's state.
     */
    public static class NonMarkovOptionScanNode extends BFSMarkovOptionModel.OptionScanNode {
        /** Episode from the search root up to and including this node's state. */
        protected Episode episode;

        /**
         * Creates the root node of a search.
         *
         * @param state the state the search starts from; it also seeds the episode
         */
        public NonMarkovOptionScanNode(State state) {
            super(state);
            this.episode = new Episode(state);
        }

        /**
         * Creates a successor node reached from {@code parent} by taking {@code action}.
         *
         * @param parent           the node this one was expanded from
         * @param state            the state reached
         * @param transitionProb   probability value forwarded to the superclass constructor
         * @param discountedReward reward value forwarded to the superclass constructor
         * @param action           the action taken in the parent's state
         */
        public NonMarkovOptionScanNode(NonMarkovOptionScanNode parent, State state, double transitionProb, double discountedReward, Action action) {
            super(parent, state, transitionProb, discountedReward);
            // Work on a copy so that sibling nodes never share (and mutate) one history.
            this.episode = parent.episode.copy();
            // The episode records a reward of 0 for the step; the reward value itself is
            // passed to the superclass constructor above.
            this.episode.transition(action, state, 0.0d);
        }
    }

    /**
     * Creates the model; all three arguments are forwarded unchanged to the
     * {@link BFSMarkovOptionModel} constructor, after which {@code requireMarkov} is
     * set to {@code false}.
     *
     * @param model          the model used for terminal checks and transitions
     * @param discount       forwarded to the superclass (presumably the discount factor
     *                       later read as {@code this.discount} - confirm against the superclass)
     * @param hashingFactory the factory used to hash states
     */
    public BFSNonMarkovOptionModel(SampleModel model, double discount, HashableStateFactory hashingFactory) {
        super(model, discount, hashingFactory);
        this.requireMarkov = false;
    }

    /**
     * Expands the option breadth-first from {@code state} until the open list is empty
     * or the accumulated termination probability reaches {@code minProb}.
     *
     * <p>For every node in which the option can terminate, the node's discounted,
     * termination-weighted probability is added to {@code possibleTerminations} under
     * the node's hashed state, and {@code expectedReturn[0]} is incremented by the
     * node's cumulative discounted reward weighted the same way. The option never
     * terminates in the root node unless the model reports the root state as terminal.
     *
     * @param state                the state in which the option is initiated
     * @param option               the option being modeled
     * @param possibleTerminations aggregator receiving the probability mass per termination state
     * @param expectedReturn       array whose element 0 accumulates the expected return
     * @return the total probability mass of termination found by the search
     * @throws ClassCastException if a node has to be expanded and the model is not a {@link FullModel}
     */
    @Override
    protected double computeTransitions(State state, Option option, HashedAggregator<HashableState> possibleTerminations, double[] expectedReturn) {
        double terminationMass = 0.0d;
        LinkedList<NonMarkovOptionScanNode> openList = new LinkedList<NonMarkovOptionScanNode>();
        openList.addLast(new NonMarkovOptionScanNode(state));

        while (!openList.isEmpty() && terminationMass < this.minProb) {
            NonMarkovOptionScanNode node = openList.poll();

            // The root node (nSteps == 0) is never asked whether the option terminates.
            double probTerm = 0.0d;
            if (node.nSteps > 0) {
                probTerm = option.probabilityOfTermination(node.s, node.episode);
            }
            // A state the model reports as terminal always ends the option.
            if (this.model.terminal(node.s)) {
                probTerm = 1.0d;
            }
            double probContinue = 1.0d - probTerm;
            double stepDiscount = Math.pow(this.discount, node.nSteps);

            if (probTerm > 0.0d) {
                possibleTerminations.add(this.hashingFactory.hashState(node.s), node.probability * stepDiscount * probTerm);
                expectedReturn[0] = expectedReturn[0] + (node.cumulativeDiscountedReward * node.probability * probTerm);
                // Only the fraction that actually terminates here counts as found mass;
                // the remaining (1 - probTerm) share is carried by this node's successors.
                terminationMass += node.probability * probTerm;
            }

            if (probContinue > 0.0d) {
                // Cast here (not before the loop) so a non-FullModel only fails when an
                // expansion is really needed.
                FullModel fullModel = (FullModel) this.model;
                for (ActionProb actionProb : option.policyDistribution(node.s, node.episode)) {
                    for (TransitionProb transition : fullModel.transitions(node.s, actionProb.ga)) {
                        double stepProb = actionProb.pSelection * transition.p * probContinue;
                        double discountedReward = stepDiscount * transition.eo.r;
                        if (transition.eo.terminated) {
                            this.srcTerminateStates.add(this.hashingFactory.hashState(transition.eo.op));
                        }
                        openList.addLast(new NonMarkovOptionScanNode(node, transition.eo.op, stepProb, discountedReward, actionProb.ga));
                    }
                }
            }
        }
        return terminationMass;
    }
}
