Source code for torchrl.envs.base_env

from abc import ABC, abstractmethod


class BaseEnv(ABC):
    """
    Abstract base class used for implementing new environments.

    Includes some basic functionalities, like the option to use a running
    mean and standard deviation for normalizing states.

    Parameters
    ----------
    env_name: str
        The environment name.
    fixed_normalize_states: bool
        If True, use the state min and max value to normalize the states
        (Default is False).
    running_normalize_states: bool
        If True, use the running mean and std to normalize the states
        (Default is False).
    scale_reward: bool
        If True, use the running std to scale the rewards (Default is False).
    """

    def __init__(self, env_name):
        self.env_name = env_name
        self.env = self._create_env()

    def __str__(self):
        return "<{}>".format(type(self).__name__)
    @abstractmethod
    def get_state_info(self):
        """
        Returns a dict containing information about the state space.

        The dict should contain two keys: ``shape`` indicating the state
        shape, and ``dtype`` indicating the state type.

        Example
        -------
        State space with 4 continuous dimensions::

            return dict(shape=(4,), dtype='continuous')
        """
    @abstractmethod
    def get_action_info(self):
        """
        Returns a dict containing information about the action space.

        The dict should contain two keys: ``shape`` indicating the action
        shape, and ``dtype`` indicating the action type.

        If dtype is ``int`` it will be assumed a discrete action space.

        Example
        -------
        Action space containing 4 float numbers::

            return dict(shape=(4,), dtype='float')
        """
    @property
    @abstractmethod
    def simulator(self):
        """
        Returns the name of the simulator being used as a string.
        """
    @abstractmethod
    def _create_env(self):
        """
        Creates and returns an environment.

        Returns
        -------
        Environment object.
        """
    @abstractmethod
    def reset(self):
        """
        Resets the environment to an initial state.

        Returns
        -------
        numpy.ndarray
            A numpy array with the state information.
        """
    @abstractmethod
    def step(self, action):
        """
        Receives an action and executes it on the environment.

        Parameters
        ----------
        action: int or float or numpy.ndarray
            The action to be executed in the environment. It should be an
            ``int`` for discrete environments and a ``float`` for continuous
            ones. Multiple actions can be executed at once (if the environment
            supports it), in which case it should be a ``numpy.ndarray``.

        Returns
        -------
        next_state: numpy.ndarray
            A numpy array with the state information.
        reward: float
            The reward.
        done: bool
            Flag indicating the termination of the episode.
        info: dict
            Dict containing additional information about the state.
        """
    @property
    def num_lives(self):
        raise NotImplementedError

    @property
    def unwrapped(self):
        return self

    def sample_random_action(self):
        raise NotImplementedError

    def record(self, path):
        raise NotImplementedError

    def close(self):
        raise NotImplementedError
    def update_config(self, config):
        """
        Updates a Config object to include information about the environment.

        Parameters
        ----------
        config: Config
            Object used for storing configuration.
        """
        config.new_section(
            "env",
            obj=dict(func=self.simulator, env_name=self.env_name),
            state_info=dict(
                (key, value)
                for key, value in self.get_state_info().items()
                if key not in ("low_bound", "high_bound")
            ),
            action_info=self.get_action_info(),
        )
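
# The sketch below is illustrative only and is not part of this module: it
# shows one way a concrete subclass could satisfy the abstract interface,
# assuming the `gym` package with its classic API (reset() returning the
# observation, step() returning a 4-tuple). The class name ``GymEnv`` and the
# environment id ``CartPole-v1`` are hypothetical examples.
#
# import gym
# import numpy as np
#
#
# class GymEnv(BaseEnv):
#     """Minimal example wrapper around an OpenAI Gym environment."""
#
#     def _create_env(self):
#         # Called by BaseEnv.__init__; builds the simulator from env_name.
#         return gym.make(self.env_name)
#
#     def get_state_info(self):
#         # Continuous observation vector, e.g. 4 values for CartPole.
#         return dict(shape=self.env.observation_space.shape, dtype='continuous')
#
#     def get_action_info(self):
#         # Discrete action space, so dtype is 'int'.
#         return dict(shape=(self.env.action_space.n,), dtype='int')
#
#     @property
#     def simulator(self):
#         return 'gym'
#
#     def reset(self):
#         return np.asarray(self.env.reset())
#
#     def step(self, action):
#         next_state, reward, done, info = self.env.step(action)
#         return np.asarray(next_state), float(reward), bool(done), info
#
#
# # Hypothetical rollout loop using the wrapper above:
# env = GymEnv("CartPole-v1")
# state = env.reset()
# done = False
# while not done:
#     action = env.env.action_space.sample()  # random action for illustration
#     state, reward, done, info = env.step(action)
# env.env.close()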