Source code for irlc.ex04.model_pendulum

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import sympy as sym
from irlc.ex03.control_model import ControlModel
from irlc.ex03.control_cost import SymbolicQRCost
from irlc.ex04.discrete_control_model import DiscreteControlModel
import gymnasium as gym
from gymnasium.spaces.box import Box
from irlc.ex04.control_environment import ControlEnvironment
import numpy as np

"""
SEE: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
https://github.com/openai/gym/blob/master/gym/envs/classic_control/pendulum.py
"""
class PendulumModel(ControlModel):
    state_labels= [r"$\theta$", r"$\frac{d \theta}{dt}$"]
    action_labels = ['Torque $u$']
    x_upright, x_down = np.asarray([0.0, 0.0]), np.asarray([np.pi, 0.0])

    def __init__(self, l=1., m=.8, friction=0.0, max_torque=6.0, transform_coordinates=False): 
        self.l, self.m, self.max_torque = l, m, max_torque
        assert not transform_coordinates
        super().__init__() 
        self.friction = friction
        self._u_prev = None                        # For rendering
        self.cp_render = {}
        assert friction == 0.0

    def sym_f(self, x, u, t=None): 
        l, m = self.l, self.m
        g = 9.82
        theta_dot = x[1]  # Parameterization: x = [theta, theta']
        theta_dot_dot =  g/l * sym.sin(x[0]) + 1/(m*l**2) * u[0]
        return [theta_dot, theta_dot_dot]

    def get_cost(self) -> SymbolicQRCost:
        return SymbolicQRCost(R=np.ones((1, 1)), Q=np.eye(2))  

    def tF_bound(self) -> Box: 
        return Box(0.5, 4, shape=(1,), dtype=float)

    def t0_bound(self) -> Box:
        return Box(0, 0, shape=(1,), dtype=float)

    def x_bound(self) -> Box:
        return Box(np.asarray( [-2 * np.pi, -np.inf]), np.asarray( [2 * np.pi, np.inf]), dtype=float)

    def u_bound(self) -> Box:
        return Box(np.asarray([-self.max_torque]), np.asarray([self.max_torque]), dtype=float)

    def x0_bound(self) -> Box:
        return Box(np.asarray( [np.pi, 0] ), np.asarray( [np.pi, 0]), dtype=float)

    def xF_bound(self) -> Box:
        return Box(np.asarray([0, 0]), np.asarray([0, 0]), dtype=float)         

    def close(self):
        for r in self.cp_render.values():
            r.close()

    def render(self, x, render_mode="human"):
        if render_mode not in self.cp_render: # is None or self.cp_render[1] != render_mode:
            self.cp_render[render_mode] = gym.make("Pendulum-v1", render_mode=render_mode)  # environment only used for rendering. Change to v1 in gym 0.26.
            self.cp_render[render_mode].max_time_limit = 10000
            self.cp_render[render_mode].reset()
        self.cp_render[render_mode].unwrapped.state = np.asarray(x)  # environment is wrapped
        self.cp_render[render_mode].unwrapped.last_u = self._u_prev[0] if self._u_prev is not None else None
        return self.cp_render[render_mode].render()

[docs] class SinCosPendulumModel(PendulumModel):
[docs] def phi_x(self, x): theta, theta_dot = x[0], x[1] return [sym.sin(theta), sym.cos(theta), theta_dot]
[docs] def phi_x_inv(self, x): sin_theta, cos_theta, theta_dot = x[0], x[1], x[2] theta = sym.atan2(sin_theta, cos_theta) # Obtain angle theta from sin(theta),cos(theta) return [theta, theta_dot]
[docs] def phi_u(self, u): return [sym.atanh(u[0] / self.max_torque)]
[docs] def phi_u_inv(self, u): return [sym.tanh(u[0]) * self.max_torque]
[docs] def u_bound(self) -> Box: return Box(np.asarray([-np.inf]), np.asarray([np.inf]), dtype=float)
class DiscreteSinCosPendulumModel(DiscreteControlModel): state_labels = [r'$\sin(\theta)$', r'$\cos(\theta)$', r'$\dot{\theta}$'] # Check if this escape character works. action_labels = ['Torque $u$'] def __init__(self, dt=0.02, cost=None, **kwargs): model = SinCosPendulumModel(**kwargs) self.max_torque = model.max_torque # self.transform_actions = transform_actions super().__init__(model=model, dt=dt, cost=cost) self.x_upright = np.asarray(self.phi_x(model.x_upright)) self.l = model.l # Pendulum length if cost is None: cost = _pendulum_cost(self) self.cost = cost def _pendulum_cost(model : DiscreteSinCosPendulumModel): # Helper method to set a discrete cost. from irlc.ex04.discrete_control_cost import DiscreteQRCost Q = np.eye(model.state_size) Q[0, 1] = Q[1, 0] = model.l Q[0, 0] = Q[1, 1] = model.l ** 2 Q[2, 2] = 0.0 R = np.array([[0.1]]) * 10 c0 = DiscreteQRCost(Q=np.zeros((model.state_size,model.state_size)), R=R) c0 = c0 + c0.goal_seeking_cost(Q=Q, x_target=model.x_upright) c0 = c0 + c0.goal_seeking_terminal_cost(xN_target=model.x_upright) * 1000 return c0 * 2 class ThetaPendulumEnvironment(ControlEnvironment): def __init__(self, Tmax=5, render_mode=None): dt = 0.02 discrete_model = DiscreteControlModel(PendulumModel(), dt=dt) super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode) class GymSinCosPendulumEnvironment(ControlEnvironment): def __init__(self, *args, Tmax=5, supersample_trajectory=False, render_mode=None, **kwargs): discrete_model = DiscreteSinCosPendulumModel(*args, **kwargs) self.action_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.action_size,), dtype=float) self.observation_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.state_size,), dtype=float) super().__init__(discrete_model, Tmax=Tmax, supersample_trajectory=supersample_trajectory, render_mode=render_mode) if __name__ == "__main__": model = SinCosPendulumModel(l=1, m=1) print(str(model)) print(f"Pendulum with l={model.l}, m={model.m}") x = [1,2] u = [0] # Input state/action. # x_dot = ... # TODO: 1 lines missing. raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using the model-class defined above") # x_dot_numpy = ... # TODO: 1 lines missing. raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using numpy-expressions you write manually.") print(f"Using model-class: dx/dt = f(x, u, t) = {x_dot}") print(f"Using numpy: dx/dt = f(x, u, t) = {x_dot_numpy}")