Source code for rofunc.planning_control.lqr.ilqr

"""
    iLQR for a viapoints task (batch formulation)

    Refers to https://gitlab.idiap.ch/rli/robotics-codes-from-scratch by Dr. Sylvain Calinon
"""
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from rofunc.config.utils import get_config
from omegaconf import DictConfig


[docs]class iLQR:
    def __init__(self, cfg):
        self.cfg = cfg

[docs]    def logmap_2d(self, f, f0):
        position_error = f[:, :2] - f0[:, :2]
        orientation_error = np.imag(np.log(np.exp(f0[:, -1] * 1j).conj().T * np.exp(f[:, -1] * 1j).T)).conj().reshape(
            (-1, 1))
        error = np.hstack([position_error, orientation_error])
        return error

[docs]    def fkin0(self, x):
        L = np.tril(np.ones([self.cfg.nbVarX, self.cfg.nbVarX]))
        f = np.vstack([
            L @ np.diag(self.cfg.l) @ np.cos(L @ x),
            L @ np.diag(self.cfg.l) @ np.sin(L @ x)
        ]).T
        f = np.vstack([np.zeros(2), f])
        return f

[docs]    def fk(self, x):
        """
        Forward kinematics for end-effector (in robot coordinate system)
        $f(x_t) = x_t$
        """
        # f = x
        x = x.T
        L = np.tril(np.ones([self.cfg.nbVarX, self.cfg.nbVarX]))
        f = np.vstack([
            self.cfg.l @ np.cos(L @ x),
            self.cfg.l @ np.sin(L @ x),
            np.mod(np.sum(x, 0) + np.pi, 2 * np.pi) - np.pi
        ])  # f1,f2,f3, where f3 is the orientation (single Euler angle for planar robot)
        return f.T

[docs]    def Jacobian(self, x):
        """
        Jacobian with analytical computation (for single time step)
        $J(x_t)= \dfrac{\partial{f(x_t)}}{\partial{x_t}}$
        """
        # J = np.identity(len(x))
        L = np.tril(np.ones([self.cfg.nbVarX, self.cfg.nbVarX]))
        J = np.vstack([
            -np.sin(L @ x).T @ np.diag(self.cfg.l) @ L,
            np.cos(L @ x).T @ np.diag(self.cfg.l) @ L,
            np.ones(self.cfg.nbVarX)
        ])
        return J

[docs]    def f_reach(self, robot_state, Mu, Rot, specific_robot=None):
        """
        Error and Jacobian for a via-points reaching task (in object coordinate system)
        Args:
            cfg:
            robot_state: joint state or Cartesian pose
        Returns:

        """
        if specific_robot is not None:
            ee_pose = specific_robot.fk(robot_state)
        else:
            ee_pose = self.fk(robot_state)
        f = self.logmap_2d(ee_pose, Mu)
        J = np.zeros([self.cfg.nbPoints * self.cfg.nbVarF, self.cfg.nbPoints * self.cfg.nbVarX])
        for t in range(self.cfg.nbPoints):
            f[t, :2] = Rot[t].T @ f[t, :2]  # Object-oriented forward kinematics
            Jtmp = self.Jacobian(robot_state[t])
            Jtmp[:2] = Rot[t].T @ Jtmp[:2]  # Object centered Jacobian

            if self.cfg.useBoundingBox:
                for i in range(2):
                    if abs(f[t, i]) < self.cfg.sz[i]:
                        f[t, i] = 0
                        Jtmp[i] = 0
                    else:
                        f[t, i] -= np.sign(f[t, i]) * self.cfg.sz[i]

            J[t * self.cfg.nbVarF:(t + 1) * self.cfg.nbVarF, t * self.cfg.nbVarX:(t + 1) * self.cfg.nbVarX] = Jtmp
        return f, J

[docs]    def get_matrices(self):
        # Precision matrix
        Q = np.identity(self.cfg.nbVarF * self.cfg.nbPoints)

        # Control weight matrix
        R = np.identity((self.cfg.nbData - 1) * self.cfg.nbVarU) * self.cfg.rfactor

        # Time occurrence of via-points
        tl = np.linspace(0, self.cfg.nbData, self.cfg.nbPoints + 1)
        tl = np.rint(tl[1:]).astype(np.int64) - 1
        idx = np.array([i + np.arange(0, self.cfg.nbVarX, 1) for i in (tl * self.cfg.nbVarX)])
        return Q, R, idx, tl

[docs]    def set_dynamical_system(self):
        # Transfer matrices (for linear system as single integrator)
        Su0 = np.vstack([np.zeros([self.cfg.nbVarX, self.cfg.nbVarX * (self.cfg.nbData - 1)]),
                         np.tril(np.kron(np.ones([self.cfg.nbData - 1, self.cfg.nbData - 1]),
                                         np.eye(self.cfg.nbVarX) * self.cfg.dt))])
        Sx0 = np.kron(np.ones(self.cfg.nbData), np.identity(self.cfg.nbVarX)).T
        return Su0, Sx0

[docs]    def get_u_x(self, Mu: np.ndarray, Rot: np.ndarray, u: np.ndarray, x0: np.ndarray, Q: np.ndarray,
                R: np.ndarray, Su0: np.ndarray, Sx0: np.ndarray, idx: np.ndarray, tl: np.ndarray):
        Su = Su0[idx.flatten()]  # We remove the lines that are out of interest

        for i in range(self.cfg.nbIter):
            x = Su0 @ u + Sx0 @ x0  # System evolution
            x = x.reshape([self.cfg.nbData, self.cfg.nbVarX])
            f, J = self.f_reach(x[tl], Mu, Rot)  # Residuals and Jacobians
            du = np.linalg.inv(Su.T @ J.T @ Q @ J @ Su + R) @ (
                    -Su.T @ J.T @ Q @ f.flatten() - u * self.cfg.rfactor)  # Gauss-Newton update
            # Estimate step size with backtracking line search method
            alpha = 2
            cost0 = f.flatten() @ Q @ f.flatten() + np.linalg.norm(u) ** 2 * self.cfg.rfactor  # Cost
            while True:
                utmp = u + du * alpha
                xtmp = Su0 @ utmp + Sx0 @ x0  # System evolution
                xtmp = xtmp.reshape([self.cfg.nbData, self.cfg.nbVarX])
                ftmp, _ = self.f_reach(xtmp[tl], Mu, Rot)  # Residuals
                cost = ftmp.flatten() @ Q @ ftmp.flatten() + np.linalg.norm(utmp) ** 2 * self.cfg.rfactor  # Cost
                if cost < cost0 or alpha < 1e-3:
                    u = utmp
                    print("Iteration {}, cost: {}".format(i, cost))
                    break
                alpha /= 2
            if np.linalg.norm(du * alpha) < 1E-2:
                break  # Stop iLQR iterations when solution is reached
        return u, x

[docs]    def solve(self, Mu, Rot, u0, x0, for_test=False):
        Q, R, idx, tl = self.get_matrices()
        Su0, Sx0 = self.set_dynamical_system()
        u, x = self.get_u_x(Mu, Rot, u0, x0, Q, R, Su0, Sx0, idx, tl)
        self.vis(Mu, Rot, x, tl, for_test=for_test)

[docs]    def vis(self, Mu, Rot, x, tl, for_test):
        plt.figure()
        plt.axis('off')
        plt.gca().set_aspect('equal', adjustable='box')

        # Get points of interest
        f00 = self.fkin0(x[0, :])
        f01 = self.fkin0(x[tl[0], :])
        f02 = self.fkin0(x[tl[1], :])
        f = self.fk(x)

        plt.plot(f00[:, 0], f00[:, 1], c='black', linewidth=5, alpha=.2)
        plt.plot(f01[:, 0], f01[:, 1], c='black', linewidth=5, alpha=.4)
        plt.plot(f02[:, 0], f02[:, 1], c='black', linewidth=5, alpha=.6)
        plt.plot(f[:, 0], f[:, 1], c='black', marker='o', markevery=[0] + tl.tolist())

        # Plot bounding box or viapoints
        ax = plt.gca()
        color_map = ['deepskyblue', 'darkorange']
        for t in range(self.cfg.nbPoints):
            if self.cfg.useBoundingBox:
                rect_origin = Mu[t, :2] - Rot[t, :, :] @ np.array(self.cfg.sz)
                rect_orn = Mu[t, -1]
                rect = patches.Rectangle(rect_origin, self.cfg.sz[0] * 2, self.cfg.sz[1] * 2, np.degrees(rect_orn),
                                         color=color_map[t])
                ax.add_patch(rect)
            else:
                plt.scatter(Mu[t, 0], Mu[t, 1], s=100, marker='X', c=color_map[t])

        if not for_test:
            plt.show()