import matplotlib.pyplot as plt
import numpy as np

from utils import ode, RK4_integrator, project
from plot_utils import plot_closed_loop_simulation, animate_pendulum


# the data.npz file is generated by running dynamic_programming.py
with np.load('data.npz') as data:
    Ts = data["Ts"]
    nSteps = data["nSteps"]*2
    Q = data["Q"]
    R = data["R"]
    K = data["K"]
    x1_values = data["x1_values"]
    x2_values = data["x2_values"]
    u_max = data["u_max"]
    u_map = data["u_map"]


# Closed-loop simulation
t_grid = [0]
Tf = 5
x0 = np.vstack(([np.pi, 0]))

cost_LQR = [0]
state_LQR = [x0]
control_LQR = []

cost_DP = [0]
state_DP = [x0]
control_DP = []


while t_grid[-1] < Tf:
    # optimal feedback law from LQR
    # TODO:
    # don't forget to clip to the contraints
    x_LQR = ...
    u_LQR = ...
    control_LQR.append(u_LQR)
    if np.ndim(u_LQR) == 0:
        u_LQR = np.array([u_LQR])
    cost_LQR.append((u_LQR.T@R@u_LQR + x_LQR.T@Q@x_LQR + cost_LQR[-1]).item())

    # apply LQR control to nonlinear system
    x_next, _, _ = RK4_integrator(x_LQR, u_LQR, Ts, nSteps, ode)
    state_LQR.append(x_next)

    # optimal feedback law from DP   
    # TODO:
    x_DP = ...
    u_DP = ...
    control_DP.append(u_DP)
    if np.ndim(u_DP) == 0:
        u_DP = np.array([u_DP])
    cost_DP.append((u_DP.T@R@u_DP + x_DP.T@Q@x_DP + cost_DP[-1]).item())

    # apply DP control to nonlinear system
    x_next, _, _ = RK4_integrator(x_DP, u_DP, Ts, nSteps, ode)
    state_DP.append(x_next)
    
    # next time step
    t_grid.append(t_grid[-1] + Ts)


# and visualize result
state_LQR = np.hstack(state_LQR)
state_DP = np.hstack(state_DP)
plot = plot_closed_loop_simulation(
    t_grid, cost_LQR, state_LQR, control_LQR, cost_DP, state_DP, control_DP
)
plt.show()
