function lqr_solution close all animation = false; global N_phi N_w N_u phi_min phi_max w_min w_max u_min u_max % discretization grid sizes N_phi = 120; N_w = 73; N_u = 5; % range of phi phi_min = -pi; phi_max = +pi; % bounds on omega w_min = -pi; w_max = pi; % bounds on the controls u_min = -1.1; u_max = 1.1; % Task 1: % continous time linearization Ac = [0 1; 2 0]; Bc = [0; 1]; % convert to discrete time system, ts=0.1 sysc = ss(Ac,Bc,eye(2),0); sysd = c2d(sysc,0.1); A = sysd.a; B = sysd.b; % Task 2: Q = [1 0; 0 0]; R = 1; S = [0 0]; % Task 3: N_DP = 60; N = 100; h = 0.12; x0 = [-pi/8;1.2]; P = inf(2,2,N_DP); P(:,:,N_DP) = 0; for k=N_DP:-1:2 Pk = P(:,:,k); P(:,:,k-1) = Q + A'*Pk*A-(S'+A'*Pk*B)*inv(R+B'*Pk*B)*(S+B'*Pk*A); end % Task 4: J1 = inf(N_phi,N_w); for phiZ=1:N_phi for wZ=1:N_w phi = phiZ_to_phi(phiZ); w = wZ_to_w(wZ); J1(phiZ,wZ) = [phi w] * P(:,:,1) * [phi; w]; end end figure(1) [C,handle] = contour3(J1); set(handle,'LevelStep',get(handle,'LevelStep')*0.2) figure(2) [C,handle] = contour3(get_first_J()); set(handle,'LevelStep',get(handle,'LevelStep')*0.2) % Task 5.1: u_star_lin = zeros(N_DP-1,1); x_star_lin = zeros(2,N_DP); x_star_lin(:,1) = x0; % get optimal controls for the linear system for k=1:N_DP-1 u_star_lin(k) = -inv(R+B'*P(:,:,k+1)*B)*(S+B'*P(:,:,k+1)*A) * x_star_lin(:,k); x_star_lin(:,k+1) = A * x_star_lin(:,k) + B * u_star_lin(k); end % Task 5.2: % simulate with linear system if animation figure(3) plot_pendulum(x_star_lin(:,1)); pause(1) for k=1:N_DP-1 plot_pendulum(x_star_lin(:,k+1)); title('linear system'); pause(h) end end % Task 5.3: % simulate with non-linear system x_nonlin = zeros(2,N_DP); x_nonlin(:,1) = x0; figure(3) plot_pendulum(x_star_lin(:,1)); pause(1) for k=1:N_DP-1 phi = x_nonlin(1,k); w = x_nonlin(2,k); x_nonlin(:,k+1) = integrate_rk4([phi;w],u_star_lin(k),h); if animation plot_pendulum(x_nonlin(:,k+1)); title('non-linear system'); pause(h) end end % Task 5.4: % non-linear dynamics w feedback u_star = zeros(N_DP-1,1); x_feedback = zeros(2,N_DP); x_feedback(:,1) = x0; figure(3) plot_pendulum(x_feedback(:,1)); pause(1) for k=1:N_DP-1 phi = x_feedback(1,k); w = x_feedback(2,k); u_star(k) = -inv(R+B'*P(:,:,k+1)*B)*(S+B'*P(:,:,k+1)*A) * [phi;w]; if u_star(k) > u_max u_star(k) = u_max; elseif u_star(k) < u_min u_star(k) = u_min; end [x_feedback(:,k+1), ~] = integrate_rk4([phi;w],u_star(k),h); if animation figure(3) plot_pendulum(x_feedback(:,k+1)); title('feedback control'); pause(h) end end % Task 6: % solve the algebraic Ricatti Equation Palg = inf(2,2); Palg(:,:) = 0; counter = 1; while true P_next = Q + A'*Palg*A-(S'+A'*Palg*B)*inv(R+B'*Palg*B)*(S+B'*Palg*A); if norm(P_next-Palg, 'fro') <= 1e-5 break; end Palg=P_next; counter = counter + 1; end % Task 7: % non-linear dynamics w feedback with ricatti algebraic u_lqr = zeros(N_DP-1,1); x_lqr = zeros(2,N_DP); x_lqr(:,1) = x0; figure(3) plot_pendulum(x_lqr(:,1)); pause(1) for k=1:N_DP-1 phi = x_lqr(1,k); w = x_lqr(2,k); u_lqr(k) = -inv(R+B'*Palg*B)*(S+B'*Palg*A) * [phi;w]; if u_lqr(k) > u_max u_lqr(k) = u_max; elseif u_lqr(k) < u_min u_lqr(k) = u_min; end [x_lqr(:,k+1), ~] = integrate_rk4([phi;w],u_lqr(k),h); if animation figure(3) plot_pendulum(x_lqr(:,k+1)); title('LQR'); pause(h) end end figure(4) subplot(4,2,1) plot(linspace(0,N_DP*h,N_DP), x_star_lin) subplot(4,2,3) plot(linspace(0,N_DP*h,N_DP), x_nonlin) subplot(4,2,5) plot(linspace(0,N_DP*h,N_DP), x_feedback) subplot(4,2,7) plot(linspace(0,N_DP*h,N_DP), x_lqr) subplot(4,2,2) stairs(linspace(0,N_DP*h,N_DP-1), u_star_lin) subplot(4,2,4) stairs(linspace(0,N_DP*h,N_DP-1), u_star_lin) subplot(4,2,6) stairs(linspace(0,N_DP*h,N_DP-1), u_star) subplot(4,2,8) stairs(linspace(0,N_DP*h,N_DP-1), u_lqr) end function first_J = get_first_J() global N_phi N_w N_u phi_min phi_max w_min w_max u_min u_max h = 0.12; N = 60; % precalculate all integrations % for each state and control combination % compute next state, and immediate cost tic PhiNext = zeros(N_phi,N_w,N_u); WNext = zeros(N_phi,N_w,N_u); L = zeros(N_phi,N_w,N_u); for phiZ=1:N_phi for wZ=1:N_w for u_int=1:N_u [phi_int_next, w_int_next, cost] = integrate_Z(phiZ, wZ, u_int,h); PhiNext(phiZ,wZ,u_int) = phi_int_next; WNext(phiZ,wZ,u_int) = w_int_next; L(phiZ,wZ,u_int) = cost; end end end toc % DYNAMIC PROGRAMMING % backward recursion % initialize with infinite cost-to-go for all iterations J = inf*ones(N_phi,N_w,N); % initialize final costs to be 0 J(:,:,N) = 0; % iterate backwards over time from N-1 to 1 for k=N-1:-1:1 % iterate over all states on the grid for phiZ=1:N_phi for wZ=1:N_w % iterate over all controls for u_int=1:N_u % update the cost-to-go for this state % use the lookup tables for efficiency phi_int_next = PhiNext(phiZ,wZ,u_int); w_int_next = WNext(phiZ,wZ,u_int); sum_cost = J(phi_int_next,w_int_next, k+1) + L(phiZ,wZ,u_int); if (sum_cost < J(phiZ,wZ, k)) J(phiZ,wZ, k) = sum_cost; end end end end end first_J = J(:,:,1); end %%%%%%%%%%%%%%%%%%%%%% LOCAL FUNCTIONS %%%%%%%%%%%%%%%%%% function [phiZ_next, wZ_next, cost] = integrate_Z_unconstr(phiZ,wZ,uZ,h) global N_phi w_min w_max phi = phiZ_to_phi(phiZ); w = wZ_to_w(wZ); u = uZ_to_u(uZ); x = [phi;w]; [x_next,cost] = integrate_rk4(x,u,h); phi_next = x_next(1); w_next = x_next(2); % if w_next > w_max % w_next=w_min; % cost = inf; % elseif w_next < w_min % w_next=w_min; % cost = inf; % end phiZ_next = mod(phi_to_phiZ(phi_next)-1, N_phi)+1; wZ_next = w_to_wZ(w_next); end function [phiZ_next, wZ_next, cost] = integrate_Z(phiZ,wZ,uZ,h) global N_phi w_min w_max phi = phiZ_to_phi(phiZ); w = wZ_to_w(wZ); u = uZ_to_u(uZ); x = [phi;w]; [x_next,cost] = integrate_rk4(x,u,h); phi_next = x_next(1); w_next = x_next(2); if w_next > w_max w_next=w_min; cost = inf; elseif w_next < w_min w_next=w_min; cost = inf; end phiZ_next = mod(phi_to_phiZ(phi_next)-1, N_phi)+1; wZ_next = w_to_wZ(w_next); end function phi = phiZ_to_phi(phiZ) global phi_min phi_max N_phi phi = phi_min + phiZ*(phi_max-phi_min) / (N_phi); end function w = wZ_to_w(wZ) global w_min w_max N_w w = w_min + wZ*(w_max-w_min) / (N_w-1); end function u = uZ_to_u(uZ) global u_min u_max N_u u = u_min + uZ*(u_max-u_min) / (N_u-1); end function phi_int = phi_to_phiZ(phi) global phi_min phi_max N_phi phi_int = floor(((phi - phi_min) / (phi_max-phi_min)) * (N_phi) + (phi_max-phi_min)/N_phi)+1; end function w_int = w_to_wZ(w) global w_min w_max N_w w_int = round(((w - w_min) / (w_max-w_min)) * (N_w-1))+1; end function u_int = u_to_uZ(u) global u_min u_max N_u u_int = round(((u- u_min) / (u_max-u_min)) * (N_u-1))+1; end function [ x_next, q ] = integrate_rk4(x,u,h) [k1,q1] = ode(x,u); [k2,q2] = ode(x+h/2.*k1,u); [k3,q3] = ode(x+h/2.*k2,u); [k4,q4] = ode(x+h*k3,u); x_next = x + h/6. * (k1+2*k2+2*k3+k4); q = h/6. * (q1+2*q2+2*q3+q4); end function [dx,dq]=ode(x,u) dx = [x(2); 2*sin(x(1)) + u]; dq = x(1)^2 + u^2; end function plot_pendulum(x) phi = x(1); plot([0;sin(phi)], [0;cos(phi)], '-o') xlim([-1,1]) ylim([-1,1]) end