Commit 125e68e1 authored by flforget's avatar flforget
Browse files

Add matlab iLQR

parent 8539ddbe
function newu = ilqr(u, x0, xf, Qf, Q, R, dt, iterations, ethreshold)
% du = ilqr(u, x0, xf, Qf, Q, R, dt, iterations, ethreshold)
%
% run the ilqr algorithm using some initial state, nominal comman signal,
% and cost function. iterate until the maximum iterations are reached
% or until the change in the command signal goes below some threshold.
%
% given -> u
% -> x0 the state of the system at some initial time
% -> xf the requested goal state of the system
% -> Qf the quadratic final cost matrix
% -> Q the quadratic integrated cost matrix on the state
% -> R the quadratic integrated cost matrix on the command
% -> dt the current time step used
% -> iterations the maximum number of iterations to run for
% -> ethreshold stop when the change in u goes below this scalar
%
% returns -> newu the new nominal trajectory given by the algorithm
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
currentu = u;
oldcost = ilqr_cost(u, x0, xf, Qf, Q, R, dt);
for i = 1:iterations
du = ilqr_iteration(currentu, x0, xf, Qf, Q, R, dt);
currentu = currentu + 0.25 * du;
newcost = ilqr_cost(currentu, x0, xf, Qf, Q, R, dt);
if(abs(newcost - oldcost) < ethreshold)
break;
end;
oldcost = newcost;
end;
newu = currentu;
function cost = ilqr_cost(u, x0, xf, Qf, Q, R, dt)
% newx = ilqr_cost(u, x0, xf, Qf, Q, R, dt)
%
% run the system under consideration forward using the command signal u
% and calculate the associated cost defined by xf, Qf, Q, R.
%
% given -> u the command signal with which to control the system
% -> x0 the state of the system at some initial time
% -> xf the desired final state of the system
% -> Qf the quadratic final cost matrix
% -> Q the quadratic integrated cost matrix on the state
% -> R the quadratic integrated cost matrix on the command
% -> dt the current time step used
%
% returns -> the associated cost.
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
x = ilqr_openloop(x0, u, dt);
N = length(x);
cost = (x(N,:) - xf) * Qf * (x(N,:) - xf)';
for i = 1:N-1
cost = cost + (x(i,:)-xf) * Q * (x(i,:)-xf)' + u(i,:) * R * u(i,:)';
end;
cost = (1/2)*cost;
function du = ilqr_iteration(u, x0, xf, Qf, Q, R, dt)
% du = ilqr_iteration(u, x0, xf, Qf, Q, R, dt)
%
% run through one iteration of the ilqr algorithm. the du returned
% can be used to update the nominal trajectory u given to the function.
%
% given -> x0 the state of the system at some initial time
% -> xf the desired final state of the system
% -> Qf the quadratic final cost matrix
% -> Q the quadratic integrated cost matrix on the state
% -> R the quadratic integrated cost matrix on the command
% -> dt the current time step used
%
% returns -> du the update to be applied to the nominal trajectory in ilqr.
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
du = zeros(size(u));
nx = length(x0);
nu = length(u(1,:));
x = ilqr_openloop(x0, u, dt);
N = length(x);
dx = 0.00000000000001*ones(size(x));
A = zeros(nx, nx, N-1);
B = zeros(nx, nu, N-1);
for i = 1:N-1
[A(:,:,i) B(:,:,i)] = ilqr_linearization(x(i,:), u(i,:), dt);
end;
S = zeros(nx, nx, N);
S(:,:,N) = Qf;
nv = nx;
v = zeros(N, nx);
v(N,:) = (Qf*(x(N,:) - xf)')';
K = zeros(nu, nx, N-1);
Kv = zeros(nu, nv, N-1);
Ku = zeros(nu, nu, N-1);
for k = N-1:-1:1
K(:,:,k) = inv(B(:,:,k)' * S(:,:,k+1) * B(:,:,k) + R) * B(:,:,k)' * S(:,:,k+1) * A(:,:,k);
Kv(:,:,k) = inv(B(:,:,k)' * S(:,:,k+1) * B(:,:,k) + R) * B(:,:,k)';
Ku(:,:,k) = inv(B(:,:,k)' * S(:,:,k+1) * B(:,:,k) + R) * R;
S(:,:,k) = A(:,:,k)' * S(:,:,k+1) * (A(:,:,k) - B(:,:,k) * K(:,:,k)) + Q;
v(k,:) = ((A(:,:,k) - B(:,:,k) * K(:,:,k))' * v(k+1,:)' - K(:,:,k)' * R * u(k,:)' + Q * x(k,:)')';
du(k,:) = (-1) * (K(:,:,k)* dx(k,:)' + Kv(:,:,k) * v(k+1,:)' + Ku(:,:,k) * u(k,:)')';
end;
function [A, B] = ilqr_linearization(x, u, dt)
% [A, B] = ilqr_linearization(x, u, dt)
%
% linearizes the system in preparation to apply the ilqr techniques.
%
% given -> x the state of the system at some time
% -> u the command into the system at some time
% -> dt the current time step used
%
% returns -> A the current jacobian Dxf(x,u) as defined by todorov
% -> B the current jacobian Duf(x,u) as defined by todorov
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
A = zeros(length(x), length(x));
B = zeros(length(x), length(u));
% g = 9.8;
% m = 1.0;
% l = 1.0;
% mu = 0.01;
% A = diag([-10.73*dt 0 0 ]);
% A(1,2) = -84.23*dt;
% A(1,3) = -366*dt;
% A(2,1) = 1*dt;
% A(2,3) = 0;
% A(3,1) = 0;
% A(3,2) = 1*dt;
%
% B(1,1) = 1*dt;
% B(2,1) = 0;
% B(3,1) = 0;
%% Continuous Time model
% A = diag([(1-10.73*dt) 1 1]);
% A(1,2) = -10.53*dt;
% A(1,3) = -5.719*dt;
% A(2,1) = 8*dt;
% A(2,3) = 0;
% A(3,1) = 0;
% A(3,2) = 8*dt;
%
% B(1,1) = 2*dt;
% B(2,1) = 0;
% B(3,1) = 0;
%% Continuous Time modelfrm Transfer function
% A = diag([ 1 1 (1-10.73*dt)]);
% A(1,2) = dt;
% A(1,3) = 0;
% A(2,1) = 0;
% A(2,3) = dt;
% A(3,1) = -366*dt;
% A(3,2) = -84.23*dt;
%
% B(1,1) = 0;
% B(2,1) = 0;
% B(3,1) = 207.1*dt;
A = diag([ 1 1 (1-18.69*dt)]);
A(1,2) = dt;
A(1,3) = 0;
A(2,1) = 0;
A(2,3) = dt;
A(3,1) = -1001*dt;
A(3,2) = -148.2*dt;
B(1,1) = 0;
B(2,1) = 0;
B(3,1) = 522.5*dt;
A = zeros(4,4);
B=zeros(4,1);
k=1000.0;
R=200.0;
Jm=138*1e-7;
Jl=0.1;
Cf0=0.1;
a=10.0;
A(1,2) = 1.0;
A(3,3) = 1.0;
A(2,1) = -(k/Jl)+(k/(Jm*R*R));
A(4,1) =1.0/Jl;
A = (A*dt+eye(4));
B =[ 0.0;
k/(R*Jm)*dt;
0.0;
0.0];
%% discrete time model
% A = diag([(1+2.89*dt) 1 1 ]);
% A(1,2) = -1.394*dt;
% A(1,3) = 0.449*dt;
% A(2,1) = 2*dt;
% A(2,3) = 0;
% A(3,1) = 0;
% A(3,2) = 1*dt;
%
% B(1,1) = 0.007812*dt;
% B(2,1) = 0;
% B(3,1) = 0;
%
% Ad = [ 2.89 -1.394 0.4491;
% 2 0 0;
% 0 1 0];
% Bd = [0.007812 0 0];
% Cd = [ 0.004301 0.008373 0.002038];
% Dd = 0;
k=1000.0;
R=200.0;
Jm=138*1e-7;
Jl=0.1;
Cf0=0.1;
a=10.0;
A = [0.0, 1.0, 0.0, 0.0;
-(k/Jl)+(k/(Jm*R*R)), 0.0,0.0,0.0;
0.0,0.0,1.0,0.0;
1.0/Jl,0.0,0.0,0.0];
A = A*dt + eye(4);
B = [0.0;0.0;k/(R*Jm);0.0];
B = B*dt;
\ No newline at end of file
function trajectory = ilqr_openloop(x0, u, dt)
% trajectory = ilqr_openloop(x, u, dt)
%
% run the system forward in open loop from an initial state x0 using
% the control signal u. note that length(trajectory) == length(u) + 1.
%
% given -> x0 the state of the system at some initial time
% -> u the command signal into the system
% -> dt the current time step used
%
% returns -> the trajectory through state space taken by the system.
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
trajectory = zeros(length(u)+1, length(x0));
trajectory(1,:) = x0;
for i = 2:length(trajectory)
trajectory(i,:) = ilqr_system(trajectory(i-1,:), u(i-1,:), dt);
end;
function newx = ilqr_system(x, u, dt)
% newx = ilqr_system(x, u, dt)
%
% run the system under consideration forward and return the new state.
%
% given -> x the state of the system at some time
% -> u the command into the system at some time
% -> dt the current time step used
%
% returns -> newx the new state of the system
%
% created by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
% last edited by timothy lillicrap (tim@biomed.queensu.ca); december 1, 2005
newx = zeros(size(x));
[A B] = ilqr_linearization(x,u,dt);
newx = (A*x' + B*u')';
%% Linear model
% u = zeros(50,2);
% u = ilqr(u, [0 0 0 0], [1 4 0 0], diag([1 1 1 1]),zeros(4),0.001*eye(2), 0.05, 20, 0); %ilqr(u, x0, xf, Qf, Q, R, dt, iterations, ethreshold)
% x = ilqr_openloop([0 0 0 0], u, 0.05);
%% Simple pendulum
% u = zeros(120, 1);
% u = ilqr(u, [pi, 0], [0 0], diag([1 1]), zeros(2), 0.00001*eye(1), 0.005, 20, 0);
% x = ilqr_openloop([pi 0], u, 0.005); plot(x(:,1), x(:,2));
% N = 120;
% u = zeros(N, 1);
%
% %ilqr_system(x0,u, dt);
% x0 = [pi/2 0];
% xf = [0 0];
% Qf = diag([1 1]);
% Q = zeros(2);
% R = 0.00001*eye(1);
% dt = 0.005;
% iterations = 20;
% ethreshold = 0.0000001;
% Timeofsim = 1.0;
%% Pneumaticarm Model
%N = 800;
%u = zeros(N, 1);
%ilqr_system(x0,u, dt);
% x0 = [0 0];
% xf = [1 0];
% Qf = diag([1 1 ]);
% Q = zeros(2);
% R = 0.00001*eye(1);
% dt = 0.005;
% iterations = 20;
% ethreshold = 0.0000000001;
% Timeofsim = 4;
% N = Timeofsim/(10*dt);
% u = zeros(N, 1);
%% Bertrand Model
% g = 9.8;
% m = 2.75;
% link_l = 0.32;
%
% I = 0.25*m*(link_l^2)/3;
% K1 = 2.1794;
% K2 = 1.2698;
% Pm = 2.5 ;
%
% Tmax = 5*K1;
% fk = 0.1*Tmax;
% fs = fk/10;
% fv = fk;
% %Linear around (0,0)
% A = [0 1; (-2*K2*Pm/I - m*g*link_l/I) -fv/I];
% B = [0 ; 2*K1/I];
% C = [1 0 ];
% D = 0;
% ss_linearbertrand = ss(A,B,C,D);
%% Identified Lienar Model
% N = 81;
% u = zeros(N, 1);
%
% %ilqr_system(x0,u, dt);
x0 = [0.0 0.0 0.0 0.0];
xf = [1.0 0.0 0.0 0.0];
Qf = diag([100.0 0.0 0.0 0.0]);
Q = Qf; %zeros(3);
R = 0.1*eye(1);
dt = 0.001;
iterations = 20;
ethreshold = 0.00000001;
Timeofsim = 1;
N = Timeofsim/(100*dt);
N = 30;
u = zeros(N, 1);
%%
xcur = x0;
%% One loop
u = ilqr(u, xcur, xf, Qf, Q, R, dt, iterations, ethreshold);
xtraj = ilqr_openloop(x0, u, dt);
figure(1)
subplot(211), plot(x(:,1))
hold on;
grid on;
xlabel('Time');
ylabel('Angular postion of elbow joint (rad)')
subplot(212), plot(u(:,1))
grid on;
xlabel('Time');
ylabel('Pressure variation (bar)')
set(gca,'FontSize',30,'fontWeight','bold')
%% TIme evolution
% xtime = zeros(Timeofsim/(dt) +1,4);
% utime = zeros(Timeofsim/(dt),1);
% lp =0;
% lpn =1;
% int=1;
% mstep =1;
% finiter = Timeofsim/(dt);
%
% fin = Timeofsim/(dt*N);
% mstep = fin;
% xtime(1,:) = xcur;
% ucur = u;
% %[waypoint, T,Xref] = lsim(ss_linearbertrand, u_ref, time_ref);
% for i = 1:1:finiter-fin
% % xf(:,1) = waypoint(lp)
%
% u = ilqr(ucur, xcur, xf, Qf, Q, R, dt, iterations, ethreshold);
% xtraj = ilqr_openloop(xcur, u, dt) ;%+ 0.001*rand(1);
% % xcur = xtraj(2,:);
% % xtime(i+1,:) = xcur;
% % utime(i+1) = u(1);
% %u = u(1:801-i);
% % lp = lp+1
% %
% % if(lp >= fin)
% % ucur = zeros(N-lpn,1);
% % ucur = u(1:N+1-lpn,1);
% %
% % lpn = lpn+1;
% % lp=0;
% % end
%
%
% end
%
% figure(1)
% subplot(211), plot(xtime(:,1))
% hold on;
% grid on;
% xlabel('Time');
% ylabel('Angular postion of elbow joint (rad)')
%
% subplot(212), plot(utime(:,1))
% grid on;
% xlabel('Time');
% ylabel('Pressure variation (bar)')
% set(gca,'FontSize',30,'fontWeight','bold')
%
% set(findall(gcf,'type','text'),'FontSize',30,'fontWeight','bold')
% figure(2)
% plot(xtime(:,1), xtime(:,2))
% grid on;
%
% xlabel('Angular postion of elbow joint (rad)');
% ylabel('Angular speed of elbow joint (rad)');
%% For Preview gain
% v1 = ones(N, 1);
% v2 = ones(N, 1);
% v3 = ones(N, 1);
%
% v1(:) = K(1,1,:);
% v2(:) = K(1,2,:);
% v1(:) = K(1,3,:);
%
% figure(3)
% plot(v1, 'r')
% hold on
% plot(v2, 'g')
% plot(v3, 'b')
% ss_sys = ss(tf1);
% u = u'
% utraj = zeros(length(u)+1,1);
% utraj(1) = 0;
% for lp =3:1:N+2
% utraj(lp-1) = u(lp-2);
% end
% utraj(1:N) = u(1:N) ;
% utraj(N+1:2*N+1) = u(N);
% utraj;
% Tfinal = dt*(2*N);
% T = 0:dt:Tfinal;
%u = 1+zeros(size(T));
%% Discrete time model
% dsys = c2d(tf1, 0.01, 'zoh');
% ss_dsys = ss(dsys)
% Ad = [ 2.89 -1.394 0.4491;
% 2 0 0;
% 0 1 0];
% Bd = [0.007812 0 0]';
% Cd = [ 0.004301 0.008373 0.002038];
% Dd = 0;
%
% xtrajd = dlsim(Ad, Bd, Cd, Dd, utraj)'
% x0 = [0, 0, 0]';
%
% xtraj = lsim(ss_sys, utraj, T, x0)'
% % plot(xtrajd, 'r')
% plot(xtraj, 'g')
%% Linear model
% u = zeros(50,2);
% u = ilqr(u, [0 0 0 0], [1 4 0 0], diag([1 1 1 1]),zeros(4),0.001*eye(2), 0.05, 20, 0); %ilqr(u, x0, xf, Qf, Q, R, dt, iterations, ethreshold)
% x = ilqr_openloop([0 0 0 0], u, 0.05);
%% Simple pendulum
% u = zeros(120, 1);
% u = ilqr(u, [pi, 0], [0 0], diag([1 1]), zeros(2), 0.00001*eye(1), 0.005, 20, 0);
% x = ilqr_openloop([pi 0], u, 0.005); plot(x(:,1), x(:,2));
% N = 120;
% u = zeros(N, 1);
%
% %ilqr_system(x0,u, dt);
% x0 = [pi/2 0];
% xf = [0 0];
% Qf = diag([1 1]);
% Q = zeros(2);
% R = 0.00001*eye(1);
% dt = 0.005;
% iterations = 20;
% ethreshold = 0.0000001;
% Timeofsim = 1.0;
%% Pneumaticarm Model
%N = 800;
%u = zeros(N, 1);
%ilqr_system(x0,u, dt);
% x0 = [0 0];
% xf = [1 0];
% Qf = diag([1 1 ]);
% Q = zeros(2);
% R = 0.00001*eye(1);
% dt = 0.005;
% iterations = 20;
% ethreshold = 0.0000000001;
% Timeofsim = 4;
% N = Timeofsim/(10*dt);
% u = zeros(N, 1);
%% Bertrand Model
% g = 9.8;
% m = 2.75;
% link_l = 0.32;
%
% I = 0.25*m*(link_l^2)/3;
% K1 = 2.1794;
% K2 = 1.2698;
% Pm = 2.5 ;
%
% Tmax = 5*K1;
% fk = 0.1*Tmax;
% fs = fk/10;
% fv = fk;
% %Linear around (0,0)
% A = [0 1; (-2*K2*Pm/I - m*g*link_l/I) -fv/I];
% B = [0 ; 2*K1/I];
% C = [1 0 ];
% D = 0;
% ss_linearbertrand = ss(A,B,C,D);
%% Identified Lienar Model
% N = 81;
% u = zeros(N, 1);
%
% %ilqr_system(x0,u, dt);
x0 = [0.0 0.0 0.0 0.0];
xf = [1.0 0.0 0.0 0.0];
Qf = diag([100.0 0.0 0.0 0.0]);
Q = Qf; %zeros(3);
R = 0.1*eye(1);
dt = 0.0001;
iterations = 20;
ethreshold = 0.001;
Timeofsim = 1;
N = Timeofsim/(100*dt);
N = 30;
u = zeros(N, 1);
%%
xcur = x0;
%% One loop
u = ilqr(u, xcur, xf, Qf, Q, R, dt, iterations, ethreshold);
x = ilqr_openloop(x0, u, dt);
figure(1)
subplot(211), plot(x(:,1))
hold on;
grid on;
xlabel('Time');
ylabel('Angular postion of elbow joint (rad)')
subplot(212), plot(u(:,1))
grid on;
xlabel('Time');
ylabel('Pressure variation (bar)')
set(gca,'FontSize',30,'fontWeight','bold')
%% TIme evolution
% xtime = zeros(Timeofsim/(dt) +1,4);
% utime = zeros(Timeofsim/(dt),1);