We demonstrate a comparison among various implementations of Rolling Regression in SAS and show that the fastest implementation is over 3200X faster than traditional BY-processing approach.

More often than not, we encounter a problem where an OLS over a rolling time window is required, see , , , , , , , for a few examples.

One solution is to resort to SAS MACRO, but it is extremely inefficient and can't handle large dataset in reality, . This method is shown below as Method. It couldn't finish the test in allowable time using the sample data below.

The other common solution is to use the BY-processing capability of PROC REG after re-shaping the data into appropriate format, see , . This method is demonstrated as Method below. While certainly much better than above one, it is still not the fastest and requires more memory.

The third solution comes to play by recognizing that in OLS, what you need is the SSCP and you can easily build up the SSCP over rolling time window by resorting to PROC EXPAND. This is demonstrated as Method below. This approach will further improve the speed but still requires large amount of memory if the data is big and many rolling windows are generated.

Since what we need to do is to build the SSCP matrix and obtain the coefficient estimates based on the informaiton in SSCP, we can certainly code this in a DATA Step using ADJUST operator, which provides a solution that is both fast and low memory occupancy. See  for an introduction to ADJUST operator. To make this even faster, a modification of ADJUST operator, the SWEEP operator, can be used. For an introduction to SWEEP operator, see , . In the code below, Method implements the ADJUST operator, while Method implements the SWEEP operator.

The experiment literally runs 499980 regressions each with 20 observations and 2 predictors, and the results are shown below:

Real Time    |        CPU Time          | Memory
=====================================================

Method 0 |    1.01 (seconds)   |    1.01 (seconds)        |    611K

Method 1 |    0.25 (seconds)   |    0.24 (seconds)        |    432K

Method 2 |    1.61 (seconds)   |    0.94 (seconds)        | 50381K

Method 3 |  80.54 (seconds)   |   79.61 (seconds)       |  2322K

Method 4 |         Failed           |           Failed               |    Failed
=====================================================

Reference:
MYSAS.NET, http://www.mysas.net/forum/viewtopic.php?f=4&t=8070
MYSAS.NET, http://www.mysas.net/forum/viewtopic.php?f=4&t=7898
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0604D&L=sas-l&P=R32485
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0704C&L=sas-l&P=R3305
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0802C&L=sas-l&P=R9746
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0801C&L=sas-l&P=R14671
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0810A&L=sas-l&P=R19135
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0802C&L=sas-l&P=R13489
Michael D Boldin, "Programming Rolling Regressions in SAS", Proceedings of NESUG, 2007
SAS-L, http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0604D&L=sas-l&D=0&P=56926
J. H. Goodnight, "The Sweep Operator: Its Importance in Statistical Computing", SAS Tech Report R-106, 1978
Kenneth Lange, "Numerical Analysis for Statisticians", Springer, 1998

proc datasets library=work kill; run;

options fullstimer;
data test;
do seq=1 to 500000;
x1=rannor(9347957);
*x2=rannor(876769)+0.1*x1;
epsilon=rannor(938647)*0.5;
y = 1.5 + 0.5*x1 +epsilon;
output;
end;
run;

/* Method 0.*/
data res0;
set test;
array _x{3,3} _temporary_ ;
array _a{3,3} _temporary_ ;
array _tempval{5, 20} _temporary_ ;
m=mod(_n_-1, 20)+1;
_tempval[1, m]=x1;
_tempval[2, m]=y;
_tempval[3, m]=x1**2;
_tempval[4, m]=x1*y;
_tempval[5, m]=y**2;

if _n_>=20 then do;
if _n_>20 then do;
m2=mod(_n_-20, 20)+1;
_x[1,2]+(-_tempval[1, m2]);
_x[1,3]+(-_tempval[2, m2]);
_x[2,2]+(-_tempval[3, m2]);
_x[2,3]+(-_tempval[4, m2]);
_x[3,3]+(-_tempval[5, m2]);
end;
do i=1 to dim(_a, 1);
do j=1 to dim(_a, 2);
_a[i, j]=_x[i, j];
end;
end;

do k=1 to dim(_a, 1)-1;
end;
Intercept=_a[1,3]; beta=_a[2,3];
keep seq intercept beta;
output;
end;

return;
filler:
_x[1,1]=20; _x[1,2]+x1; _x[1,3]+y;
_x[2,2]+_tempval[3,m]; _x[2,3]+_tempval[4,m]; _x[3,3]+_tempval[5,m];
_x[2,1]=_x[1,2]; _x[3,1]=_x[1,3]; _x[3,2]=_x[2,3];
return;

B=_a[k, k];
do j=1 to dim(_a, 2);
_a[k, j]=_a[k, j]/B;
end;
do i=1 to dim(_a, 1);
if i ^=k then do;
B=_a[i, k];
do j=1 to dim(_a, 2);
_a[i, j]=_a[i, j]-B*_a[k, j];
end;
end;
end;
return;

run;
sasfile test close;

/* Method 1.*/

data rest0;
set test;
array _x{4} _temporary_;
array _a{2,20} _temporary_;
m=mod(_n_-1, 20)+1;
_a[1, m]=x1; _a[2,m]=y;

m2=mod(_n_-20, 20)+1;
if _n_>=20 then do;
if _n_>20 then do;
end;
beta=(_x-_x*_x/20)/(_x-_x**2/20);
intercept=_x/20 - beta*_x/20;
keep seq intercept beta ;
output;
end;
return;
filler:
_x+x1;
_x+x1*y;
_x+x1**2;
_x+y;
return;
deduct:
_x=_x-_a[1,m2];
_x=_x-_a[1,m2]*_a[2,m2];
_x=_x-_a[1,m2]**2;
_x=_x-_a[2,m2];
return;
run;
sasfile test close;

/* Method 2.*/

%macro wrap;
%let window=20;
%let diff=%eval(&window-0);
data testv/view=testv;
set test;
xy=x1*y;
run;

proc expand data=testv method=none out=summary(keep=seq sumxy sumx1 sumy ussx1 may max);
convert x1=sumx1/transformout=(movsum &diff);
convert xy=sumxy/transformout=(movsum &diff);
convert x1=ussx1/transformout=(movuss &diff);
convert y =sumy /transformout=(movsum &diff);
convert y =may / transformout=(movave &diff);
convert x1 =max / transformout=(movave &diff);
run;

data result1;
set summary(firstobs=&window);
beta = (sumxy - sumx1*sumy/&window)/(ussx1 - sumx1/&window.*sumx1);
alpha= may - beta*max;
keep seq beta alpha;
run;
%mend;

%let t0=%sysfunc(datetime(), datetime24.);
*options nosource nonotes;
%wrap;
options source notes;
%let t1=%sysfunc(datetime(), datetime24.);
%put Start @ &t0;
%put End @ &t1;

/* Method 3.*/
%let t0=%sysfunc(datetime(), datetime.);

data test2v/view=test2v;
set test;
array _x{2, 20} _temporary_ (20*0 20*0);
k=mod(_n_-1, 20)+1;
_x[1, k]=x1; _x[2, k]=y;
if _n_>=20 then do;
do j=1 to dim(_x, 2);
x=_x[1, j]; y=_x[2, j];
output;
keep seq x y;
end;
end;
run;

ods select none;
proc reg data=test2v outest=res2(keep=seq x intercept);
by seq;
model y = x;
run;quit;
ods select all;

%let t1=%sysfunc(datetime(), datetime.);
%put Start @ &t0;
%put End @ &t1;

/* Method 4. */
%macro wrap;
options nonotes;
ods select none;
%do i=20 %to 500000;
%let fo=%eval(&i-19);
proc reg data=test(firstobs=&fo obs=&i) outest=_xres(keep=x1 intercept);
model y =x1;
run;quit;
%if %eval(&i=20) %then %do;
data res3; set _xres; run;
%end;
%else %do;
proc append base=res3 data=_xres; run;
%end;
%end;

ods select all;
data res3;
set res3;
time=19+_n_;
run;
options notes;
%mend;

%let t0=%sysfunc(datetime(), datetime.);
%wrap;
%let t1=%sysfunc(datetime(), datetime.);
%put Start @ &t0;
%put End @ &t1; 