%let wrds=wrds.wharton.upenn.edu 4016;options comamid=TCP remote=WRDS; signon username=_prompt_; rsubmit; libname temp '/sastemp3/'; /* *********************set up for crsp compustat merge*************************************; %let beg_yr = 1991; %let end_yr = 2005; *get all firms in date range; proc sort data=crsp.CSTLINK2 out=lnk; where LINKTYPE in ("LU", "LC", "LD", "LF", "LN", "LO", "LS", "LX") and (&end_yr+1 >= year(LINKDT) or LINKDT = .B) and (&beg_yr-1 <= year(LINKENDDT) or LINKENDDT = .E); by GVKEY LINKDT; run; *get compustat data without missing variables all firms of interest are likely to have; proc sql; create table compdata as select lnk.gvkey,lnk.npermno as permno,yeara,fyenddt,fyr,dnum,cnum,coname,smbl, data25*data199 as mvefye,data45 as advx,data46 as rd, data12 as sales,data18 as nibe,data41 as cogs,data8 as ppe, data189 as sga,data84 as caplease,data204 as goodwill,data33 as intangibles, data352 as otherintangibles, data308 as cfo from lnk,crsp.cstann as cst where lnk.gvkey=cst.gvkey and finc=00 and (&beg_yr <= YEARA <= &end_yr) and (LINKDT <= cst.FYENDDT or LINKDT = .B) and (cst.FYENDDT <= LINKENDDT or LINKENDDT = .E) and npermno ne . and cstann.gvkey ne . and not 4000<=dnum<=4999 and not 6000<=dnum<=6999 and not (missing(data45)) and not (missing(data12)) and not (missing(data18)) and not (missing(data41)) and not (missing(data8)) and not (missing(data189)) and not (missing(data308)) order by npermno,yeara; quit; *format data, if other missing variable set the variables to 0, also create an identifier starting with a character and sic2; data compdata; set compdata; array missvar{*} rd caplease goodwill intangibles otherintangibles; do i=1 to dim(missvar); if missvar(i) eq . then missvar(i)=0; end; *create identifier; permnoc=put(permno,5.); yearac=put(yeara,4.); dmu=smbl||permnoc||yearac; *create sic2; if (dnum gt 900) then dnum4=dnum; if (dnum le 900) then dnum3=dnum; dnumc4=put(dnum4,4.); dnumc3=put(dnum3,3.); p=0; pre=put(p,1.); dnumc34=pre||dnumc3; if (dnum gt 900) then dnumc=dnumc4; if (dnum le 900) then dnumc=dnumc34; sic2=substr(dnumc,1,2); drop dnum3 dnum4 dnumc3 dnumc4 p pre dnumc34 dnumc permnoc yearac; run; *limit sample to industry years with at least 100 firms; proc sql; create table compdata as select *,n(permno) as n_indust from compdata group by sic2,yeara; quit; *limit to industries with a sufficient number of firms per year, in this case 50 is arbitrary and limits the number of industries severely; data compdata; set compdata; where n_indust ge 50; run; *proc freq data=compdata; *tables nsic2 yeara nsic2*yeara; *run; *create sic variables for macro for industries in the sample; data compdata; set compdata; nsic2=sic2*1; if nsic2 eq 20 then msic=1; if nsic2 eq 28 then msic=2; if nsic2 eq 35 then msic=3; if nsic2 eq 36 then msic=4; if nsic2 eq 38 then msic=5; if nsic2 eq 58 then msic=6; if nsic2 eq 59 then msic=7; if nsic2 eq 73 then msic=8; run; */ *this part creates the efficiency measures, run by industry year; %macro industryyear; %do iyear=1 %to 15; %do isic=1 %to 8; data compdata2; set compdata; where msic=&isic and yeara=1990+&iyear; run; proc sql; create table inpdata as select dmu,cogs,sga,ppe,caplease,rd,goodwill,intangibles+otherintangibles as intang,advx from compdata2 order by dmu; create table outdata as select dmu,sales from compdata2 order by dmu; quit; PROC EXPORT DATA= WORK.INPDATA OUTFILE= "/sastemp3/inpdata.txt" DBMS=TAB REPLACE; RUN; PROC EXPORT DATA= WORK.outDATA OUTFILE= "/sastemp3/outdata.txt" DBMS=TAB REPLACE; RUN; %let _InData="/sastemp3/inpdata.txt" ; %let _nInput=7; %let _OutData="/sastemp3/outdata.txt" ; %let _nOutput=1; %let _Orienta='InputMin'; libname sasdea "/sastemp3/"; proc datasets nolist; delete Eff Report1 report2; run; data NY; infile &_OutData delimiter='09'X missover dsd ; array NYarray(&_nOutput) $ NY1- NY&_nOutput ; length unitname $ 50 ; input unitname $ NYarray(*) $ ; if _n_ eq 1 then output ; drop unitname; run; data AY DMUs; infile &_OutData delimiter='09'X missover dsd; array AYarray(&_nOutput) AY1-AY&_nOutput; length unitname $ 50 ; input unitname $ AYarray(*) ; if _n_ gt 1 then output ; run; data NX; infile &_InData delimiter='09'X missover dsd ; array NXarray(&_nInput)$ NX1-NX&_NInput ; length unitname $ 50 ; input unitname $ NXarray(*) $ ; if _n_ eq 1 then output; drop unitname; run; data AX; infile &_InData delimiter='09'X missover dsd ; array AXarray(&_nInput) AX1-AX&_nInput; length unitname $ 50 ; input unitname $ AXarray(*) ; if _n_ gt 1 then output; call symput('_nDmu',_n_-1); run; data DMUs; set AY (keep=UnitName); Uj0=_n_; run; %macro loop; %let _nUnit=&_nDmu; %do jj=1 %to &_nUnit; %macro model; %local i j j0; data MODEL1; %let _nUnit=&_nDmu; array NYarray(&_nOutput)$ NY1-NY&_nOutput ; array AYarray(&_nOutput) AY1-AY&_nOutput; array NXarray(&_nInput) $ NX1-NX&_NInput ; array AXarray(&_nInput) AX1-AX&_nInput; array Jarray(&_nUnit) U1-U&_nUnit ; length _row_ $ 50 _col_ $ 50 _type_ $ 8; keep _row_ _col_ _type_ _coef_ ; J0=&jj; /* Jo is unit under assessment*/ if &_Orienta='InputMin' then do; _type_='MIN'; _row_ ='OBJ'; _col_='.'; _coef_=.; output; _type_='.'; _row_ ='OBJ'; _col_ ='FI'; _coef_=1; output; _type_='.'; _row_ ='OBJ'; _col_='_rhs_'; _coef_=0; output; end; link ReadNX; do j= 1 to &_nUnit; link ReadAX; do i= 1 to &_nInput; _col_='LA' || put(j,3.); _row_=NXarray(i); _type_='.'; _coef_=AXarray(i); output; if j=J0 then do; select(&_Orienta); when('InputMin') do; _row_=NXarray(i); _type_='.'; _coef_=-AXarray(i); _col_='FI'; output; _col_='_rhs_'; _row_=NXarray(i); _type_='LE'; *LE; _coef_=0; output; end; when('OutputMax') do; _row_=NXarray(i); _type_='LE'; _coef_=AXarray(i); _col_='_rhs_'; output; end; otherwise; end; end; end; end; ReadNX: set NX; return; ReadAX: set AX; return; run; data MODEL2; %let _nUnit=&_nDmu; array NYarray(&_nOutput)$ NY1-NY&_nOutput ; array AYarray(&_nOutput) AY1-AY&_nOutput; array NXarray(&_nInput) $ NX1-NX&_NInput ; array AXarray(&_nInput) AX1-AX&_nInput; array Jarray(&_nUnit) U1-U&_nUnit ; length _row_ $ 50 _col_ $ 50 _type_$8; keep _row_ _col_ _type_ _coef_ ; J0=&jj; * Jo is unit under assessment; if &_Orienta='OutputMax' then do; _type_='MAX'; _row_ ='OBJ'; _col_='.'; _coef_=.; output; _type_='.'; _row_ ='OBJ'; _col_ ='FI'; _coef_=1; output; _type_='.'; _row_ ='OBJ'; _col_='_rhs_'; _coef_=0; output; end; link ReadNY; do j= 1 to &_nUnit; link ReadAY; do r= 1 to &_nOutput; _col_='LA' || put(J,3.); _row_=NYarray(r); _type_='.'; _coef_=AYarray(r); output; if j=J0 then do; select(&_Orienta); when('InputMin') do; _col_='_rhs_'; _row_=NYarray(r); _type_='GE'; _coef_=AYarray(r); output; end; when('OutputMax') do; _col_='FI'; _row_=NYarray(r); _type_='.'; _coef_=-AYarray(r); output; _col_='_rhs_'; _row_=NYarray(r); _type_='GE'; _coef_=0; output; end; otherwise; end; end; end; end; ReadNY: set NY; return; ReadAY: set AY; return; run; data MODEL; set MODEL1 MODEL2 ; run; %mend model ; %model options nonotes; proc lp data=MODEL noprint sparsedata primalout=lp2 dualout=lp3 ; run; data EffJ0(keep=Uj0 eff); set lp2 (where= (_var_='OBJ')); Uj0=&jj; if &_Orienta='OutputMax' and _value_ ne 0 then Eff=1/_value_;else Eff=_value_; run; proc datasets nolist; append base=Eff data=EffJ0; run; data lp2;set lp2;Uj0=&jj; run; proc datasets nolist; append base=Report1 data=lp2;run; data lp3;set lp3;Uj0=&jj; run; proc datasets nolist; append base=Report2 data=lp3;run; options notes; %end; %mend loop; %loop data eff(drop=Uj0); merge DMUs Eff; by Uj0; run; data eff&iyear&isic; set eff; run; %end; %end; %mend industryyear; %industryyear data totaleff; set eff11 eff12 eff13 eff14 eff15 eff16 eff17 eff18 eff21 eff22 eff23 eff24 eff25 eff26 eff27 eff28 eff31 eff32 eff33 eff34 eff35 eff36 eff37 eff38 eff41 eff42 eff43 eff44 eff45 eff46 eff47 eff48 eff51 eff52 eff53 eff54 eff55 eff56 eff57 eff58 eff61 eff62 eff63 eff64 eff65 eff66 eff67 eff68 eff71 eff72 eff73 eff74 eff75 eff76 eff77 eff78 eff81 eff82 eff83 eff84 eff85 eff86 eff87 eff88 eff91 eff92 eff93 eff94 eff95 eff96 eff97 eff98 eff101 eff102 eff103 eff104 eff105 eff106 eff107 eff108 eff111 eff112 eff113 eff114 eff115 eff116 eff117 eff118 eff121 eff122 eff123 eff124 eff125 eff126 eff127 eff128 eff131 eff132 eff133 eff134 eff135 eff136 eff137 eff138 eff141 eff142 eff143 eff144 eff145 eff146 eff147 eff148 eff151 eff152 eff153 eff154 eff155 eff156 eff157 eff158; run; data totaleff; set totaleff; yeara=1*put(substr(unitname,14,4),$4.); dmu=unitname; run; data compdata; set compdata; yeara=1*yeara; run; proc sort data=totaleff; by dmu yeara; run; proc sort data=compdata; by dmu yeara; run; data dea; merge totaleff (in=a) compdata (in=b); by dmu yeara; if a eq 1 and b eq 1 then output; run; proc print data=dea(obs=20); run; *proc download data=dea; *run; *proc download data=totaleff; *run; endrsubmit; *Compustat CRSP merge; proc sql; create table crspdata as select compdata.*, msi.ewretd, msi.date from compdata left join crsp.msi on intnx('month',fyenddt,-12) <= msi.date <= intnx('month',fyenddt,3); quit; *pull firm returns; proc sql; create table crspdata as select crspdata.*, msf.ret from crspdata left join crsp.msf on crspdata.permno = msf.permno and crspdata.date = msf.date; quit; proc sql; create table crspdata as select crspdata.*, (exp(sum(log(1+(ret))))-1 -exp(sum(log(1+(ewretd))))-1) as adjret, n(ret) as N from crspdata group by permno, fyenddt; quit; data compdata1; set compdata1; *industry classification; if 1000<=dnum<1300 or 1400<=dnum<=1999 then indust='Mining and Construction'; if 2000<=dnum<=2111 then indust='Food'; if 2200<=dnum<=2799 then indust='Textiles, printing and publishing'; if 2800<=dnum<=2824 or 2840<=dnum<=2899 then indust='Chemicals'; if 2830<=dnum<=2836 then indust='Pharmaceuticals'; if 2900<=dnum<=2999 or 1300<=dnum<=1399 then indust='Extractive industries'; if 3000<=dnum<3570 or 3580<=dnum<3670 or 3680<=dnum<=3999 then indust='Durable manufacturers'; if 7370<=dnum<=7379 or 3570<=dnum<=3579 or 3670<=dnum<=3679 then indust='Computers'; if 4000<=dnum<=4899 then indust='Transportation'; if 4900<=dnum<=4999 then indust='Utilities'; if 5000<=dnum<=5999 then indust='Retail'; if 6000<=dnum<=6411 then indust='Financial Institutions'; if 6500<=dnum<=6999 then indust='Insurance and Real Estate'; if 7000<=dnum<7370 or 7380<=dnum<=8999 then indust='Services'; if dnum>=9000 then indust='Other'; run;