/*---------------------------------------------------------------------------
  Builds the BMN earnings-distribution sample on the WRDS server and
  downloads it to the local session.

  Remote part (between RSUBMIT and ENDRSUBMIT):
    1. Read comp.compann for 1976-2001, dropping the dnum ranges 4400-5000
       and 6000-6500 (described in the original comments as utilities and
       financial institutions).
    2. Rename the dataNN items, compute ratios, the statutory tax rate and
       estimated taxable income.
    3. Attach prior-observation values (lag_mve, lag_shout, lag_ta, lag_bve)
       within each gvkey and scale earnings measures by lag_mve.
    4. For every year, delete observations outside the 1st/99th percentile of
       each scaled earnings measure.
    5. Winsorize etr_bmn, etr_cur and si_mve at the pooled 1st/99th
       percentiles.
  Local part: keep years after 1977 and inspect the distributions.
---------------------------------------------------------------------------*/
%let wrds=wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;

libname comp '/wrds/compustat/sasdata';
libname crsp '/wrds/crsp/sasdata';
libname temp '/sastemp3/';

/* Read in compdata, omitting financial institutions and utilities. */
data temp.compann;
    set comp.compann;
    where (yeara ge 1976) and (yeara le 2001);
    if (dnum ge 4400) and (dnum le 5000) then delete;
    if (dnum ge 6000) and (dnum le 6500) then delete;
    keep dnum yeara gvkey data25 data170 fyr smbl stk data6 data199 data181
         data16 data17 data18 data50 data52 data53 data58 data63 data64
         data122 data172;
run;

/* Rename variables and set the tax rate. */
data temp.compann;
    set temp.compann;

    shout             = data25;
    Pricefyrclose     = data199;
    TA                = data6;
    TL                = data181;
    taxexp            = data16;
    SI                = data17;
    NIBE              = data18;
    def               = data50;
    EPSBasicExclExtra = data58;
    curfed            = data63;
    curfor            = data64;
    PTI               = data170;
    NI                = data172;

    bve        = ta - tl;
    lev        = TL/TA;
    roa        = ni/ta;
    ETR_BMN    = taxexp/pti;
    ETR_cur    = (curfed)/pti;
    incomeb4SI = pti - si;

    /* Tax rate used to gross up tax expense: .46 before 1987, .35 after. */
    if (yeara ge 1987) then str = .35;
    else str = .46;

    /* Taxable income estimate: current taxes when both pieces are present,
       otherwise total tax expense less deferred taxes when both are absent.
       When exactly one of curfed/curfor is missing, TI stays missing. */
    if (curfed ne .) and (curfor ne .) then TI = (curfed + curfor)/str;
    if (curfed eq .) and (curfor eq .) then TI = (taxexp - def)/str;

    BTdiffper = ((PTI - SI) - TI)/PTI;

    drop data6 data181 data170 data16 data17 data18 data25 data50 data52
         data53 data58 data63 data64 data122 data172 data199;
run;

proc sort data=temp.compann;
    by gvkey yeara;
run;

data temp.compann1;
    set temp.compann;

    /* LAG returns the value stored the last time that particular call was
       EXECUTED, not the value from the previous observation.  Calling LAG
       inside an IF ... THEN therefore skips the first row of every firm and
       leaks the previous firm's values into the next firm's second row.
       Every LAG is evaluated unconditionally here and only the assignment
       is made conditional. */
    _prev_gvkey = lag(gvkey);
    _prev_price = lag(pricefyrclose);
    _prev_shout = lag(shout);
    _prev_ta    = lag(ta);
    _prev_bve   = lag(bve);

    /* NOTE(review): the previous row of the same gvkey is used even when it
       is not the immediately preceding fiscal year - confirm that gaps in
       yeara are acceptable. */
    if (gvkey eq _prev_gvkey) then do;
        lag_mve   = _prev_price*_prev_shout;
        lag_shout = _prev_shout;
        lag_ta    = _prev_ta;
        lag_bve   = _prev_bve;
    end;
    drop _prev_gvkey _prev_price _prev_shout _prev_ta _prev_bve;

    /* BMN exclude if not have ni/lag_mve, pti/lag_mve, or incomeb4SI/lag_mve. */
    if (ni eq .) or (pti eq .) or (incomeb4si eq .) or (lag_mve eq .) then delete;

    /* BMN exclude any obs with ni, pti, or incomeb4si exactly equal to zero. */
    if (ni eq 0) or (pti eq 0) or (incomeb4si eq 0) then delete;

    ni_mve    = ni/lag_mve;
    pti_mve   = pti/lag_mve;
    ib4SI_mve = incomeb4si/lag_mve;
    si_mve    = si/lag_mve;
run;

/* BMN also exclude the upper and lower 1 percent of each earnings
   distribution for each year. */
%macro loop;
    %do i=1976 %to 2001;

        /* One year of data plus a constant key for the percentile merge. */
        data yeara_&i;
            set temp.compann1;
            where yeara=&i;
            mergedum=1;
        run;

        /* 1st and 99th percentiles of the three scaled earnings measures. */
        proc univariate data=yeara_&i noprint;
            var ni_mve pti_mve ib4si_mve;
            output out=winsor
                   p1=  ni_mve_1  pti_mve_1  ib4si_mve_1
                   p99= ni_mve_99 pti_mve_99 ib4si_mve_99;
        run;

        data winsor;
            set winsor;
            mergedum=1;
        run;

        /* Trim (delete, not winsorize) observations outside the 1st/99th
           percentile.  The "ne ." guard keeps missing values from being
           treated as below the lower cutoff. */
        data yeara_&i;
            merge yeara_&i winsor;
            by mergedum;
            if (ni_mve > ni_mve_99) then delete;
            if (ni_mve < ni_mve_1) and (ni_mve ne .) then delete;
            if (pti_mve > pti_mve_99) then delete;
            if (pti_mve < pti_mve_1) and (pti_mve ne .) then delete;
            if (ib4si_mve > ib4si_mve_99) then delete;
            if (ib4si_mve < ib4si_mve_1) and (ib4si_mve ne .) then delete;
        run;

    %end;
%mend loop;

%loop;

/* Stack the trimmed yearly files. */
data bmn;
    set yeara_1976 yeara_1977 yeara_1978 yeara_1979 yeara_1980 yeara_1981
        yeara_1982 yeara_1983 yeara_1984 yeara_1985 yeara_1986 yeara_1987
        yeara_1988 yeara_1989 yeara_1990 yeara_1991 yeara_1992 yeara_1993
        yeara_1994 yeara_1995 yeara_1996 yeara_1997 yeara_1998 yeara_1999
        yeara_2000 yeara_2001;
run;

/* BMN winsorize etr and si (pooled over all years). */
data bmn;
    set bmn;
    mergedum=1;
run;

proc univariate data=bmn noprint;
    var etr_bmn etr_cur si_mve;
    output out=winsor
           p1=  etr_bmn_1  etr_cur_1  si_mve_1
           p99= etr_bmn_99 etr_cur_99 si_mve_99;
run;

data winsor;
    set winsor;
    mergedum=1;
run;

/* Perform winsorization: values beyond a cutoff are set to the cutoff;
   missing values are left untouched. */
data bmn;
    merge bmn winsor;
    by mergedum;
    if (etr_bmn > etr_bmn_99) then etr_bmn=etr_bmn_99;
    if (etr_bmn < etr_bmn_1) and (etr_bmn ne .) then etr_bmn=etr_bmn_1;
    if (etr_cur > etr_cur_99) then etr_cur=etr_cur_99;
    if (etr_cur < etr_cur_1) and (etr_cur ne .) then etr_cur=etr_cur_1;
    if (si_mve > si_mve_99) then si_mve=si_mve_99;
    if (si_mve < si_mve_1) and (si_mve ne .) then si_mve=si_mve_1;
run;

proc download data=bmn;
run;

endrsubmit;

/* Local session: keep only years after 1977. */
data bmn;
    set bmn;
    where yeara gt 1977;
run;

/*
proc means data=bmn; by yeara; var ni_mve pti_mve ib4si_mve; run;
data pospretax; set bmn; if pti_mve gt 0; run;
data negpretax; set bmn; if pti_mve lt 0; run;
proc means data=pospretax; var ni_mve pti_mve etr_bmn etr_cur; run;
proc means data=negpretax; var ni_mve pti_mve etr_bmn etr_cur; run;
data pospresi; set bmn; if ib4si_mve gt 0; run;
data negpresi; set bmn; if ib4si_mve lt 0; run;
proc means data=pospresi; var pti_mve si_mve ib4si_mve; run;
proc means data=negpresi; var pti_mve si_mve ib4si_mve; run;
*/

/* I can't get close on for 1976 and 1977, but other than that I get the
   same thing, so I use after 1976 and 1977 for the distributions...
   (Block comment on purpose: a "* ...;" statement comment must not contain
   an unmatched quotation mark.) */
data pospretax;
    set bmn;
    if pti_mve gt 0;
run;

data negpretax;
    set bmn;
    if pti_mve lt 0;
run;

proc univariate data=pospretax;
    histogram /href=0;
    var etr_bmn si_mve;
run;

proc univariate data=negpretax;
    histogram /href=0;
    var etr_bmn si_mve;
run;

*histograms;
data distr; set bmn; if -.6