************ * MS_spellmaker2015.sas * Randy Ellis and Wenjia Zhu 4/2/2015 * Current spell types * 1 = No visit spells (N) * 2 = New visit spells (V) * 3 = Continuation spells (C) * 4 = Chronic spell types (CHR) * Prior spell types * 0 = Unknown - spell type cannot be assigned in first spell (U) * 1 = No visit spells (N) * 2 = New visit spells (V) * 3 = Continuation spells (C) * 4 = Chronic spell types (CHR) * Dirty spells * 1 = spells with negative (net) sum of payments or oop * Each spell is 30 days long. except that in months were a new visit starts, the NO visit spell segment can be 1-29 days. * Spellindex = month defining the spell month, which ranges from 1 to 60 in our five year sample; * ****************; dm 'clear log'; dm 'clear print'; libname in5m "F:\MS2013"; *local directory where you want to read in claims data; libname elig "F:\TestData"; *local directory where you want to read in eligibility data; libname out "F:\MarketScan Extracts\Outputs\test"; *local directory where you want to write out claims and eligibility data; libname junkk "F:\MarketScan Extracts\Outputs\junk"; *local directory where you want to temporarily store data sets; *%include "C:\Users\wz97\Dropbox\EllisZhu2013\MS_Formats\formats_spellmaker2015.sas"; *formats used in this program; *this format is currently at the bottom of this file, but could be put in a separate file and included if desired. options ls = 200 ps = 9999 nocenter; options obs = max; *options obs = 1000000; *for debugging; %macro bigsort(indata=in,outdata=out, byvar=id, parts=10, junkdir=); * MACRO BIGSORT Randy Ellis 11/27/2014 * this macro breaks a big file into &parts pieces, sorts each piece, and then merges them back together. * It is ineffieicnt, but minimimzes the risk of overflowing available disk space; * Note that all files in junkdir are deleted at the end of the program execution, so beware! *********************************; * count number of observations in indata; data _null_; dsid=open("&indata."); z=attrn(dsid,"nobs"); call symput('_nobs', z); partobs=z/&parts.; call symputx('_partobs', partobs); run; %put 'nobs in full dataset ' &_nobs ; %put 'parts used for bigsort ' &parts ; %put 'nobs in each part in bigsort' &_partobs ; *split dataset into &parts pieces in the junkdir drive; data %do ibs = 1 %to &parts; &junkdir..temp&ibs. %end; ; drop _done; set &indata; %do ibs = 1 %to &parts; if (&ibs-1)*&_partobs < _n_ <= &ibs*&_partobs then do; output &junkdir..temp&ibs.; _done=1; end; %end; *this line worries that the last obs may be dropped due to rounding errrors; if _done ne 1 then output &junkdir..temp&parts. ; run; *sort each part, overwriting the original; %do ibs = 1 %to &parts; proc sort data=&junkdir..temp&ibs.; by &byvar.; run; %end; *merge together pieces; data &outdata; set %do ibs = 1 %to &parts; &junkdir..temp&ibs. %end; ; by &byvar.; run; proc datasets library =&junkdir. noprint; delete %do ibs = 1 %to &parts; temp&ibs. %end; ; quit; run; *sample macro call; *%bigsort(indata=one,outdata=two, byvar=x, parts=3, junkdir=junkk); %mend; /** sort relevant data sets, only for first-time run;*/ %bigsort(indata=in5m.ms_op_adult7891011, outdata=in5m.op7891011srt, byvar=enrolid svcdate, parts=5, junkdir=junkk); %bigsort(indata=in5m.ms_is_adult7891011, outdata=in5m.is7891011srt, byvar=enrolid svcdate, parts=5, junkdir=junkk); *ms_op_adult7891011 ms_is_adult7891011 are inpatient and outpatient claims for patients continuously enrolled during 2007-2011; *In general, indata can be other types of claims data, e.g. inpatient, prescription drug. during selected sample period; %bigsort(indata=elig.ms_a_pers_7891011, outdata=elig.ms_a_pers7891011srt, byvar=enrolid year, parts=5, junkdir=junkk); *ms_a_pers_7891011 is the set of person yeats of MS adults continuously enrolled during 2007-2011; *In general, indata takes any type of eligibility, during any selected sample period; %macro spellmaker; data _null_; y1= mdy(12,31,2006); call symputx('end2006',y1); y2= mdy(12,31,2007); call symputx('end2007',y2); y3= mdy(12,31,2008); call symputx('end2008',y3); y4= mdy(12,31,2009); call symputx('end2009',y4); y5= mdy(12,31,2010); call symputx('end2010',y5); y6= mdy(12,31,2011); call symputx('end2011',y6); y7= mdy(12,31,2012); call symputx('end2012',y7); y8= mdy(12,31,2013); call symputx('end2013',y8); put _all_; run; data out.ms_spells; set in5m.ms_op_adult7891011 (in=inop) in5m.ms_is_adult7891011 (in=inip); by enrolid svcdate; drop end2006-end2013 ; *drop ADMTYP CASEID DOBYR DRG DSTATUS EIDFLAG EMPZIP HOSPCTY HOSPZIP MSA PATFLAG PATID PDDATE PROVZIP SEQNUM TSVCDAT UNIHOSP VERSION WGTKEY PDX PROCMOD; if svcdate > &end2006. and svcdate <= &end2013.; * drop claims with dates outside of target region; retain firstday lastday splityear spelltype spellindex spellspan nextspan spellduration totalspells /*prevtype*/ ipspellflag erspellflag; retain end2006 &end2006. end2007 &end2007. end2008 &end2008. end2009 &end2009. end2010 &end2010. end2011 &end2011. end2012 &end2012. end2013 &end2013.; * 17166 is the sas date for 12/31/2006; array enddate end2007 end2008 end2009 end2010 end2011 end2012 end2013; array full firstday lastday spelltype spellindex spellduration totalspells /*prevtype*/ nextspan; month = month(svcdate); *drop if already on file; year = year(svcdate); day = svcdate-end2006; label day = days of service since 12/31/2006; label firstday = first SASdate of spell; label lastday = last SASdate of spell; label spelltype = current spell spelltype 1-no visit 2-new visit 3-continuation visit; label spellindex = index of spell from 1 to 60 used for dxcg or merging; label spellspan = length of current spell month; label nextspan = length of next spell month; label spellduration = number of months for current spelltype; label totalspells = sum of number of treatment spells in sample; label splityear = spell spans two calendar years; label ipspellflag = is 1 if IP during spell; label erspellflag = is 1 if ER visit during spell; drop time; if first.enrolid then do ; do over full; full=.; end; totalspells=0; spelltype=1; spellspan=0; nextspan=0; spellduration=0; ipspellflag=0; erspellflag=0; end; time = input(put(day,daytime.),2.); *use new mapping for exact results; if lastday = . or day > sum(lastday,nextspan) then do; *first record of enrollee or over month elapsed from last spell; firstday=day; spellspan=input(put(firstday,dayspan.),4.); nextspan=input(put(firstday,nextspan.),4.); lastday = day+spellspan-1; totalspells=totalspells+1; spelltype = 2; ipspellflag=0; erspellflag=0; spellindex = time; spellduration = 1; if put(firstday,splityear.)='1' then splityear=1; else splityear=0; end; else if lastday < day and day <=sum(lastday,nextspan) then do; *enrollee's current record within month of last spell; *service is a continuation spell (type=3) and need to update the spell variables; totalspells=totalspells+1; firstday =lastday+1; lastday = firstday+nextspan-1; spellspan=nextspan; nextspan=input(put(firstday,nextspan.),4.); spelltype = 3; spellindex=spellindex+1; spellduration = spellduration+1; ipspellflag=0; erspellflag=0; if put(firstday,splityear.)='1' then splityear=1; else splityear=0; end; *else claim is during a known spell and all is fine; *create new flags; if put(svcscat,$er_svcscat.)='1' then erspellflag=1; else if put(svcscat,$ip_svcscat.) = '1' then ipspellflag=1; output; run; proc contents data = out.ms_spells; run; proc means data = out.ms_spells; title proc means data = out.ms_spells; run; proc print data = out.ms_spells (obs = 100); var enrolid svcdate year month splityear day firstday lastday spelltype spellindex spellduration totalspells /*prevtype*/; run; proc freq data = out.ms_spells; tables year month splityear spellindex*splityear month*splityear /*time*/ spelltype spellindex spellspan spellduration totalspells /*prevtype*/; format spelltype /*prevtype*/ spelltype.; run; title collapse to one record per treatment spell; proc summary data=out.ms_spells nway; by enrolid; class spellindex; id year month /*time*/ splityear /*day*/ firstday lastday spelltype spellspan spellduration totalspells ipspellflag erspellflag /*prevtype*/; var pay netpay ; output out=out.ms_spellsummary sum(pay) = sumpay sum(netpay)=sumnetpay n(pay) = nobs; run; proc means data = out.ms_spellsummary; run; proc freq data = out.ms_spellsummary; tables year month /*time*/ splityear spelltype spellindex spellspan spellduration totalspells /*prevtype*/; format spelltype /*prevtype*/ spelltype.; run; *now fill in the no visit spells; *Initialize a file with one record per spellmonth; data out.allids; set elig.ms_a_pers_7891011; *person year enrollment file; keep contract enrolid year age month agegrp sex spellindex spelltype splityear spellduration totalspells sumpay sumnetpay sumoop ipspellflag erspellflag plankey plantype empcty region eeclass eestatu egeoloc emprel hlthplan indstry mhsacovg phyflag ; array full spellindex spelltype splityear spellduration totalspells sumpay sumnetpay sumoop ipspellflag erspellflag; do over full; full=0; end; plankey=plnkey12; plantype=plntyp12; spelltype = 1; do month=1 to 12; spellindex=month+12*(year-2007); output; end; run; data out.pers_spells; merge out.allids (in=in1) out.ms_spellsummary (in=in2); by enrolid spellindex; sumoop=sumpay - sumnetpay; if sumpay<0 or sumnetpay <0 or sumoop<0 then dirty=1; if spelltype ne 1 then anyvisit=1; else anyvisit=0; *verify that the only measures that spelltype takes are 1, 2, 3 at this point; label anyvisit=any visits during spellindex; label dirty = indicating sumpay<0 or sumnetpay <0 or sumoop<0; if in1; /*if dirty ne 1; *drop dirty spells although this creates an unbalanced panel;*/ run; title 'means and freqs with No Treatment spells added in'; proc means data = out.pers_spells; run; proc contents data = out.pers_spells; run; *add chronic spell type and create prevtype (prior spell type); data out.pers_spells_final; set out.pers_spells; retain lastspelltype 0; drop lastspelltype; by enrolid spellindex; prevtype =lastspelltype; if first.enrolid then do; if anyvisit =0 then spelltype =1; else spelltype =2; *new visit spells; prevtype =0; end; else do; if anyvisit =0 then spelltype =1; *no visit spells; else if prevtype =1 then spelltype =2; *new visit spells; else if prevtype =2 then spelltype =3; *continuation visit spells; else spelltype =4; *chronic visit spells; end; lastspelltype =spelltype; label spelltype = current spell spelltype 1-no visit 2-new visit 3-continuation visit 4-chronic visit; label prevtype = previous spell spelltype 0-unknown 1-no visit 2-new visit 3-continuation visit 4-chronic visit; run; proc means data =out.pers_spells_final; title proc means data =out.pers_spells_final; run; proc freq data =out.pers_spells_final; title proc freq data =out.pers_spells_final; tables prevtype*spelltype ipspellflag erspellflag spellindex; format prevtype prevtype. spelltype spelltype.; run; %mend; *The following formats should be included at the beginning of the main program; *************************************** * formats_spellmaker2015.sas * Format library for spellmaker2015.sas; **********************************; options nosource; proc format; value spelltype 1 = 'No visit spells -N' 2 = 'New visit spells -V' 3 = 'Continuation spells after new visit -C' 4 = 'Chronic spell types -CHR' ; value prevtype 0 = 'Unknown - spell type cannot be assigned in first spell -U' 1 = 'No visit spells -N' 2 = 'New visit spells -V' 3 = 'Continuation spells after new visit -C' 4 = 'Chronic spell types -CHR' ; /* Format created 3/18/2014 by Randy maps MS svccat into ER values without dropping IP admissions */ value $ER_svcscat "10120"= 1 /* 10120-Facility IP Non Acute ER */ "10220"= 1 /* 10220-Facility IP LTC ER */ "10320"= 1 /* 10320-Facility IP Maternity ER */ "10420"= 1 /* 10420-Facility IP Surgical ER */ "10520"= 1 /* 10520-Facility IP Medical ER */ "12220"= 1 /* 12220-Facility OP ER */ "20120"= 1 /* 20120-Physician Specialty IP ER */ "20220"= 1 /* 20220-Physician Non-Specialty IP ER */ "21120"= 1 /* 21120-Physician Specialty OP ER */ "21220"= 1 /* 21220-Physician Non-Specialty OP ER */ "22120"= 1 /* 22120-Professional IP ER */ "22320"= 1 /* 22320-Professional OP ER */ "30120"= 1 /* 30120-Mental Health Facility IP ER */ "30220"= 1 /* 30220-Mental Health Physician IP ER */ "30320"= 1 /* 30320-Mental Health Professional IP ER */ "30420"= 1 /* 30420-Mental Health Facility OP ER */ "30520"= 1 /* 30520-Mental Health Physician OP ER */ "30620"= 1 /* 30620-Mental Health Professional OP ER */ "31120"= 1 /* 31120-Substance Abuse Facility IP ER */ "31220"= 1 /* 31220-Substance Abuse Physician IP ER */ "31320"= 1 /* 31320-Substance Abuse Professional IP ER */ "31420"= 1 /* 31420-Substance Abuse Facility OP ER */ "31520"= 1 /* 31520-Substance Abuse Physician OP ER */ "31620"= 1 /* 31620-Substance Abuse Professional OP ER */ other = 0; /* Format created 3/18/2014 by Randy maps MS svccat into IP values for room and board records */ Value $IP_svcscat "10110"= 1 /* 10110-Facility IP Non Acute Room and Board */ "10210"= 1 /* 10210-Facility IP LTC Room and Board */ "10310"= 1 /* 10310-Facility IP Maternity Room and Board */ "10410"= 1 /* 10410-Facility IP Surgical Room and Board */ "10510"= 1 /* 10510-Facility IP Medical Room and Board */ "12210"= 1 /* 12210-Facility OP Room and Board */ "30110"= 1 /* 30110-Mental Health Facility IP Room and Board */ "30410"= 1 /* 30410-Mental Health Facility OP Room and Board */ "31110"= 1 /* 31110-Substance Abuse Facility IP Room and Board */ "31410"= 1 /* 31410-Substance Abuse Facility OP Room and Board */ other = 0; value daytime /* Format created 3/18/2014 by Randy maps day intervals into time index*/ 1 - 30 = 1 31 - 61 = 2 62 - 91 = 3 92 - 122 = 4 123 - 152 = 5 153 - 183 = 6 184 - 213 = 7 214 - 244 = 8 245 - 274 = 9 275 - 305 = 10 306 - 335 = 11 336 - 365 = 12 366 - 395 = 13 396 - 426 = 14 427 - 456 = 15 457 - 487 = 16 488 - 517 = 17 518 - 548 = 18 549 - 578 = 19 579 - 609 = 20 610 - 639 = 21 640 - 670 = 22 671 - 700 = 23 701 - 731 = 24 732 - 761 = 25 762 - 792 = 26 793 - 822 = 27 823 - 853 = 28 854 - 883 = 29 884 - 914 = 30 915 - 944 = 31 945 - 975 = 32 976 - 1005 = 33 1006 - 1036 = 34 1037 - 1066 = 35 1067 - 1096 = 36 1097 - 1126 = 37 1127 - 1157 = 38 1158 - 1187 = 39 1188 - 1218 = 40 1219 - 1248 = 41 1249 - 1279 = 42 1280 - 1309 = 43 1310 - 1340 = 44 1341 - 1370 = 45 1371 - 1401 = 46 1402 - 1431 = 47 1432 - 1461 = 48 1462 - 1491 = 49 1492 - 1522 = 50 1523 - 1552 = 51 1553 - 1583 = 52 1584 - 1613 = 53 1614 - 1644 = 54 1645 - 1674 = 55 1675 - 1705 = 56 1706 - 1735 = 57 1736 - 1766 = 58 1767 - 1796 = 59 1797 - 1826 = 60 1827 - 1856 = 61 1857 - 1887 = 62 1888 - 1917 = 63 1918 - 1948 = 64 1949 - 1978 = 65 1979 - 2009 = 66 2010 - 2039 = 67 2040 - 2070 = 68 2071 - 2100 = 69 2101 - 2131 = 70 2132 - 2161 = 71 2162 - 2192 = 72 2193 - 2222 = 73 2223 - 2253 = 74 2254 - 2283 = 75 2284 - 2314 = 76 2315 - 2344 = 77 2345 - 2375 = 78 2376 - 2405 = 79 2406 - 2436 = 80 2437 - 2466 = 81 2467 - 2497 = 82 2498 - 2527 = 83 2528 - 2557 = 84 ; value dayspan /* Format created 3/18/2014 by Randy shows lenght of each month period adjsting for leap years */ 1 - 30 = 30 31 - 61 = 31 62 - 91 = 30 92 - 122 = 31 123 - 152 = 30 153 - 183 = 31 184 - 213 = 30 214 - 244 = 31 245 - 274 = 30 275 - 305 = 31 306 - 335 = 30 336 - 365 = 30 366 - 395 = 30 396 - 426 = 31 427 - 456 = 30 457 - 487 = 31 488 - 517 = 30 518 - 548 = 31 549 - 578 = 30 579 - 609 = 31 610 - 639 = 30 640 - 670 = 31 671 - 700 = 30 701 - 731 = 31 732 - 761 = 30 762 - 792 = 31 793 - 822 = 30 823 - 853 = 31 854 - 883 = 30 884 - 914 = 31 915 - 944 = 30 945 - 975 = 31 976 - 1005 = 30 1006 - 1036 = 31 1037 - 1066 = 30 1067 - 1096 = 30 1097 - 1126 = 30 1127 - 1157 = 31 1158 - 1187 = 30 1188 - 1218 = 31 1219 - 1248 = 30 1249 - 1279 = 31 1280 - 1309 = 30 1310 - 1340 = 31 1341 - 1370 = 30 1371 - 1401 = 31 1402 - 1431 = 30 1432 - 1461 = 30 1462 - 1491 = 30 1492 - 1522 = 31 1523 - 1552 = 30 1553 - 1583 = 31 1584 - 1613 = 30 1614 - 1644 = 31 1645 - 1674 = 30 1675 - 1705 = 31 1706 - 1735 = 30 1736 - 1766 = 31 1767 - 1796 = 30 1797 - 1826 = 30 1827 - 1856 = 30 1857 - 1887 = 31 1888 - 1917 = 30 1918 - 1948 = 31 1949 - 1978 = 30 1979 - 2009 = 31 2010 - 2039 = 30 2040 - 2070 = 31 2071 - 2100 = 30 2101 - 2131 = 31 2132 - 2161 = 30 2162 - 2192 = 31 2193 - 2222 = 30 2223 - 2253 = 31 2254 - 2283 = 30 2284 - 2314 = 31 2315 - 2344 = 30 2345 - 2375 = 31 2376 - 2405 = 30 2406 - 2436 = 31 2437 - 2466 = 30 2467 - 2497 = 31 2498 - 2527 = 30 2528 - 2557 = 30 ; value nextspan /* Format created 3/18/2014 by Randy looks ahead to length of next period adjusting for leap years to find no visit spells*/ 1 - 30 = 31 31 - 61 = 30 62 - 91 = 31 92 - 122 = 30 123 - 152 = 31 153 - 183 = 30 184 - 213 = 31 214 - 244 = 30 245 - 274 = 31 275 - 305 = 30 306 - 335 = 30 336 - 365 = 30 366 - 395 = 31 396 - 426 = 30 427 - 456 = 31 457 - 487 = 30 488 - 517 = 31 518 - 548 = 30 549 - 578 = 31 579 - 609 = 30 610 - 639 = 31 640 - 670 = 30 671 - 700 = 31 701 - 731 = 30 732 - 761 = 31 762 - 792 = 30 793 - 822 = 31 823 - 853 = 30 854 - 883 = 31 884 - 914 = 30 915 - 944 = 31 945 - 975 = 30 976 - 1005 = 31 1006 - 1036 = 30 1037 - 1066 = 30 1067 - 1096 = 30 1097 - 1126 = 31 1127 - 1157 = 30 1158 - 1187 = 31 1188 - 1218 = 30 1219 - 1248 = 31 1249 - 1279 = 30 1280 - 1309 = 31 1310 - 1340 = 30 1341 - 1370 = 31 1371 - 1401 = 30 1402 - 1431 = 30 1432 - 1461 = 30 1462 - 1491 = 31 1492 - 1522 = 30 1523 - 1552 = 31 1553 - 1583 = 30 1584 - 1613 = 31 1614 - 1644 = 30 1645 - 1674 = 31 1675 - 1705 = 30 1706 - 1735 = 31 1736 - 1766 = 30 1767 - 1796 = 30 1797 - 1826 = 30 1827 - 1856 = 31 1857 - 1887 = 30 1888 - 1917 = 31 1918 - 1948 = 30 1949 - 1978 = 31 1979 - 2009 = 30 2010 - 2039 = 31 2040 - 2070 = 30 2071 - 2100 = 31 2101 - 2131 = 30 2132 - 2161 = 31 2162 - 2192 = 30 2193 - 2222 = 31 2223 - 2253 = 30 2254 - 2283 = 31 2284 - 2314 = 30 2315 - 2344 = 31 2345 - 2375 = 30 2376 - 2405 = 31 2406 - 2436 = 30 2437 - 2466 = 31 2467 - 2497 = 30 2498 - 2527 = 30 2528 - 2557 = 30 ; value splityear /* RE 4/17/2014 spells starting in these intervals will span more than one calendar year */ 337 - 365 = 1 702 - 731 = 1 1068 - 1096 = 1 1433 - 1461 = 1 1798 - 1826 = 1 2163 - 2192 = 1 2529 - 2557 = 1 other = 0 ; options source;