* generate sampling weights for mayflies;
* created 12/14/04 by BRG from code received from JS;
* last revised: 9/23/09;
* note: users will want to replace the library name "webcode" used in
  this code with their own library name;

* ---------------------------------------------------------------------
  Overview of the steps below
    1. capN_h_invert : stratum population sizes (_total_) by
                       fieldsta x habclass, replicated for each year
                       1993-2004
    2. invert        : work copy of webcode.invert with habclass "MC"
                       relabeled to "MCB" so strata labels match step 1
    3. n_h           : count of nonmissing mayflies values per
                       fieldsta x year x habclass
    4. weights       : sweight = _total_ / n_h (missing when n_h is 0
                       or missing)
    5. webcode.invertwt : data merged with weights, where sweight is set
                       to missing on records with missing mayflies
    6. wtcheck       : strata whose summed weights do not equal _total_
  ---------------------------------------------------------------------;

* enter population sizes from
  http://www.umesc.usgs.gov/ltrmp/stats/population_sizes.xls;
* habclass is read with modified list input (colon modifier) so the
  label is taken as a blank-delimited token of up to 5 characters
  instead of a fixed 5-column field;
data capN_h_invert;
  input fieldsta _total_ habclass :$5.;
  * generate a set of _total_ values for every year;
  do year = 1993 to 2004 by 1;
    output;
  end;
  datalines;
1 2342 IMP
1 2252 MCB
1 2887 SC
1 9203 BWC
2 2529 MCB
2 5506 SC
2 9203 BWC
2 14021 IMP
3 4579 MCB
3 3938 SC
3 11115 BWC
3 14688 IMP
4 13251 MCB
4 5983 SC
4 1646 BWC
4 760 IMP
5 13537 MCB
5 1747 SC
6 9776 MCB
6 714 SC
6 4542 BWC
;
run;

* ensure strata labels match across the invert and capN_h_invert
  datasets;
data invert;
  set webcode.invert;
  if habclass = "MC" then habclass = "MCB";
run;

* sort population and data files;
proc sort data=capN_h_invert;
  by fieldsta year habclass;
run;

* sort the recoded work copy in place. Sorting webcode.invert with
  out=invert at this point would overwrite the recoded dataset and
  discard the MC to MCB relabeling, leaving those records unmatched to
  the population table and therefore without weights;
proc sort data=invert;
  by fieldsta year habclass;
run;

* calculate sampling weights;
* n_h is the number of nonmissing mayflies values in each stratum;
proc means data=invert noprint;
  var mayflies;
  output out=n_h n=n_h;
  by fieldsta year habclass;
run;

data both;
  merge capN_h_invert n_h;
  by fieldsta year habclass;
run;

* a stratum with n_h of 0 or missing gets a missing weight rather than
  a division by zero;
data weights;
  set both;
  if n_h then sweight = _total_ / n_h;
  else sweight = .;
run;

* merge with datafile;
* NOTE(review): weights holds a row for every stratum and year in
  capN_h_invert, so stratum-years with no records in invert appear to
  come through this merge as rows carrying only the key and weight
  variables - confirm whether downstream analyses expect that;
data invertwt1;
  merge invert weights;
  by fieldsta year habclass;
run;

* records with a missing mayflies value were not counted in n_h, so
  they must not carry a weight;
data webcode.invertwt;
  set invertwt1;
  if mayflies = . then sweight = .;
run;

* confirm weights sum to _totals_ (check log to confirm no records met
  the mismatch criteria);
proc means data=webcode.invertwt noprint;
  id _total_;
  var sweight;
  output out=wtcheck sum=sumweight;
  by fieldsta year habclass;
run;

* print only strata with a nonmissing, nonzero weight sum that differs
  from the population total;
proc print data=wtcheck noobs;
  where round(sumweight,.1) ne _total_ and sumweight;
run;