* generate sampling weights for fish: example using bluegill day electrofishing data from 1993 - 2003; * filename: sampling weights BLGL day.sas; * created 9/21/09 by BG; * last revised: 9/23/09 (BG); /* THIS CODE IS PROVIDED WITHOUT WARRANTY, WHETHER EXPRESSED OR IMPLIED. PLS SEND COMMENTS ON THE CODE OR DESCRIPTIONS OF ERRORS IN THE CODE TO BRIAN GRAY AT brgray@usgs.gov */ * notes: - this code is designed to help jump start efforts by users of LTRMP fisheries data to obtain design-based and/or design-adjusted estimates from those data. the code has not been rigorously reviewed, and should not be treated as definitive. detailed questions about LTRMP fisheries sampling designs should be directed to Brian Ickes at bickes@usgs.gov or Jim Rogala at jrogala@usgs.gov - the code ignores that period represents a temporal stratum (if only because sample sizes are often small within periods) - sampling weights are calculated both with period 1 fish (sweightp_all) and without that period (sweightp_23). the former allows the estimation of _annual_ means that include period 1 while the latter presumes means will be calculated w/o period 1. 
Design-based estimates of temporal trend over multiple years should use weights that exclude period 1;

* ---------------------------------------------------------------------------;
* overview of the steps below:
  1. fish_capN_h          - stratum population sizes (_total_), replicated for every year
  2. bgills               - the fish dataset with recoded strata, CPUE and effort variables
  3. weights, weights_p23 - _total_ / n_h by field station, year and stratum, where n_h is
                            the number of non-missing cpue values (all periods, and periods
                            greater than 1, respectively)
  4. bgillswt, blglP8wt   - the fish dataset with sweightp_all and sweightp_23 attached
  5. wtcheck, wtcheckp_23 - checks that the weights sum to _total_ within each stratum;
* ---------------------------------------------------------------------------;

* enter population sizes from http://www.umesc.usgs.gov/ltrmp/stats/population_sizes.xls;
* each datalines record holds: component, field station, stratum, stratum code, _total_;
data fish_capN_h;
  input component $ fstation stratum2 $ stratum_code _total_;
  * the stratum column is named stratum2 to avoid conflict with a SAS naming convention;
  *format stratum2 $5.;
  drop component;
  * generate a set of _total_ values for every year;
  do year = 1993 to 2004;
    output;
  end;
  datalines;
FISH 1 MCB-O 1502 1486
FISH 1 MCB-S 1503 766
FISH 1 SC 1504 2887
FISH 1 BWC-O 1510 5073
FISH 1 BWC-S 1511 3860
FISH 2 MCB-O 1502 1620
FISH 2 MCB-S 1503 756
FISH 2 SC 1504 4148
FISH 2 BWC-O 1510 1978
FISH 2 BWC-S 1511 3434
FISH 2 IMP-O 1520 13204
FISH 2 IMP-S 1521 494
FISH 3 MCB-O 1502 3527
FISH 3 MCB-S 1503 910
FISH 3 SC 1504 2758
FISH 3 BWC-O 1510 5877
FISH 3 BWC-S 1511 3734
FISH 3 IMP-O 1520 10002
FISH 3 IMP-S 1521 438
FISH 4 MCB-O 1502 10032
FISH 4 MCB-S 1503 3199
FISH 4 SC 1504 5671
FISH 4 BWC-O 1510 358
FISH 4 BWC-S 1511 764
FISH 4 IMP-O 1520 588
FISH 4 IMP-S 1521 172
FISH 5 MCB-O 1502 10001
FISH 5 MCB-S 1503 2592
FISH 5 SC 1504 1872
FISH 6 MCB-O 1502 4829
FISH 6 MCB-S 1503 4935
FISH 6 SC 1504 653
FISH 6 BWC-O 1510 6946
FISH 6 BWC-S 1511 3616
;
run;

* modify the selected fish dataset;
data bgills;
  set blglp8; * <--change the input dataset here;

  * WHERE is applied to stratum as stored in the input dataset, that is, before the recodes below;
  where stratum not in ("MCB-W","TWZ"); * exclude wing dams and trawling-tailwater "strata";

  format stratum2 $8.;

  * modify strata names;
  if stratum = "MCB-U" and gear in ("D","M") then stratum = "MCB-S";
  else if stratum = "MCB-U" and gear in ("HL","HS") then stratum = "MCB-O";
  if stratum in ("SCB","SCB-O","SCB-C") then stratum = "SC"; * "-O" and "-C" represent post-facto stratification by Open River field station personnel;

  * copy stratum to stratum2 to avoid conflict with SAS reserved name. this line must stay below other stratum lines;
  stratum2 = stratum;

  * create CPUE with effort units of 15 minutes (missing when effort is zero or missing);
  if effmin then cpue = catch*15/effmin;
  else cpue = .;

  * effort expressed in 15-minute units;
  if effmin ne . then effmin15 = effmin/15;
  else effmin15 = .;

  * natural log of effort in 15-minute units, defined only for positive effort
    (the effmin > 0 guard keeps LOG from being called on a negative value);
  if effmin > 0 then logeffmin15 = log(effmin15);
  else logeffmin15 = .;

  * logeffmin15 is now kept -- it was previously computed and then discarded by this list;
  keep fishcode fstation year stratum2 period catch cpue effmin effmin15 logeffmin15;
run;

* sort population and data files;
proc sort data=fish_capN_h;
  by fstation year stratum2;
run;

proc sort data=bgills out=bgillssort;
  by fstation year stratum2 period;
run;

* calculate sampling weights across all 3 periods;
* n_h is the number of non-missing cpue values per field station, year and stratum;
proc means data=bgillssort noprint;
  by fstation year stratum2;
  var cpue;
  output out=n_h n=n_h;
run;

data both;
  merge fish_capN_h n_h;
  by fstation year stratum2;
run;

data weights;
  set both;
  * weight is left missing when the stratum has no usable samples (n_h zero or missing);
  if n_h then sweightp_all = _total_ / n_h;
  else sweightp_all = .;
run;

* calculate sampling weights across periods 2 and 3;
proc means data=bgillssort noprint;
  where period gt 1;
  by fstation year stratum2;
  var cpue;
  output out=n_h_p23 n=n_h;
run;

data both_p23;
  merge fish_capN_h n_h_p23;
  by fstation year stratum2;
run;

data weights_p23;
  set both_p23;
  if n_h then sweightp_23 = _total_ / n_h;
  else sweightp_23 = .;
run;

* merge with datafile;
* note: n_h, _TYPE_ and _FREQ_ exist in both weights datasets. where a BY group is present
  in both, the merged record carries the weights_p23 values because that dataset is listed last;
data bgillswt1;
  merge bgillssort weights weights_p23;
  by fstation year stratum2;
run;

* the weights are blanked in a separate step, not in the merge step above, because values
  read from the one-record-per-stratum weights datasets are retained across a BY group;
data bgillswt;
  set bgillswt1;
  * drop strata-only records, which have no fish data;
  where fishcode ne "";
  * records without a cpue value were not counted in n_h, so they carry no weight;
  if cpue = . then do;
    sweightp_all = .;
    sweightp_23 = .;
  end;
  * records outside periods 2 and 3 were not counted in the period 2-3 n_h. blank their
    weight so that sweightp_23 sums to _total_ even when the user does not filter on period;
  if not (period gt 1) then sweightp_23 = .;
run;

* confirm weights sum to _totals_ (check log to confirm no records met the mismatch criteria);
proc means data=bgillswt noprint;
  by fstation year stratum2;
  id _total_;
  var sweightp_all;
  output out=wtcheck sum=sumweightp_all;
run;

proc print data=wtcheck noobs;
  where round(sumweightp_all,.1) ne _total_ and sumweightp_all;
run;

proc means data=bgillswt noprint;
  where period gt 1;
  by fstation year stratum2;
  id _total_;
  var sweightp_23;
  output out=wtcheckp_23 sum=sumweightp_23;
run;

proc print data=wtcheckp_23 noobs;
  where round(sumweightp_23,.1) ne _total_ and sumweightp_23;
run;

* rename dataset (to be comparable with blglP8);
data blglP8wt;
  set bgillswt;
run;