from samplics.datasets import load_nhanes2brr, load_nhanes2jk, load_nmihs
from samplics.estimation import ReplicateEstimator
from samplics.utils.types import PopParam, RepMethod
Replicated-based Estimation
Samplics’s class TaylorEstimator uses replicate-based methods (bootstrap, brr/fay, and jackknife) to estimate the variance of population parameters.
Bootstrap
# Load NMIHS sample data
= load_nmihs()
nmihs_dict = nmihs_dict["data"]
nmihs
15) nmihs.head(
finalwgt | birth_weight | bsrw1 | bsrw2 | bsrw3 | bsrw4 | bsrw5 | bsrw6 | bsrw7 | bsrw8 | ... | bsrw41 | bsrw42 | bsrw43 | bsrw44 | bsrw45 | bsrw46 | bsrw47 | bsrw48 | bsrw49 | bsrw50 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 24.67243 | 1270 | 49.403603 | 0.000000 | 49.403603 | 0.000000 | 24.701801 | 49.403603 | 24.701801 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 49.403603 | 0.000000 | 74.105408 | 49.403603 | 24.701801 | 49.403603 |
1 | 23.56827 | 879 | 23.596327 | 47.192654 | 0.000000 | 0.000000 | 47.192654 | 23.596327 | 47.192654 | 23.596327 | ... | 47.192654 | 23.596327 | 0.000000 | 47.192654 | 23.596327 | 47.192654 | 23.596327 | 47.192654 | 23.596327 | 23.596327 |
2 | 24.67243 | 794 | 24.701801 | 0.000000 | 24.701801 | 0.000000 | 0.000000 | 24.701801 | 0.000000 | 0.000000 | ... | 24.701801 | 0.000000 | 0.000000 | 24.701801 | 24.701801 | 24.701801 | 0.000000 | 49.403603 | 24.701801 | 98.807205 |
3 | 20.33146 | 1446 | 40.711327 | 0.000000 | 0.000000 | 20.355663 | 40.711327 | 20.355663 | 0.000000 | 20.355663 | ... | 0.000000 | 20.355663 | 61.066994 | 61.066994 | 40.711327 | 40.711327 | 20.355663 | 20.355663 | 81.422653 | 40.711327 |
4 | 21.83328 | 830 | 21.859272 | 21.859272 | 0.000000 | 0.000000 | 0.000000 | 21.859272 | 0.000000 | 21.859272 | ... | 65.577812 | 0.000000 | 21.859272 | 21.859272 | 21.859272 | 0.000000 | 21.859272 | 0.000000 | 0.000000 | 0.000000 |
5 | 23.56827 | 1304 | 70.788986 | 23.596327 | 23.596327 | 23.596327 | 0.000000 | 23.596327 | 47.192654 | 0.000000 | ... | 47.192654 | 47.192654 | 23.596327 | 23.596327 | 23.596327 | 23.596327 | 0.000000 | 23.596327 | 23.596327 | 0.000000 |
6 | 18.67915 | 1106 | 18.701387 | 56.104160 | 0.000000 | 0.000000 | 18.701387 | 18.701387 | 18.701387 | 0.000000 | ... | 18.701387 | 0.000000 | 18.701387 | 0.000000 | 18.701387 | 18.701387 | 18.701387 | 18.701387 | 18.701387 | 37.402775 |
7 | 24.63370 | 1418 | 24.663025 | 49.326050 | 0.000000 | 24.663025 | 0.000000 | 24.663025 | 0.000000 | 24.663025 | ... | 24.663025 | 0.000000 | 0.000000 | 0.000000 | 49.326050 | 49.326050 | 24.663025 | 0.000000 | 24.663025 | 0.000000 |
8 | 20.33146 | 1474 | 0.000000 | 40.711327 | 40.711327 | 0.000000 | 20.355663 | 0.000000 | 0.000000 | 40.711327 | ... | 40.711327 | 0.000000 | 20.355663 | 20.355663 | 20.355663 | 20.355663 | 0.000000 | 20.355663 | 20.355663 | 20.355663 |
9 | 20.33146 | 454 | 0.000000 | 20.355663 | 20.355663 | 20.355663 | 61.066994 | 0.000000 | 0.000000 | 40.711327 | ... | 0.000000 | 61.066994 | 0.000000 | 20.355663 | 0.000000 | 20.355663 | 20.355663 | 20.355663 | 81.422653 | 0.000000 |
10 | 24.67243 | 1380 | 0.000000 | 24.701801 | 24.701801 | 24.701801 | 0.000000 | 49.403603 | 0.000000 | 24.701801 | ... | 0.000000 | 49.403603 | 49.403603 | 0.000000 | 74.105408 | 24.701801 | 0.000000 | 24.701801 | 98.807205 | 49.403603 |
11 | 20.33146 | 539 | 61.066994 | 20.355663 | 20.355663 | 0.000000 | 20.355663 | 20.355663 | 40.711327 | 40.711327 | ... | 20.355663 | 40.711327 | 20.355663 | 40.711327 | 61.066994 | 0.000000 | 0.000000 | 20.355663 | 20.355663 | 20.355663 |
12 | 24.63370 | 1021 | 49.326050 | 24.663025 | 98.652100 | 24.663025 | 0.000000 | 73.989075 | 24.663025 | 0.000000 | ... | 24.663025 | 0.000000 | 49.326050 | 0.000000 | 24.663025 | 73.989075 | 73.989075 | 24.663025 | 24.663025 | 0.000000 |
13 | 21.83328 | 1049 | 65.577812 | 21.859272 | 21.859272 | 21.859272 | 0.000000 | 65.577812 | 21.859272 | 21.859272 | ... | 0.000000 | 65.577812 | 0.000000 | 0.000000 | 21.859272 | 43.718544 | 21.859272 | 43.718544 | 0.000000 | 21.859272 |
14 | 18.67915 | 1134 | 0.000000 | 18.701387 | 37.402775 | 0.000000 | 37.402775 | 0.000000 | 0.000000 | 56.104160 | ... | 18.701387 | 18.701387 | 0.000000 | 18.701387 | 0.000000 | 56.104160 | 0.000000 | 56.104160 | 0.000000 | 56.104160 |
15 rows × 52 columns
Let’s estimate the average birth weight using the bootstrap weights.
# rep_wgt_boot = nmihsboot.loc[:, "bsrw1":"bsrw50"]
= ReplicateEstimator(RepMethod.bootstrap, PopParam.mean).estimate(
birthwgt =nmihs["birth_weight"],
y=nmihs["finalwgt"],
samp_weight=nmihs.loc[:, "bsrw1":"bsrw50"],
rep_weights=True,
remove_nan
)
print(birthwgt)
SAMPLICS - Estimation of Mean
Number of strata: None
Number of psus: None
Degree of freedom: 49
MEAN SE LCI UCI CV
2679.127143 31.053792 2616.722212 2741.532074 0.011591
Balanced repeated replication (BRR)
# Load NMIHS sample data
= load_nhanes2brr()
nhanes2brr_dict = nhanes2brr_dict["data"]
nhanes2brr
15) nhanes2brr.head(
height | weight | finalwgt | brr_1 | brr_2 | brr_3 | brr_4 | brr_5 | brr_6 | brr_7 | ... | brr_23 | brr_24 | brr_25 | brr_26 | brr_27 | brr_28 | brr_29 | brr_30 | brr_31 | brr_32 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 174.59801 | 62.480000 | 8995 | 0 | 17990 | 17990 | 0 | 17990 | 0 | 0 | ... | 17990 | 0 | 0 | 17990 | 17990 | 0 | 17990 | 0 | 0 | 17990 |
1 | 152.29700 | 48.759998 | 25964 | 0 | 51928 | 51928 | 0 | 51928 | 0 | 0 | ... | 51928 | 0 | 0 | 51928 | 51928 | 0 | 51928 | 0 | 0 | 51928 |
2 | 164.09801 | 67.250000 | 8752 | 0 | 17504 | 17504 | 0 | 17504 | 0 | 0 | ... | 17504 | 0 | 0 | 17504 | 17504 | 0 | 17504 | 0 | 0 | 17504 |
3 | 162.59801 | 94.459999 | 4310 | 0 | 8620 | 8620 | 0 | 8620 | 0 | 0 | ... | 8620 | 0 | 0 | 8620 | 8620 | 0 | 8620 | 0 | 0 | 8620 |
4 | 163.09801 | 74.279999 | 9011 | 0 | 18022 | 18022 | 0 | 18022 | 0 | 0 | ... | 18022 | 0 | 0 | 18022 | 18022 | 0 | 18022 | 0 | 0 | 18022 |
5 | 147.09801 | 66.000000 | 4310 | 0 | 8620 | 8620 | 0 | 8620 | 0 | 0 | ... | 8620 | 0 | 0 | 8620 | 8620 | 0 | 8620 | 0 | 0 | 8620 |
6 | 153.89799 | 54.549999 | 3201 | 0 | 6402 | 6402 | 0 | 6402 | 0 | 0 | ... | 6402 | 0 | 0 | 6402 | 6402 | 0 | 6402 | 0 | 0 | 6402 |
7 | 160.00000 | 58.970001 | 25386 | 0 | 50772 | 50772 | 0 | 50772 | 0 | 0 | ... | 50772 | 0 | 0 | 50772 | 50772 | 0 | 50772 | 0 | 0 | 50772 |
8 | 164.00000 | 68.949997 | 12102 | 0 | 24204 | 24204 | 0 | 24204 | 0 | 0 | ... | 24204 | 0 | 0 | 24204 | 24204 | 0 | 24204 | 0 | 0 | 24204 |
9 | 176.59801 | 65.430000 | 4312 | 0 | 8624 | 8624 | 0 | 8624 | 0 | 0 | ... | 8624 | 0 | 0 | 8624 | 8624 | 0 | 8624 | 0 | 0 | 8624 |
10 | 156.19901 | 76.769997 | 4031 | 0 | 8062 | 8062 | 0 | 8062 | 0 | 0 | ... | 8062 | 0 | 0 | 8062 | 8062 | 0 | 8062 | 0 | 0 | 8062 |
11 | 170.09801 | 58.400002 | 3628 | 0 | 7256 | 7256 | 0 | 7256 | 0 | 0 | ... | 7256 | 0 | 0 | 7256 | 7256 | 0 | 7256 | 0 | 0 | 7256 |
12 | 151.79700 | 65.769997 | 28590 | 0 | 57180 | 57180 | 0 | 57180 | 0 | 0 | ... | 57180 | 0 | 0 | 57180 | 57180 | 0 | 57180 | 0 | 0 | 57180 |
13 | 154.19901 | 48.540001 | 22754 | 0 | 45508 | 45508 | 0 | 45508 | 0 | 0 | ... | 45508 | 0 | 0 | 45508 | 45508 | 0 | 45508 | 0 | 0 | 45508 |
14 | 171.09801 | 73.029999 | 7119 | 0 | 14238 | 14238 | 0 | 14238 | 0 | 0 | ... | 14238 | 0 | 0 | 14238 | 14238 | 0 | 14238 | 0 | 0 | 14238 |
15 rows × 35 columns
Let’s estimate the average birth weight using the BRR weights.
= ReplicateEstimator(RepMethod.brr, PopParam.ratio)
brr
= brr.estimate(
ratio_wgt_hgt =nhanes2brr["weight"],
y=nhanes2brr["finalwgt"],
samp_weight=nhanes2brr["height"],
x=nhanes2brr.loc[:, "brr_1":"brr_32"],
rep_weights=True,
remove_nan
)
print(ratio_wgt_hgt)
SAMPLICS - Estimation of Ratio
Number of strata: None
Number of psus: None
Degree of freedom: 16
RATIO SE LCI UCI CV
0.426082 0.00273 0.420295 0.43187 0.006407
Jackknife
# Load NMIHS sample data
= load_nhanes2jk()
nhanes2jk_dict = nhanes2jk_dict["data"]
nhanes2jk
15) nhanes2jk.head(
height | weight | finalwgt | jkw_1 | jkw_2 | jkw_3 | jkw_4 | jkw_5 | jkw_6 | jkw_7 | ... | jkw_53 | jkw_54 | jkw_55 | jkw_56 | jkw_57 | jkw_58 | jkw_59 | jkw_60 | jkw_61 | jkw_62 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 174.59801 | 62.480000 | 8995 | 0 | 17990 | 8995 | 8995 | 8995 | 8995 | 8995 | ... | 8995 | 8995 | 8995 | 8995 | 8995 | 8995 | 8995 | 8995 | 8995 | 8995 |
1 | 152.29700 | 48.759998 | 25964 | 0 | 51928 | 25964 | 25964 | 25964 | 25964 | 25964 | ... | 25964 | 25964 | 25964 | 25964 | 25964 | 25964 | 25964 | 25964 | 25964 | 25964 |
2 | 164.09801 | 67.250000 | 8752 | 0 | 17504 | 8752 | 8752 | 8752 | 8752 | 8752 | ... | 8752 | 8752 | 8752 | 8752 | 8752 | 8752 | 8752 | 8752 | 8752 | 8752 |
3 | 162.59801 | 94.459999 | 4310 | 0 | 8620 | 4310 | 4310 | 4310 | 4310 | 4310 | ... | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 |
4 | 163.09801 | 74.279999 | 9011 | 0 | 18022 | 9011 | 9011 | 9011 | 9011 | 9011 | ... | 9011 | 9011 | 9011 | 9011 | 9011 | 9011 | 9011 | 9011 | 9011 | 9011 |
5 | 147.09801 | 66.000000 | 4310 | 0 | 8620 | 4310 | 4310 | 4310 | 4310 | 4310 | ... | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 | 4310 |
6 | 153.89799 | 54.549999 | 3201 | 0 | 6402 | 3201 | 3201 | 3201 | 3201 | 3201 | ... | 3201 | 3201 | 3201 | 3201 | 3201 | 3201 | 3201 | 3201 | 3201 | 3201 |
7 | 160.00000 | 58.970001 | 25386 | 0 | 50772 | 25386 | 25386 | 25386 | 25386 | 25386 | ... | 25386 | 25386 | 25386 | 25386 | 25386 | 25386 | 25386 | 25386 | 25386 | 25386 |
8 | 164.00000 | 68.949997 | 12102 | 0 | 24204 | 12102 | 12102 | 12102 | 12102 | 12102 | ... | 12102 | 12102 | 12102 | 12102 | 12102 | 12102 | 12102 | 12102 | 12102 | 12102 |
9 | 176.59801 | 65.430000 | 4312 | 0 | 8624 | 4312 | 4312 | 4312 | 4312 | 4312 | ... | 4312 | 4312 | 4312 | 4312 | 4312 | 4312 | 4312 | 4312 | 4312 | 4312 |
10 | 156.19901 | 76.769997 | 4031 | 0 | 8062 | 4031 | 4031 | 4031 | 4031 | 4031 | ... | 4031 | 4031 | 4031 | 4031 | 4031 | 4031 | 4031 | 4031 | 4031 | 4031 |
11 | 156.69901 | 57.040001 | 3813 | 7626 | 0 | 3813 | 3813 | 3813 | 3813 | 3813 | ... | 3813 | 3813 | 3813 | 3813 | 3813 | 3813 | 3813 | 3813 | 3813 | 3813 |
12 | 182.00000 | 99.110001 | 7445 | 14890 | 0 | 7445 | 7445 | 7445 | 7445 | 7445 | ... | 7445 | 7445 | 7445 | 7445 | 7445 | 7445 | 7445 | 7445 | 7445 | 7445 |
13 | 158.29700 | 84.480003 | 3528 | 7056 | 0 | 3528 | 3528 | 3528 | 3528 | 3528 | ... | 3528 | 3528 | 3528 | 3528 | 3528 | 3528 | 3528 | 3528 | 3528 | 3528 |
14 | 152.89799 | 70.309998 | 7790 | 15580 | 0 | 7790 | 7790 | 7790 | 7790 | 7790 | ... | 7790 | 7790 | 7790 | 7790 | 7790 | 7790 | 7790 | 7790 | 7790 | 7790 |
15 rows × 65 columns
In this case, stratification was used to calculate the jackknife weights. The stratum variable is not indicated in the dataset or survey design description. However, it says that the number of strata is 31 and the number of replicates is 62. Hence, the jackknife replicate coefficient is \((n_h - 1) / n_h = (2-1) / 2 = 0.5\). Now we can call replicate() and specify rep_coefs = 0.5.
= ReplicateEstimator(RepMethod.jackknife, PopParam.ratio)
jackknife
= jackknife.estimate(
ratio_wgt_hgt2 =nhanes2jk["weight"],
y=nhanes2jk["finalwgt"],
samp_weight=nhanes2jk["height"],
x=nhanes2jk.loc[:, "jkw_1":"jkw_62"],
rep_weights=0.5,
rep_coefs=True,
remove_nan
)
print(ratio_wgt_hgt2)
SAMPLICS - Estimation of Ratio
Number of strata: None
Number of psus: None
Degree of freedom: 61
RATIO SE LCI UCI CV
0.423502 0.003464 0.416574 0.430429 0.00818