"""Bootstrap functions for standardized IPW estimators with repeated cross-sections."""
import warnings
import numpy as np
import statsmodels.api as sm
from moderndid.cupy.backend import get_backend, to_numpy
from moderndid.cupy.regression import cupy_logistic_irls
from ..utils import _validate_inputs
[docs]
def wboot_std_ipw_rc(
y,
post,
d,
x,
i_weights,
n_bootstrap=1000,
trim_level=0.995,
random_state=None,
):
r"""Compute bootstrap standardized IPW DiD estimator for repeated cross-sections.
Implements the bootstrap procedure for computing standardized inverse
probability weighted (IPW) difference-in-differences estimates with
repeated cross-sectional data. The standardized IPW estimator normalizes
the weighted outcomes by the sum of weights within each group-period cell.
Parameters
----------
y : ndarray
Outcome variable array of shape (n_units,).
post : ndarray
Post-treatment period indicator array of shape (n_units,).
Must contain only 0 and 1 values.
d : ndarray
Treatment group indicator array of shape (n_units,).
Must contain only 0 and 1 values.
x : ndarray
Covariate matrix of shape (n_units, n_features) including intercept.
i_weights : ndarray
Individual observation weights of shape (n_units,).
n_bootstrap : int, default=1000
Number of bootstrap iterations.
trim_level : float, default=0.995
Trimming threshold for propensity scores. Control units with
propensity scores above this level are given zero weight.
random_state : int | np.random.Generator | None, default=None
Controls random number generation for reproducibility.
Returns
-------
ndarray
Bootstrap estimates of shape (n_bootstrap,) containing the
standardized IPW DiD estimates for each bootstrap iteration.
See Also
--------
wboot_ipw_rc : Non-standardized IPW bootstrap for repeated cross-sections.
wboot_aipw_rc : Bootstrap AIPW estimator for repeated cross-sections.
"""
arrays_dict = {
"y": y,
"post": post,
"d": d,
"i_weights": i_weights,
}
n_units = _validate_inputs(arrays_dict, x, n_bootstrap, trim_level)
rng = np.random.default_rng(random_state)
bootstrap_estimates = np.zeros(n_bootstrap)
for b in range(n_bootstrap):
v = rng.exponential(scale=1.0, size=n_units)
b_weights = i_weights * v
try:
xp = get_backend()
if xp is not np:
_, ps_b = cupy_logistic_irls(
xp.asarray(d, dtype=xp.float64),
xp.asarray(x, dtype=xp.float64),
xp.asarray(b_weights, dtype=xp.float64),
)
ps_b = to_numpy(ps_b)
else:
logit_model = sm.GLM(d, x, family=sm.families.Binomial(), freq_weights=b_weights)
logit_results = logit_model.fit()
ps_b = logit_results.predict(x)
ps_b = np.clip(ps_b, 1e-6, 1 - 1e-6)
except (ValueError, np.linalg.LinAlgError) as e:
warnings.warn(
f"Bootstrap iteration {b}: Propensity score estimation failed: {e}",
UserWarning,
)
bootstrap_estimates[b] = np.nan
continue
trim_ps_mask = np.ones_like(ps_b, dtype=bool)
control_mask = d == 0
trim_ps_mask[control_mask] = ps_b[control_mask] < trim_level
b_weights_trimmed = b_weights.copy()
b_weights_trimmed[~trim_ps_mask] = 0
try:
w_treat_pre = b_weights_trimmed * d * (1 - post)
w_treat_post = b_weights_trimmed * d * post
w_cont_pre = b_weights_trimmed * ps_b * (1 - d) * (1 - post) / (1 - ps_b)
w_cont_post = b_weights_trimmed * ps_b * (1 - d) * post / (1 - ps_b)
w_cont_pre[~trim_ps_mask] = 0
w_cont_post[~trim_ps_mask] = 0
denom_treat_pre = np.sum(w_treat_pre)
denom_treat_post = np.sum(w_treat_post)
denom_cont_pre = np.sum(w_cont_pre)
denom_cont_post = np.sum(w_cont_post)
if denom_treat_pre == 0 or denom_treat_post == 0 or denom_cont_pre == 0 or denom_cont_post == 0:
warnings.warn(
f"Bootstrap iteration {b}: Zero weight sum in one or more groups.",
UserWarning,
)
bootstrap_estimates[b] = np.nan
continue
ipw_treat = np.sum(w_treat_post * y) / denom_treat_post - np.sum(w_treat_pre * y) / denom_treat_pre
ipw_control = np.sum(w_cont_post * y) / denom_cont_post - np.sum(w_cont_pre * y) / denom_cont_pre
bootstrap_estimates[b] = ipw_treat - ipw_control
except (ValueError, ZeroDivisionError) as e:
warnings.warn(
f"Bootstrap iteration {b}: IPW computation failed: {e}",
UserWarning,
)
bootstrap_estimates[b] = np.nan
continue
except RuntimeWarning:
warnings.warn(
f"Bootstrap iteration {b}: Numerical warning in IPW computation.",
UserWarning,
)
bootstrap_estimates[b] = np.nan
continue
n_failed = np.sum(np.isnan(bootstrap_estimates))
if n_failed > 0:
pct_failed = (n_failed / n_bootstrap) * 100
warnings.warn(
f"{n_failed} out of {n_bootstrap} bootstrap iterations failed ({pct_failed:.1f}%). "
f"Results based on {n_bootstrap - n_failed} successful iterations.",
UserWarning,
)
return bootstrap_estimates