Source code for moderndid.drdid.bootstrap.boot_std_ipw_rc

"""Bootstrap functions for standardized IPW estimators with repeated cross-sections."""

import warnings

import numpy as np
import statsmodels.api as sm

from moderndid.cupy.backend import get_backend, to_numpy
from moderndid.cupy.regression import cupy_logistic_irls

from ..utils import _validate_inputs


[docs] def wboot_std_ipw_rc( y, post, d, x, i_weights, n_bootstrap=1000, trim_level=0.995, random_state=None, ): r"""Compute bootstrap standardized IPW DiD estimator for repeated cross-sections. Implements the bootstrap procedure for computing standardized inverse probability weighted (IPW) difference-in-differences estimates with repeated cross-sectional data. The standardized IPW estimator normalizes the weighted outcomes by the sum of weights within each group-period cell. Parameters ---------- y : ndarray Outcome variable array of shape (n_units,). post : ndarray Post-treatment period indicator array of shape (n_units,). Must contain only 0 and 1 values. d : ndarray Treatment group indicator array of shape (n_units,). Must contain only 0 and 1 values. x : ndarray Covariate matrix of shape (n_units, n_features) including intercept. i_weights : ndarray Individual observation weights of shape (n_units,). n_bootstrap : int, default=1000 Number of bootstrap iterations. trim_level : float, default=0.995 Trimming threshold for propensity scores. Control units with propensity scores above this level are given zero weight. random_state : int | np.random.Generator | None, default=None Controls random number generation for reproducibility. Returns ------- ndarray Bootstrap estimates of shape (n_bootstrap,) containing the standardized IPW DiD estimates for each bootstrap iteration. See Also -------- wboot_ipw_rc : Non-standardized IPW bootstrap for repeated cross-sections. wboot_aipw_rc : Bootstrap AIPW estimator for repeated cross-sections. """ arrays_dict = { "y": y, "post": post, "d": d, "i_weights": i_weights, } n_units = _validate_inputs(arrays_dict, x, n_bootstrap, trim_level) rng = np.random.default_rng(random_state) bootstrap_estimates = np.zeros(n_bootstrap) for b in range(n_bootstrap): v = rng.exponential(scale=1.0, size=n_units) b_weights = i_weights * v try: xp = get_backend() if xp is not np: _, ps_b = cupy_logistic_irls( xp.asarray(d, dtype=xp.float64), xp.asarray(x, dtype=xp.float64), xp.asarray(b_weights, dtype=xp.float64), ) ps_b = to_numpy(ps_b) else: logit_model = sm.GLM(d, x, family=sm.families.Binomial(), freq_weights=b_weights) logit_results = logit_model.fit() ps_b = logit_results.predict(x) ps_b = np.clip(ps_b, 1e-6, 1 - 1e-6) except (ValueError, np.linalg.LinAlgError) as e: warnings.warn( f"Bootstrap iteration {b}: Propensity score estimation failed: {e}", UserWarning, ) bootstrap_estimates[b] = np.nan continue trim_ps_mask = np.ones_like(ps_b, dtype=bool) control_mask = d == 0 trim_ps_mask[control_mask] = ps_b[control_mask] < trim_level b_weights_trimmed = b_weights.copy() b_weights_trimmed[~trim_ps_mask] = 0 try: w_treat_pre = b_weights_trimmed * d * (1 - post) w_treat_post = b_weights_trimmed * d * post w_cont_pre = b_weights_trimmed * ps_b * (1 - d) * (1 - post) / (1 - ps_b) w_cont_post = b_weights_trimmed * ps_b * (1 - d) * post / (1 - ps_b) w_cont_pre[~trim_ps_mask] = 0 w_cont_post[~trim_ps_mask] = 0 denom_treat_pre = np.sum(w_treat_pre) denom_treat_post = np.sum(w_treat_post) denom_cont_pre = np.sum(w_cont_pre) denom_cont_post = np.sum(w_cont_post) if denom_treat_pre == 0 or denom_treat_post == 0 or denom_cont_pre == 0 or denom_cont_post == 0: warnings.warn( f"Bootstrap iteration {b}: Zero weight sum in one or more groups.", UserWarning, ) bootstrap_estimates[b] = np.nan continue ipw_treat = np.sum(w_treat_post * y) / denom_treat_post - np.sum(w_treat_pre * y) / denom_treat_pre ipw_control = np.sum(w_cont_post * y) / denom_cont_post - np.sum(w_cont_pre * y) / denom_cont_pre bootstrap_estimates[b] = ipw_treat - ipw_control except (ValueError, ZeroDivisionError) as e: warnings.warn( f"Bootstrap iteration {b}: IPW computation failed: {e}", UserWarning, ) bootstrap_estimates[b] = np.nan continue except RuntimeWarning: warnings.warn( f"Bootstrap iteration {b}: Numerical warning in IPW computation.", UserWarning, ) bootstrap_estimates[b] = np.nan continue n_failed = np.sum(np.isnan(bootstrap_estimates)) if n_failed > 0: pct_failed = (n_failed / n_bootstrap) * 100 warnings.warn( f"{n_failed} out of {n_bootstrap} bootstrap iterations failed ({pct_failed:.1f}%). " f"Results based on {n_bootstrap - n_failed} successful iterations.", UserWarning, ) return bootstrap_estimates