Source code for moderndid.drdid.estimators.twfe_did_rc

"""Two-way fixed effects DiD estimator for repeated cross-sections."""

from typing import NamedTuple

import numpy as np
import statsmodels.api as sm
from scipy import stats

from moderndid.cupy.backend import get_backend, to_numpy
from moderndid.cupy.regression import cupy_wls

from ..bootstrap.boot_mult import mboot_did
from ..bootstrap.boot_twfe_rc import wboot_twfe_rc


class TWFEDIDRCResult(NamedTuple):
    """Result from the two-way fixed effects DiD RC estimator."""

    att: float
    se: float
    uci: float
    lci: float
    boots: np.ndarray | None
    att_inf_func: np.ndarray | None
    args: dict


[docs] def twfe_did_rc( y, post, d, covariates=None, i_weights=None, boot=False, boot_type="weighted", nboot=999, influence_func=False, ): r"""Compute linear two-way fixed effects DiD estimator for the ATT with repeated cross-sections. Implements the linear two-way fixed effects (TWFE) estimator for the ATT with repeated cross-section data, as illustrated in [1]_. The estimator is based on the regression model from equation (2.5) of [1]_ as .. math:: Y_{it} = \alpha_1 + \alpha_2 T_i + \alpha_3 D_i + \tau^{fe}(T_i \cdot D_i) + \theta' X_i + \varepsilon_{it}. Parameters ---------- y : ndarray A 1D array of outcomes from both pre and post-treatment periods. post : ndarray A 1D array of post-treatment dummies (1 if observation belongs to post-treatment period, 0 if observation belongs to pre-treatment period). d : ndarray A 1D array of group indicators (1 if observation is treated in the post-treatment period, 0 otherwise). covariates : ndarray, optional A 2D array of covariates to be used in the regression estimation. We will always include an intercept. i_weights : ndarray, optional A 1D array of weights. If None, then every observation has equal weight. Weights are normalized to have mean 1. boot : bool, default=False Whether to compute bootstrap standard errors. boot_type : {"weighted", "multiplier"}, default="weighted" Type of bootstrap to be performed (not relevant if boot = False). nboot : int, default=999 Number of bootstrap repetitions (not relevant if boot = False). influence_func : bool, default=False Whether to return the influence function. Returns ------- TWFEDIDRCResult A NamedTuple containing the TWFE DiD point estimate, standard error, confidence interval, bootstrap draws, and influence function. Warnings -------- This estimator generally does not recover the ATT. We encourage users to adopt alternative specifications. See Also -------- reg_did_rc : Outcome regression DiD for repeated cross-sections. drdid_imp_rc : Improved doubly robust DiD for repeated cross-sections. ipw_did_rc : Inverse propensity weighted DiD for repeated cross-sections. References ---------- .. [1] Sant'Anna, P. H. C. and Zhao, J. (2020), "Doubly Robust Difference-in-Differences Estimators." Journal of Econometrics, Vol. 219 (1), pp. 101-122. https://doi.org/10.1016/j.jeconom.2020.06.003 """ y, post, d, x, i_weights, n = _validate_and_preprocess_inputs(y, post, d, covariates, i_weights) att, att_inf_func = _fit_twfe_regression(y, post, d, x, i_weights, n) if not boot: se = np.std(att_inf_func, ddof=1) / np.sqrt(n) uci = att + 1.96 * se lci = att - 1.96 * se boots = None else: if nboot is None: nboot = 999 if boot_type == "multiplier": boots = mboot_did(att_inf_func, nboot) se = stats.iqr(boots) / (stats.norm.ppf(0.75) - stats.norm.ppf(0.25)) critical_value = np.quantile(np.abs(boots / se), 0.95) uci = att + critical_value * se lci = att - critical_value * se else: # "weighted" boots = wboot_twfe_rc( y=y, post=post, d=d, x=x if x is not None else np.empty((n, 0)), i_weights=i_weights, n_bootstrap=nboot, ) se = stats.iqr(boots - att) / (stats.norm.ppf(0.75) - stats.norm.ppf(0.25)) critical_value = np.quantile(np.abs((boots - att) / se), 0.95) uci = att + critical_value * se lci = att - critical_value * se if not influence_func: att_inf_func = None return TWFEDIDRCResult( att=att, se=se, uci=uci, lci=lci, boots=boots, att_inf_func=att_inf_func, args={ "panel": False, "boot": boot, "boot_type": boot_type, "nboot": nboot, "type": "twfe", }, )
def _validate_and_preprocess_inputs(y, post, d, covariates, i_weights): """Validate and preprocess input arrays.""" d = np.asarray(d).flatten() post = np.asarray(post).flatten() y = np.asarray(y).flatten() n = len(d) if i_weights is None: i_weights = np.ones(n) else: i_weights = np.asarray(i_weights).flatten() if np.any(i_weights < 0): raise ValueError("i_weights must be non-negative.") i_weights = i_weights / np.mean(i_weights) x = None if covariates is not None: covariates = np.asarray(covariates) if covariates.ndim == 1: covariates = covariates.reshape(-1, 1) if covariates.shape[1] > 0 and np.all(covariates[:, 0] == 1): covariates = covariates[:, 1:] if covariates.shape[1] == 0: covariates = None x = None if covariates is not None: x = covariates return y, post, d, x, i_weights, n def _fit_twfe_regression(y, post, d, x, i_weights, n): """Fit TWFE regression and compute influence function.""" if x is not None: design_matrix = np.column_stack( [ np.ones(n), post, d, d * post, x, ] ) else: design_matrix = np.column_stack( [ np.ones(n), post, d, d * post, ] ) try: xp = get_backend() if xp is not np: beta, _ = cupy_wls(xp.asarray(y), xp.asarray(design_matrix), xp.asarray(i_weights)) params = to_numpy(beta) att = params[3] fitted = design_matrix @ params residuals = y - fitted else: wls_model = sm.WLS(y, design_matrix, weights=i_weights) wls_results = wls_model.fit() att = wls_results.params[3] residuals = wls_results.resid # Elements for influence function x_prime_x = design_matrix.T @ (i_weights[:, np.newaxis] * design_matrix) / n if np.linalg.cond(x_prime_x) > 1e15: raise ValueError("The regression design matrix is singular. Consider removing some covariates.") x_prime_x_inv = np.linalg.inv(x_prime_x) # Influence function of the TWFE regression influence_reg = (i_weights[:, np.newaxis] * design_matrix * residuals[:, np.newaxis]) @ x_prime_x_inv selection_theta = np.zeros(design_matrix.shape[1]) selection_theta[3] = 1 # d:post interaction is at index 3 # Influence function of the ATT att_inf_func = influence_reg @ selection_theta except (np.linalg.LinAlgError, ValueError) as e: raise ValueError(f"Failed to fit TWFE regression model: {e}") from e return att, att_inf_func