"""Two-way fixed effects DiD estimator for repeated cross-sections."""
from typing import NamedTuple
import numpy as np
import statsmodels.api as sm
from scipy import stats
from moderndid.cupy.backend import get_backend, to_numpy
from moderndid.cupy.regression import cupy_wls
from ..bootstrap.boot_mult import mboot_did
from ..bootstrap.boot_twfe_rc import wboot_twfe_rc
class TWFEDIDRCResult(NamedTuple):
"""Result from the two-way fixed effects DiD RC estimator."""
att: float
se: float
uci: float
lci: float
boots: np.ndarray | None
att_inf_func: np.ndarray | None
args: dict
[docs]
def twfe_did_rc(
y,
post,
d,
covariates=None,
i_weights=None,
boot=False,
boot_type="weighted",
nboot=999,
influence_func=False,
):
r"""Compute linear two-way fixed effects DiD estimator for the ATT with repeated cross-sections.
Implements the linear two-way fixed effects (TWFE) estimator for the ATT with repeated cross-section data,
as illustrated in [1]_. The estimator is based on the regression model from equation (2.5) of [1]_ as
.. math::
Y_{it} = \alpha_1 + \alpha_2 T_i + \alpha_3 D_i + \tau^{fe}(T_i \cdot D_i)
+ \theta' X_i + \varepsilon_{it}.
Parameters
----------
y : ndarray
A 1D array of outcomes from both pre and post-treatment periods.
post : ndarray
A 1D array of post-treatment dummies (1 if observation belongs to post-treatment
period, 0 if observation belongs to pre-treatment period).
d : ndarray
A 1D array of group indicators (1 if observation is treated in the post-treatment
period, 0 otherwise).
covariates : ndarray, optional
A 2D array of covariates to be used in the regression estimation. We will always
include an intercept.
i_weights : ndarray, optional
A 1D array of weights. If None, then every observation has equal weight.
Weights are normalized to have mean 1.
boot : bool, default=False
Whether to compute bootstrap standard errors.
boot_type : {"weighted", "multiplier"}, default="weighted"
Type of bootstrap to be performed (not relevant if boot = False).
nboot : int, default=999
Number of bootstrap repetitions (not relevant if boot = False).
influence_func : bool, default=False
Whether to return the influence function.
Returns
-------
TWFEDIDRCResult
A NamedTuple containing the TWFE DiD point estimate, standard error, confidence interval,
bootstrap draws, and influence function.
Warnings
--------
This estimator generally does not recover the ATT. We encourage users to adopt alternative specifications.
See Also
--------
reg_did_rc : Outcome regression DiD for repeated cross-sections.
drdid_imp_rc : Improved doubly robust DiD for repeated cross-sections.
ipw_did_rc : Inverse propensity weighted DiD for repeated cross-sections.
References
----------
.. [1] Sant'Anna, P. H. C. and Zhao, J. (2020), "Doubly Robust Difference-in-Differences Estimators."
Journal of Econometrics, Vol. 219 (1), pp. 101-122. https://doi.org/10.1016/j.jeconom.2020.06.003
"""
y, post, d, x, i_weights, n = _validate_and_preprocess_inputs(y, post, d, covariates, i_weights)
att, att_inf_func = _fit_twfe_regression(y, post, d, x, i_weights, n)
if not boot:
se = np.std(att_inf_func, ddof=1) / np.sqrt(n)
uci = att + 1.96 * se
lci = att - 1.96 * se
boots = None
else:
if nboot is None:
nboot = 999
if boot_type == "multiplier":
boots = mboot_did(att_inf_func, nboot)
se = stats.iqr(boots) / (stats.norm.ppf(0.75) - stats.norm.ppf(0.25))
critical_value = np.quantile(np.abs(boots / se), 0.95)
uci = att + critical_value * se
lci = att - critical_value * se
else: # "weighted"
boots = wboot_twfe_rc(
y=y,
post=post,
d=d,
x=x if x is not None else np.empty((n, 0)),
i_weights=i_weights,
n_bootstrap=nboot,
)
se = stats.iqr(boots - att) / (stats.norm.ppf(0.75) - stats.norm.ppf(0.25))
critical_value = np.quantile(np.abs((boots - att) / se), 0.95)
uci = att + critical_value * se
lci = att - critical_value * se
if not influence_func:
att_inf_func = None
return TWFEDIDRCResult(
att=att,
se=se,
uci=uci,
lci=lci,
boots=boots,
att_inf_func=att_inf_func,
args={
"panel": False,
"boot": boot,
"boot_type": boot_type,
"nboot": nboot,
"type": "twfe",
},
)
def _validate_and_preprocess_inputs(y, post, d, covariates, i_weights):
"""Validate and preprocess input arrays."""
d = np.asarray(d).flatten()
post = np.asarray(post).flatten()
y = np.asarray(y).flatten()
n = len(d)
if i_weights is None:
i_weights = np.ones(n)
else:
i_weights = np.asarray(i_weights).flatten()
if np.any(i_weights < 0):
raise ValueError("i_weights must be non-negative.")
i_weights = i_weights / np.mean(i_weights)
x = None
if covariates is not None:
covariates = np.asarray(covariates)
if covariates.ndim == 1:
covariates = covariates.reshape(-1, 1)
if covariates.shape[1] > 0 and np.all(covariates[:, 0] == 1):
covariates = covariates[:, 1:]
if covariates.shape[1] == 0:
covariates = None
x = None
if covariates is not None:
x = covariates
return y, post, d, x, i_weights, n
def _fit_twfe_regression(y, post, d, x, i_weights, n):
"""Fit TWFE regression and compute influence function."""
if x is not None:
design_matrix = np.column_stack(
[
np.ones(n),
post,
d,
d * post,
x,
]
)
else:
design_matrix = np.column_stack(
[
np.ones(n),
post,
d,
d * post,
]
)
try:
xp = get_backend()
if xp is not np:
beta, _ = cupy_wls(xp.asarray(y), xp.asarray(design_matrix), xp.asarray(i_weights))
params = to_numpy(beta)
att = params[3]
fitted = design_matrix @ params
residuals = y - fitted
else:
wls_model = sm.WLS(y, design_matrix, weights=i_weights)
wls_results = wls_model.fit()
att = wls_results.params[3]
residuals = wls_results.resid
# Elements for influence function
x_prime_x = design_matrix.T @ (i_weights[:, np.newaxis] * design_matrix) / n
if np.linalg.cond(x_prime_x) > 1e15:
raise ValueError("The regression design matrix is singular. Consider removing some covariates.")
x_prime_x_inv = np.linalg.inv(x_prime_x)
# Influence function of the TWFE regression
influence_reg = (i_weights[:, np.newaxis] * design_matrix * residuals[:, np.newaxis]) @ x_prime_x_inv
selection_theta = np.zeros(design_matrix.shape[1])
selection_theta[3] = 1 # d:post interaction is at index 3
# Influence function of the ATT
att_inf_func = influence_reg @ selection_theta
except (np.linalg.LinAlgError, ValueError) as e:
raise ValueError(f"Failed to fit TWFE regression model: {e}") from e
return att, att_inf_func