Source code for moderndid.drdid.propensity.ipw_estimators
"""Inverse propensity weighted (IPW) estimators for DiD."""
import warnings
import numpy as np
[docs]
def ipw_rc(y, post, d, ps, i_weights, trim_ps=None):
    r"""Compute the inverse propensity weighted (IPW) estimator for repeated cross-sections.

    This function implements the inverse propensity weighted (IPW) estimator from
    [1]_ for repeated cross-sections. The weights are not normalized to sum to 1,
    i.e., the estimator is of the Horvitz-Thompson type.

    The IPW estimator for the ATT in repeated cross-sections is given by

    .. math::

        \tau^{ipw, rc} = \frac{1}{\mathbb{E}[D]} \mathbb{E}\left[
        \left(\frac{D - \hat{\pi}(X)}{1 - \hat{\pi}(X)}\right)
        \left(\frac{T - \lambda}{\lambda(1-\lambda)}\right) Y\right]

    where :math:`D` is the treatment status, :math:`T` is the time period (1 for
    post-treatment, 0 for pre-treatment), :math:`Y` is the outcome, :math:`X` are
    covariates, :math:`\hat{\pi}(X)` is an estimator of the propensity score
    :math:`\pi(X) = P(D=1|X)`, and :math:`\lambda = P(T=1)` is the probability of
    being in the post-treatment period.

    In the implementation, :math:`\lambda` is estimated as
    ``mean(i_weights * trim_ps * post)``, the numerator is the mean of
    ``i_weights * trim_ps * (d - ps * (1 - d) / (1 - ps)) * time_adj * y`` and the
    denominator is ``mean(i_weights * d)`` (the trimming mask is not applied to
    the denominator).

    Parameters
    ----------
    y : ndarray
        A 1D array representing the outcome variable for each unit.
    post : ndarray
        A 1D array representing the post-treatment period indicator (1 for post,
        0 for pre) for each unit.
    d : ndarray
        A 1D array representing the treatment indicator (1 for treated, 0 for
        control) for each unit.
    ps : ndarray
        A 1D array of propensity scores (estimated probability of being treated,
        :math:`P(D=1|X)`) for each unit.
    i_weights : ndarray
        A 1D array of individual observation weights for each unit.
    trim_ps : ndarray or None
        A 1D boolean array indicating which units to keep after trimming.
        If None, no trimming is applied (all units are kept).

    Returns
    -------
    float
        The IPW ATT estimate for repeated cross-sections. ``nan`` is returned
        (together with a ``UserWarning``) when the estimate cannot be computed.

    Raises
    ------
    TypeError
        If any supplied input is not a NumPy array.
    ValueError
        If any supplied array is not 1-dimensional, or if the arrays do not all
        share the same shape.

    Warns
    -----
    UserWarning
        If the estimated :math:`\lambda` is exactly 0 or 1, if any control unit
        has a propensity score of exactly 1, if ``mean(i_weights * d)`` is 0, or
        if the resulting estimate is not finite.

    See Also
    --------
    wboot_ipw_rc : Bootstrap inference for IPW DiD.

    References
    ----------
    .. [1] Abadie, A. (2005). Semiparametric difference-in-differences estimators.
           The Review of Economic Studies, 72(1), 1-19.
           https://www.jstor.org/stable/3700681
    """
    arrays = {"y": y, "post": post, "d": d, "ps": ps, "i_weights": i_weights}
    if trim_ps is not None:
        arrays["trim_ps"] = trim_ps

    # Validate type first so that the .ndim / .shape checks below are safe.
    if not all(isinstance(arr, np.ndarray) for arr in arrays.values()):
        raise TypeError("All inputs must be NumPy arrays.")

    if not all(arr.ndim == 1 for arr in arrays.values()):
        raise ValueError("All input arrays must be 1-dimensional.")

    first_shape = next(iter(arrays.values())).shape
    if not all(arr.shape == first_shape for arr in arrays.values()):
        raise ValueError("All input arrays must have the same shape.")

    if trim_ps is None:
        # No trimming requested: keep every unit.
        trim_ps = np.ones_like(d, dtype=bool)

    # Weighted share of (untrimmed) observations in the post period.
    lambda_val = np.mean(i_weights * trim_ps * post)
    if lambda_val in (0, 1):
        # lambda * (1 - lambda) would be zero in the time adjustment below.
        warnings.warn(f"Lambda is {lambda_val}, cannot compute IPW estimator.", UserWarning)
        return np.nan

    denominator_ps = 1 - ps
    zero_denominator = denominator_ps == 0

    # A control unit with ps == 1 has an infinite odds weight ps / (1 - ps).
    problematic_ps = zero_denominator & (d == 0)
    if np.any(problematic_ps):
        warnings.warn(
            "Propensity score is 1 for some control units, cannot compute IPW.",
            UserWarning,
        )
        return np.nan

    # Past the guard above, a zero denominator can only occur for non-control
    # units, whose control term ps * (1 - d) is zero for treated units (d == 1).
    # Dividing 0 by 0 would yield NaN and poison the whole estimate, so
    # substitute a denominator of 1 there: the term is then exactly 0 and the
    # treated unit keeps its weight of d.
    safe_denominator = np.where(zero_denominator, 1.0, denominator_ps)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Silence NumPy floating-point warnings for non-finite inputs; such
        # cases are reported through the finiteness check at the end.
        ipw_term = d - ps * (1 - d) / safe_denominator

    time_adj = (post - lambda_val) / (lambda_val * (1 - lambda_val))

    numerator = np.mean(i_weights * trim_ps * ipw_term * time_adj * y)
    denominator = np.mean(i_weights * d)

    if denominator == 0:
        warnings.warn("No treated units found (denominator is 0).", UserWarning)
        return np.nan

    att = numerator / denominator

    if not np.isfinite(att):
        warnings.warn(f"IPW estimator is not finite: {att}.", UserWarning)
        return np.nan

    return float(att)