# Source code for moderndid.drdid.propensity.ipw_estimators

"""Inverse propensity weighted (IPW) estimators for DiD."""

import warnings

import numpy as np



# [docs]
def ipw_rc(y, post, d, ps, i_weights, trim_ps=None):
    r"""Compute the inverse propensity weighted (IPW) estimator for repeated cross-sections.

    This function implements the inverse propensity weighted (IPW) estimator from
    [1]_ for repeated cross-sections. The weights are not normalized to sum to 1, i.e., the
    estimator is of the Horvitz-Thompson type.

    The IPW estimator for the ATT in repeated cross-sections is given by

    .. math::

        \tau^{ipw, rc} = \frac{1}{\mathbb{E}[D]} \mathbb{E}\left[
        \left(\frac{D - \hat{\pi}(X)}{1 - \hat{\pi}(X)}\right)
        \left(\frac{T - \lambda}{\lambda(1-\lambda)}\right) Y\right]

    where :math:`D` is the treatment status, :math:`T` is the time period (1 for post-treatment,
    0 for pre-treatment), :math:`Y` is the outcome, :math:`X` are covariates,
    :math:`\hat{\pi}(X)` is an estimator of the propensity score :math:`\pi(X) = P(D=1|X)`,
    and :math:`\lambda = P(T=1)` is the probability of being in the post-treatment
    period.

    For binary :math:`D` the propensity factor equals :math:`D - \hat{\pi}(X)(1 - D) / (1 - \hat{\pi}(X))`,
    which is how it is evaluated here: treated units always contribute a factor of 1, even when
    their propensity score is exactly 1.

    Parameters
    ----------
    y : ndarray
        A 1D array representing the outcome variable for each unit.
    post : ndarray
        A 1D array representing the post-treatment period indicator (1 for post, 0 for pre)
        for each unit.
    d : ndarray
        A 1D array representing the treatment indicator (1 for treated, 0 for control)
        for each unit.
    ps : ndarray
        A 1D array of propensity scores (estimated probability of being treated,
        :math:`P(D=1|X)`) for each unit.
    i_weights : ndarray
        A 1D array of individual observation weights for each unit.
    trim_ps : ndarray or None
        A 1D boolean array indicating which units to keep after trimming.
        If None, no trimming is applied (all units are kept).

    Returns
    -------
    float
        The IPW ATT estimate for repeated cross-sections, or ``nan`` when the
        estimator cannot be computed (see Warns).

    Raises
    ------
    TypeError
        If any input is not a NumPy array.
    ValueError
        If any input is not 1-dimensional or the inputs differ in shape.

    Warns
    -----
    UserWarning
        If :math:`\lambda` is exactly 0 or 1, if a control unit that is kept after
        trimming has a propensity score of exactly 1, if the weighted share of
        treated units is 0, or if the resulting estimate is not finite. In each
        case ``nan`` is returned.

    See Also
    --------
    wboot_ipw_rc : Bootstrap inference for IPW DiD.

    References
    ----------

    .. [1] Abadie, A. (2005). Semiparametric difference-in-differences estimators.
        The Review of Economic Studies, 72(1), 1-19.
        https://www.jstor.org/stable/3700681
    """
    arrays = {"y": y, "post": post, "d": d, "ps": ps, "i_weights": i_weights}
    if trim_ps is not None:
        arrays["trim_ps"] = trim_ps

    if not all(isinstance(arr, np.ndarray) for arr in arrays.values()):
        raise TypeError("All inputs must be NumPy arrays.")

    if not all(arr.ndim == 1 for arr in arrays.values()):
        raise ValueError("All input arrays must be 1-dimensional.")

    first_shape = next(iter(arrays.values())).shape
    if not all(arr.shape == first_shape for arr in arrays.values()):
        raise ValueError("All input arrays must have the same shape.")

    if trim_ps is None:
        trim_ps = np.ones_like(d, dtype=bool)

    # Weighted share of (kept) post-period observations.
    lambda_val = np.mean(i_weights * trim_ps * post)

    if lambda_val in (0, 1):
        warnings.warn(f"Lambda is {lambda_val}, cannot compute IPW estimator.", UserWarning)
        return np.nan

    denominator_ps = 1 - ps
    degenerate_ps = denominator_ps == 0

    # Only control units that survive trimming actually use ps / (1 - ps);
    # trimmed units get zero weight and must not invalidate the estimate.
    problematic_ps = degenerate_ps & (d == 0) & (trim_ps != 0)
    if np.any(problematic_ps):
        warnings.warn(
            "Propensity score is 1 for some control units, cannot compute IPW.",
            UserWarning,
        )
        return np.nan

    # Any remaining ps == 1 rows are treated (where the odds term is multiplied
    # by 1 - d == 0) or trimmed (zero weight). Dividing by a placeholder of 1
    # there avoids 0/0 = NaN leaking into the mean; all other rows are unchanged.
    safe_denominator = np.where(degenerate_ps, 1.0, denominator_ps)
    with np.errstate(divide="ignore", invalid="ignore"):
        ipw_term = d - ps * (1 - d) / safe_denominator

    time_adj = (post - lambda_val) / (lambda_val * (1 - lambda_val))
    numerator = np.mean(i_weights * trim_ps * ipw_term * time_adj * y)
    # The normalization uses all treated units, independent of trimming.
    denominator = np.mean(i_weights * d)

    if denominator == 0:
        warnings.warn("No treated units found (denominator is 0).", UserWarning)
        return np.nan

    att = numerator / denominator

    if not np.isfinite(att):
        warnings.warn(f"IPW estimator is not finite: {att}.", UserWarning)
        return np.nan

    return float(att)