Source code for moderndid.didhonest.honest_did

"""Sensitivity analysis using the approach of Rambachan and Roth."""

from typing import NamedTuple, Protocol, runtime_checkable

import numpy as np
import polars as pl

from .sensitivity import (
    OriginalCSResult,
    construct_original_cs,
    create_sensitivity_results_rm,
    create_sensitivity_results_sm,
)
from .utils import basis_vector



[docs]
class HonestDiDResult(NamedTuple):
    """Container for honest_did sensitivity analysis results.

    Attributes
    ----------
    robust_ci : pl.DataFrame
        DataFrame with columns for the smoothness/relative magnitude
        parameter (M or Mbar) and the corresponding lower and upper
        confidence interval bounds that are robust to violations of
        parallel trends.
    original_ci : OriginalCSResult
        Original confidence set assuming exact parallel trends holds.
    sensitivity_type : str
        Type of sensitivity analysis performed ('smoothness' or
        'relative_magnitude').
    """

    #: Robust confidence intervals under violations of parallel trends.
    robust_ci: pl.DataFrame
    #: Original confidence set assuming exact parallel trends.
    original_ci: OriginalCSResult
    #: Type of sensitivity analysis ('smoothness' or 'relative_magnitude').
    sensitivity_type: str



@runtime_checkable
class EventStudyProtocol(Protocol):
    """Protocol for event study result objects."""

    aggregation_type: str
    influence_func: np.ndarray | None
    event_times: np.ndarray | None
    att_by_event: np.ndarray | None
    estimation_params: dict



[docs]
def honest_did(
    event_study,
    event_time=0,
    sensitivity_type="smoothness",
    grid_points=100,
    **kwargs,
):
    r"""Compute sensitivity analysis for event study estimates.

    Implements the approach of [1]_ for robust inference in difference-in-differences
    and event study designs. This method relaxes the parallel trends assumption by
    allowing for bounded violations, providing confidence intervals that remain
    valid under specified deviations from exact parallel trends.

    The event-study coefficient vector :math:`\boldsymbol{\beta}` admits the causal
    decomposition

    .. math::

        \boldsymbol{\beta} = \begin{pmatrix} \boldsymbol{\tau}_{pre} \\
        \boldsymbol{\tau}_{post} \end{pmatrix} + \begin{pmatrix} \boldsymbol{\delta}_{pre} \\
        \boldsymbol{\delta}_{post} \end{pmatrix},

    where :math:`\boldsymbol{\tau}` represents treatment effects and
    :math:`\boldsymbol{\delta}` represents differential trends between treated and
    comparison groups. The conventional parallel trends assumption imposes
    :math:`\boldsymbol{\delta}_{post} = \mathbf{0}`, which this method relaxes by
    assuming :math:`\boldsymbol{\delta} \in \Delta` for a researcher-specified set
    :math:`\Delta`.

    The smoothness restriction bounds the discrete second derivative of the trend
    by a constant :math:`M`

    .. math::

        \Delta^{SD}(M) = \big\{\boldsymbol{\delta}:
        |(\delta_{t+1} - \delta_t) - (\delta_t - \delta_{t-1})| \le M, \forall t \big\}.

    The relative magnitudes restriction bounds post-treatment violations relative to
    the maximum pre-treatment violation

    .. math::

        \Delta^{RM}(\bar{M}) = \big\{\boldsymbol{\delta}:
        |\delta_{t+1} - \delta_t| \le \bar{M} \cdot \max_{s<0} |\delta_{s+1} - \delta_s|,
        \forall t \ge 0 \big\}.

    Parameters
    ----------
    event_study : AGGTEResult or similar
        Event study result object containing influence functions and estimates.
    event_time : int, default=0
        Event time to compute sensitivity analysis for. Default is 0 (on impact).
    sensitivity_type : {'smoothness', 'relative_magnitude'}, default='smoothness'
        Type of sensitivity analysis:

        - 'smoothness': Allows violations of linear trends in pre-treatment periods
        - 'relative_magnitude': Based on relative magnitudes of deviations from parallel trends

    grid_points : int, default=100
        Number of grid points for underlying test inversion.
    **kwargs : Additional parameters
        Additional parameters passed to sensitivity analysis functions:

        - method : CI method ('FLCI', 'Conditional', 'C-F', 'C-LF')
        - m_vec : Vector of M values for smoothness bounds
        - m_bar_vec : Vector of Mbar values for relative magnitude bounds
        - monotonicity_direction : 'increasing' or 'decreasing'
        - bias_direction : 'positive' or 'negative'
        - alpha : Significance level (default 0.05)

    Returns
    -------
    HonestDiDResult
        NamedTuple containing:

        - **robust_ci**: DataFrame with sensitivity analysis results
        - **original_ci**: Original confidence interval
        - **sensitivity_type**: Type of analysis performed

    Examples
    --------
    The ``honest_did`` function performs sensitivity analysis on event study estimates to assess
    the robustness of results to violations of parallel trends. We demonstrate this below with a
    staggered treatment adoption design.

    First, we need to compute an event study. The function requires an event study
    object that contains the dynamic influence functions, which we obtain by first computing group-time
    effects and then aggregating them.

    .. ipython::
        :okwarning:

        In [1]: import numpy as np
           ...: from moderndid import att_gt, aggte, load_mpdta
           ...: from moderndid.didhonest import honest_did
           ...
           ...: df = load_mpdta()
           ...: gt_result = att_gt(
           ...:     data=df,
           ...:     yname="lemp",
           ...:     tname="year",
           ...:     gname="first.treat",
           ...:     idname="countyreal",
           ...:     est_method="dr",
           ...:     boot=False
           ...: )
           ...: es_result = aggte(gt_result, type="dynamic")
           ...: print(es_result)

    Now we can apply the honest DiD sensitivity analysis. We can examine how robust
    our estimated treatment effects are when we allow for potential violations of the parallel
    trends assumption. We'll analyze the on-impact effect (event_time=0) using smoothness
    restrictions, which bound how much pre-treatment trends can deviate.

    .. ipython::
        :okwarning:

        In [2]: hd_result = honest_did(
           ...:     event_study=es_result,
           ...:     event_time=0,
           ...:     sensitivity_type="smoothness",
           ...:     m_vec=[0.01, 0.02, 0.03]
           ...: )
           ...:
           ...: hd_result

    The output shows the original confidence interval (which assumes parallel trends hold exactly)
    and robust confidence intervals under different smoothness bounds. The ``m_vec`` parameter specifies
    different bounds on how much the linear trend can change between consecutive periods. Larger
    values of :math:`M` allow for bigger violations of parallel trends. If the robust CIs remain informative
    even under reasonable violations, this provides evidence that the results are credible.

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2021). A more credible approach to
       parallel trends. Review of Economic Studies.
    """
    if hasattr(event_study, "aggregation_type") and isinstance(event_study, EventStudyProtocol):
        return _honest_did_aggte(
            event_study=event_study,
            event_time=event_time,
            sensitivity_type=sensitivity_type,
            grid_points=grid_points,
            **kwargs,
        )
    raise TypeError(
        f"honest_did not implemented for object of type {type(event_study).__name__}. "
        "Expected AGGTEResult or similar event study object."
    )



def _honest_did_aggte(
    event_study,
    event_time=0,
    sensitivity_type="smoothness",
    grid_points=100,
    **kwargs,
):
    """Implement sensitivity analysis for event study objects.

    Parameters
    ----------
    event_study : AGGTEResult
        Event study result from aggte function.
    event_time : int, default=0
        Event time to compute sensitivity analysis for. Default is 0 (on impact).
    sensitivity_type : str
        Type of sensitivity analysis:

        - 'smoothness': Allows violations of linear trends in pre-treatment periods
        - 'relative_magnitude': Based on relative magnitudes of deviations from parallel trends

    grid_points : int, default=100
        Number of grid points for underlying test inversion.
    **kwargs : Additional parameters
        Additional parameters passed to sensitivity analysis functions:

        - method : CI method ('FLCI', 'Conditional', 'C-F', 'C-LF')
        - m_vec : Vector of M values for smoothness bounds
        - m_bar_vec : Vector of Mbar values for relative magnitude bounds
        - monotonicity_direction : 'increasing' or 'decreasing'
        - bias_direction : 'positive' or 'negative'
        - alpha : Significance level (default 0.05)

    Returns
    -------
    HonestDiDResult
        NamedTuple with:

        - robust_ci : DataFrame with sensitivity analysis results
        - original_ci : Original confidence interval
        - sensitivity_type : Type of analysis performed
    """
    if event_study.aggregation_type != "dynamic":
        raise ValueError("honest_did requires an event study (dynamic aggregation).")

    if event_study.influence_func is None:
        raise ValueError("Event study must have influence functions computed.")

    influence_func = event_study.influence_func

    if influence_func.ndim != 2:
        raise ValueError(
            f"Expected 2D influence function matrix for dynamic aggregation, got shape {influence_func.shape}. "
        )

    event_times = event_study.event_times
    att_estimates = event_study.att_by_event

    reference_period = -1

    pre_periods = event_times[event_times < reference_period]
    post_periods = event_times[event_times > reference_period]

    if len(pre_periods) > 1 and not np.all(np.diff(pre_periods) == 1):
        raise ValueError(
            "honest_did expects consecutive pre-treatment periods. Please re-code your event study accordingly."
        )

    if len(post_periods) > 1 and not np.all(np.diff(post_periods) == 1):
        raise ValueError(
            "honest_did expects consecutive post-treatment periods. Please re-code your event study accordingly."
        )

    has_reference = reference_period in event_times
    if has_reference:
        ref_idx = np.where(event_times == reference_period)[0][0]
        event_times_no_ref = np.delete(event_times, ref_idx)
        att_no_ref = np.delete(att_estimates, ref_idx)
        influence_func_no_ref = np.delete(influence_func, ref_idx, axis=1)
    else:
        event_times_no_ref = event_times
        att_no_ref = att_estimates
        influence_func_no_ref = influence_func

    n = influence_func_no_ref.shape[0]
    vcov_matrix = influence_func_no_ref.T @ influence_func_no_ref / (n * n)

    num_pre_periods = np.sum(event_times_no_ref < reference_period)
    num_post_periods = len(event_times_no_ref) - num_pre_periods

    if num_pre_periods <= 0:
        raise ValueError("Not enough pre-treatment periods for honest_did.")
    if num_post_periods <= 0:
        raise ValueError("Not enough post-treatment periods for honest_did.")

    # Create weight vector for the requested event time
    # Note that event_time is relative to treatment, so we need to find its position in post-periods
    post_event_times = event_times_no_ref[num_pre_periods:]
    if event_time not in post_event_times:
        available_times = ", ".join(map(str, post_event_times))
        raise ValueError(
            f"Event time {event_time} not found in post-treatment periods. Available event times: {available_times}"
        )

    event_time_idx = np.where(post_event_times == event_time)[0][0] + 1
    l_vec = basis_vector(index=event_time_idx, size=num_post_periods)

    original_ci = construct_original_cs(
        betahat=att_no_ref,
        sigma=vcov_matrix,
        num_pre_periods=num_pre_periods,
        num_post_periods=num_post_periods,
        l_vec=l_vec,
        alpha=kwargs.get("alpha", 0.05),
    )

    if sensitivity_type == "relative_magnitude":
        robust_ci = create_sensitivity_results_rm(
            betahat=att_no_ref,
            sigma=vcov_matrix,
            num_pre_periods=num_pre_periods,
            num_post_periods=num_post_periods,
            l_vec=l_vec,
            grid_points=grid_points,
            **kwargs,
        )
    elif sensitivity_type == "smoothness":
        robust_ci = create_sensitivity_results_sm(
            betahat=att_no_ref,
            sigma=vcov_matrix,
            num_pre_periods=num_pre_periods,
            num_post_periods=num_post_periods,
            l_vec=l_vec,
            grid_points=grid_points,
            **kwargs,
        )
    else:
        raise ValueError(f"sensitivity_type must be 'smoothness' or 'relative_magnitude', got {sensitivity_type}")

    return HonestDiDResult(
        robust_ci=robust_ci,
        original_ci=original_ci,
        sensitivity_type=sensitivity_type,
    )