Source code for moderndid.didhonest.honest_did

"""Sensitivity analysis using the approach of Rambachan and Roth."""

from typing import NamedTuple, Protocol, runtime_checkable

import numpy as np
import polars as pl

from .sensitivity import (
    OriginalCSResult,
    construct_original_cs,
    create_sensitivity_results_rm,
    create_sensitivity_results_sm,
)
from .utils import basis_vector


[docs] class HonestDiDResult(NamedTuple): """Container for honest_did sensitivity analysis results. Attributes ---------- robust_ci : pl.DataFrame DataFrame with columns for the smoothness/relative magnitude parameter (M or Mbar) and the corresponding lower and upper confidence interval bounds that are robust to violations of parallel trends. original_ci : OriginalCSResult Original confidence set assuming exact parallel trends holds. sensitivity_type : str Type of sensitivity analysis performed ('smoothness' or 'relative_magnitude'). """ #: Robust confidence intervals under violations of parallel trends. robust_ci: pl.DataFrame #: Original confidence set assuming exact parallel trends. original_ci: OriginalCSResult #: Type of sensitivity analysis ('smoothness' or 'relative_magnitude'). sensitivity_type: str
@runtime_checkable class EventStudyProtocol(Protocol): """Protocol for event study result objects.""" aggregation_type: str influence_func: np.ndarray | None event_times: np.ndarray | None att_by_event: np.ndarray | None estimation_params: dict
[docs] def honest_did( event_study, event_time=0, sensitivity_type="smoothness", grid_points=100, **kwargs, ): r"""Compute sensitivity analysis for event study estimates. Implements the approach of [1]_ for robust inference in difference-in-differences and event study designs. This method relaxes the parallel trends assumption by allowing for bounded violations, providing confidence intervals that remain valid under specified deviations from exact parallel trends. The event-study coefficient vector :math:`\boldsymbol{\beta}` admits the causal decomposition .. math:: \boldsymbol{\beta} = \begin{pmatrix} \boldsymbol{\tau}_{pre} \\ \boldsymbol{\tau}_{post} \end{pmatrix} + \begin{pmatrix} \boldsymbol{\delta}_{pre} \\ \boldsymbol{\delta}_{post} \end{pmatrix}, where :math:`\boldsymbol{\tau}` represents treatment effects and :math:`\boldsymbol{\delta}` represents differential trends between treated and comparison groups. The conventional parallel trends assumption imposes :math:`\boldsymbol{\delta}_{post} = \mathbf{0}`, which this method relaxes by assuming :math:`\boldsymbol{\delta} \in \Delta` for a researcher-specified set :math:`\Delta`. The smoothness restriction bounds the discrete second derivative of the trend by a constant :math:`M` .. math:: \Delta^{SD}(M) = \big\{\boldsymbol{\delta}: |(\delta_{t+1} - \delta_t) - (\delta_t - \delta_{t-1})| \le M, \forall t \big\}. The relative magnitudes restriction bounds post-treatment violations relative to the maximum pre-treatment violation .. math:: \Delta^{RM}(\bar{M}) = \big\{\boldsymbol{\delta}: |\delta_{t+1} - \delta_t| \le \bar{M} \cdot \max_{s<0} |\delta_{s+1} - \delta_s|, \forall t \ge 0 \big\}. Parameters ---------- event_study : AGGTEResult or similar Event study result object containing influence functions and estimates. event_time : int, default=0 Event time to compute sensitivity analysis for. Default is 0 (on impact). sensitivity_type : {'smoothness', 'relative_magnitude'}, default='smoothness' Type of sensitivity analysis: - 'smoothness': Allows violations of linear trends in pre-treatment periods - 'relative_magnitude': Based on relative magnitudes of deviations from parallel trends grid_points : int, default=100 Number of grid points for underlying test inversion. **kwargs : Additional parameters Additional parameters passed to sensitivity analysis functions: - method : CI method ('FLCI', 'Conditional', 'C-F', 'C-LF') - m_vec : Vector of M values for smoothness bounds - m_bar_vec : Vector of Mbar values for relative magnitude bounds - monotonicity_direction : 'increasing' or 'decreasing' - bias_direction : 'positive' or 'negative' - alpha : Significance level (default 0.05) Returns ------- HonestDiDResult NamedTuple containing: - **robust_ci**: DataFrame with sensitivity analysis results - **original_ci**: Original confidence interval - **sensitivity_type**: Type of analysis performed Examples -------- The ``honest_did`` function performs sensitivity analysis on event study estimates to assess the robustness of results to violations of parallel trends. We demonstrate this below with a staggered treatment adoption design. First, we need to compute an event study. The function requires an event study object that contains the dynamic influence functions, which we obtain by first computing group-time effects and then aggregating them. .. ipython:: :okwarning: In [1]: import numpy as np ...: from moderndid import att_gt, aggte, load_mpdta ...: from moderndid.didhonest import honest_did ... ...: df = load_mpdta() ...: gt_result = att_gt( ...: data=df, ...: yname="lemp", ...: tname="year", ...: gname="first.treat", ...: idname="countyreal", ...: est_method="dr", ...: boot=False ...: ) ...: es_result = aggte(gt_result, type="dynamic") ...: print(es_result) Now we can apply the honest DiD sensitivity analysis. We can examine how robust our estimated treatment effects are when we allow for potential violations of the parallel trends assumption. We'll analyze the on-impact effect (event_time=0) using smoothness restrictions, which bound how much pre-treatment trends can deviate. .. ipython:: :okwarning: In [2]: hd_result = honest_did( ...: event_study=es_result, ...: event_time=0, ...: sensitivity_type="smoothness", ...: m_vec=[0.01, 0.02, 0.03] ...: ) ...: ...: hd_result The output shows the original confidence interval (which assumes parallel trends hold exactly) and robust confidence intervals under different smoothness bounds. The ``m_vec`` parameter specifies different bounds on how much the linear trend can change between consecutive periods. Larger values of :math:`M` allow for bigger violations of parallel trends. If the robust CIs remain informative even under reasonable violations, this provides evidence that the results are credible. References ---------- .. [1] Rambachan, A., & Roth, J. (2021). A more credible approach to parallel trends. Review of Economic Studies. """ if hasattr(event_study, "aggregation_type") and isinstance(event_study, EventStudyProtocol): return _honest_did_aggte( event_study=event_study, event_time=event_time, sensitivity_type=sensitivity_type, grid_points=grid_points, **kwargs, ) raise TypeError( f"honest_did not implemented for object of type {type(event_study).__name__}. " "Expected AGGTEResult or similar event study object." )
def _honest_did_aggte( event_study, event_time=0, sensitivity_type="smoothness", grid_points=100, **kwargs, ): """Implement sensitivity analysis for event study objects. Parameters ---------- event_study : AGGTEResult Event study result from aggte function. event_time : int, default=0 Event time to compute sensitivity analysis for. Default is 0 (on impact). sensitivity_type : str Type of sensitivity analysis: - 'smoothness': Allows violations of linear trends in pre-treatment periods - 'relative_magnitude': Based on relative magnitudes of deviations from parallel trends grid_points : int, default=100 Number of grid points for underlying test inversion. **kwargs : Additional parameters Additional parameters passed to sensitivity analysis functions: - method : CI method ('FLCI', 'Conditional', 'C-F', 'C-LF') - m_vec : Vector of M values for smoothness bounds - m_bar_vec : Vector of Mbar values for relative magnitude bounds - monotonicity_direction : 'increasing' or 'decreasing' - bias_direction : 'positive' or 'negative' - alpha : Significance level (default 0.05) Returns ------- HonestDiDResult NamedTuple with: - robust_ci : DataFrame with sensitivity analysis results - original_ci : Original confidence interval - sensitivity_type : Type of analysis performed """ if event_study.aggregation_type != "dynamic": raise ValueError("honest_did requires an event study (dynamic aggregation).") if event_study.influence_func is None: raise ValueError("Event study must have influence functions computed.") influence_func = event_study.influence_func if influence_func.ndim != 2: raise ValueError( f"Expected 2D influence function matrix for dynamic aggregation, got shape {influence_func.shape}. " ) event_times = event_study.event_times att_estimates = event_study.att_by_event reference_period = -1 pre_periods = event_times[event_times < reference_period] post_periods = event_times[event_times > reference_period] if len(pre_periods) > 1 and not np.all(np.diff(pre_periods) == 1): raise ValueError( "honest_did expects consecutive pre-treatment periods. Please re-code your event study accordingly." ) if len(post_periods) > 1 and not np.all(np.diff(post_periods) == 1): raise ValueError( "honest_did expects consecutive post-treatment periods. Please re-code your event study accordingly." ) has_reference = reference_period in event_times if has_reference: ref_idx = np.where(event_times == reference_period)[0][0] event_times_no_ref = np.delete(event_times, ref_idx) att_no_ref = np.delete(att_estimates, ref_idx) influence_func_no_ref = np.delete(influence_func, ref_idx, axis=1) else: event_times_no_ref = event_times att_no_ref = att_estimates influence_func_no_ref = influence_func n = influence_func_no_ref.shape[0] vcov_matrix = influence_func_no_ref.T @ influence_func_no_ref / (n * n) num_pre_periods = np.sum(event_times_no_ref < reference_period) num_post_periods = len(event_times_no_ref) - num_pre_periods if num_pre_periods <= 0: raise ValueError("Not enough pre-treatment periods for honest_did.") if num_post_periods <= 0: raise ValueError("Not enough post-treatment periods for honest_did.") # Create weight vector for the requested event time # Note that event_time is relative to treatment, so we need to find its position in post-periods post_event_times = event_times_no_ref[num_pre_periods:] if event_time not in post_event_times: available_times = ", ".join(map(str, post_event_times)) raise ValueError( f"Event time {event_time} not found in post-treatment periods. Available event times: {available_times}" ) event_time_idx = np.where(post_event_times == event_time)[0][0] + 1 l_vec = basis_vector(index=event_time_idx, size=num_post_periods) original_ci = construct_original_cs( betahat=att_no_ref, sigma=vcov_matrix, num_pre_periods=num_pre_periods, num_post_periods=num_post_periods, l_vec=l_vec, alpha=kwargs.get("alpha", 0.05), ) if sensitivity_type == "relative_magnitude": robust_ci = create_sensitivity_results_rm( betahat=att_no_ref, sigma=vcov_matrix, num_pre_periods=num_pre_periods, num_post_periods=num_post_periods, l_vec=l_vec, grid_points=grid_points, **kwargs, ) elif sensitivity_type == "smoothness": robust_ci = create_sensitivity_results_sm( betahat=att_no_ref, sigma=vcov_matrix, num_pre_periods=num_pre_periods, num_post_periods=num_post_periods, l_vec=l_vec, grid_points=grid_points, **kwargs, ) else: raise ValueError(f"sensitivity_type must be 'smoothness' or 'relative_magnitude', got {sensitivity_type}") return HonestDiDResult( robust_ci=robust_ci, original_ci=original_ci, sensitivity_type=sensitivity_type, )