"""Aggregate Group-Time Average Treatment Effects."""
from __future__ import annotations
import numpy as np
from .compute_aggte import compute_aggte
# Public entry point for aggregating group-time average treatment effects.
def _is_true_integer(value):
    """Return True for built-in or NumPy integers, excluding booleans."""
    # ``bool`` is a subclass of ``int``, so it must be ruled out explicitly;
    # ``np.integer`` covers NumPy integer scalars such as ``np.int64``.
    return isinstance(value, (int, np.integer)) and not isinstance(value, bool)


def aggte(
    MP,
    type="group",  # shadows the builtin; name kept for API compatibility
    balance_e=None,
    min_e=-np.inf,
    max_e=np.inf,
    na_rm=False,
    boot=None,
    biters=None,
    cband=None,
    alp=None,
    clustervars=None,
    random_state=None,
):
    r"""Aggregate group-time average treatment effects.

    Takes the full set of group-time average treatment effects from ``att_gt``
    and aggregates them into interpretable summary measures, following Callaway
    and Sant'Anna (2021) [1]_. Different aggregation schemes answer different
    policy questions about treatment effect heterogeneity.

    Let :math:`\mathcal{G}` denote the set of treatment groups, :math:`\mathcal{T}`
    the final time period, :math:`G` the random variable for treatment timing,
    and :math:`ATT(g,t)` the group-time average treatment effect for group
    :math:`g` at time :math:`t`.

    The event-study or dynamic aggregation reveals how effects evolve with
    exposure time :math:`e = t - g`. The event-study parameter averages effects
    across groups observed :math:`e` periods after treatment

    .. math::

        \theta_{es}(e) = \sum_{g \in \mathcal{G}} \mathbf{1}\{g + e \le \mathcal{T}\}
        P(G = g \mid G + e \le \mathcal{T}) \, ATT(g, g + e).

    Group-specific aggregation averages effects over time for each treatment
    cohort :math:`\tilde{g}`, revealing whether early versus late adopters
    experience different effects

    .. math::

        \theta_{sel}(\tilde{g}) = \frac{1}{\mathcal{T} - \tilde{g} + 1}
        \sum_{t=\tilde{g}}^{\mathcal{T}} ATT(\tilde{g}, t).

    Calendar-time aggregation averages across treated groups within each period
    :math:`\tilde{t}`, showing how effects vary with time-specific factors

    .. math::

        \theta_{c}(\tilde{t}) = \sum_{g \in \mathcal{G}} \mathbf{1}\{\tilde{t} \ge g\}
        P(G = g \mid G \le \tilde{t}) \, ATT(g, \tilde{t}).

    The simple or overall aggregation provides a single summary measure by
    weighting group-specific effects by the distribution of treatment timing

    .. math::

        \theta_{sel}^O = \sum_{g \in \mathcal{G}} \theta_{sel}(g) \,
        P(G = g \mid G \le \mathcal{T}).

    Parameters
    ----------
    MP : MPResult
        An MP object (i.e., the results of the att_gt() method).
    type : {'simple', 'dynamic', 'group', 'calendar'}, default='group'
        Which type of aggregated treatment effect parameter to compute:

        - 'simple': Computes a weighted average of all group-time average
          treatment effects with weights proportional to group size.
        - 'dynamic': Computes average effects across different lengths of
          exposure to the treatment (similar to an event study).
        - 'group': Computes average treatment effects across different groups.
        - 'calendar': Computes average treatment effects across different
          time periods.
    balance_e : int, optional
        If set (and if one computes dynamic effects), it balances the sample
        with respect to event time. For example, if balance_e=2, aggte will
        drop groups that are not exposed to treatment for at least three
        periods (the initial period when e=0 as well as the next two periods
        when e=1 and e=2). This ensures that the composition of groups does
        not change when event time changes. Must be a non-negative integer;
        NumPy integer scalars are accepted, booleans are not.
    min_e : float, default=-inf
        For event studies, this is the smallest event time to compute dynamic
        effects for. By default, min_e = -Inf so that effects at all lengths
        of exposure are computed.
    max_e : float, default=inf
        For event studies, this is the largest event time to compute dynamic
        effects for. By default, max_e = Inf so that effects at all lengths
        of exposure are computed.
    na_rm : bool, default=False
        Logical value if we are to remove missing values from analyses.
    boot : bool, optional
        Boolean for whether or not to compute standard errors using the
        multiplier bootstrap. If standard errors are clustered, then one must
        set boot=True. Default is value set in the MP object. If boot is
        False, then analytical standard errors are reported.
    biters : int, optional
        The number of bootstrap iterations to use. The default is the value
        set in the MP object, and this is only applicable if boot=True.
        Must be a positive integer; NumPy integer scalars are accepted,
        booleans are not.
    cband : bool, optional
        Boolean for whether or not to compute a uniform confidence band that
        covers all of the group-time average treatment effects with fixed
        probability 1-alp. In order to compute uniform confidence bands,
        boot must also be set to True. The default is the value set in
        the MP object.
    alp : float, optional
        The significance level, default is value set in the MP object.
        Must lie strictly between 0 and 1.
    clustervars : list[str], optional
        A vector of variables to cluster on. At most, there can be two
        variables (otherwise will throw an error) and one of these must be
        the same as idname which allows for clustering at the individual
        level. Default is the variables set in the MP object.
    random_state : int, Generator, optional
        Controls the randomness of the bootstrap. Pass an int for reproducible
        results across multiple function calls. Can also accept a NumPy
        ``Generator`` instance.

    Returns
    -------
    AGGTEResult
        An AGGTEobj object that holds the results from the aggregation.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported aggregation types, ``alp`` is
        not strictly between 0 and 1, ``biters`` is not a positive integer,
        ``balance_e`` is not a non-negative integer, or ``min_e > max_e``.
    TypeError
        If ``clustervars`` is passed as a single string instead of a list of
        strings.

    Examples
    --------
    First, we compute group-time average treatment effects using the ``att_gt`` function:

    .. ipython::
        :okwarning:

        In [1]: import numpy as np
           ...: from moderndid import att_gt, aggte, load_mpdta
           ...:
           ...: df = load_mpdta()
           ...:
           ...: # Compute group-time ATTs
           ...: att_gt_result = att_gt(
           ...:     data=df,
           ...:     yname="lemp",
           ...:     tname="year",
           ...:     gname="first.treat",
           ...:     idname="countyreal",
           ...:     est_method="dr",
           ...:     boot=False
           ...: )

    Now we can aggregate these group-time effects in different ways. The "simple" aggregation
    computes an overall ATT by taking a weighted average of all group-time ATTs:

    .. ipython::
        :okwarning:

        In [2]: # Simple aggregation - overall ATT
           ...: simple_agg = aggte(MP=att_gt_result, type="simple")
           ...: print(simple_agg)

    The "group" aggregation computes average treatment effects separately for each treatment
    cohort (units first treated in the same period):

    .. ipython::
        :okwarning:

        In [3]: # Group aggregation - ATT by treatment cohort
           ...: group_agg = aggte(MP=att_gt_result, type="group")
           ...: print(group_agg)

    The "dynamic" aggregation creates an event study, showing how treatment effects evolve
    relative to the treatment start date:

    .. ipython::
        :okwarning:

        In [4]: # Dynamic aggregation - event study
           ...: dynamic_agg = aggte(MP=att_gt_result, type="dynamic")
           ...: print(dynamic_agg)

    We can also limit the event study to specific event times:

    .. ipython::
        :okwarning:

        In [5]: # Dynamic effects from 2 periods before to 2 periods after treatment
           ...: dynamic_limited = aggte(
           ...:     MP=att_gt_result,
           ...:     type="dynamic",
           ...:     min_e=-2,
           ...:     max_e=2
           ...: )
           ...: print(dynamic_limited)

    The "calendar" aggregation computes average treatment effects by calendar time period:

    .. ipython::
        :okwarning:

        In [6]: # Calendar time aggregation - ATT by year
           ...: calendar_agg = aggte(MP=att_gt_result, type="calendar")
           ...: print(calendar_agg)

    See Also
    --------
    att_gt : Compute group-time average treatment effects.

    References
    ----------
    .. [1] Callaway, B., & Sant'Anna, P. H. (2021). Difference-in-differences
           with multiple time periods. Journal of Econometrics, 225(2), 200-230.
           https://doi.org/10.1016/j.jeconom.2020.12.001
    """
    # All argument validation happens up front so that bad input fails fast,
    # before any aggregation work is handed to compute_aggte.
    valid_types = ("simple", "dynamic", "group", "calendar")
    if type not in valid_types:
        raise ValueError(f"type='{type}' is not valid. Must be one of: 'simple', 'dynamic', 'group', 'calendar'.")

    if alp is not None and not 0 < alp < 1:
        raise ValueError(f"alp={alp} is not valid. Must be between 0 and 1 (exclusive).")

    if biters is not None:
        if not _is_true_integer(biters) or biters < 1:
            raise ValueError(f"biters={biters} is not valid. Must be a positive integer.")
        # Normalise NumPy integer scalars to a built-in int before forwarding.
        biters = int(biters)

    if balance_e is not None:
        if not _is_true_integer(balance_e) or balance_e < 0:
            raise ValueError(f"balance_e={balance_e} is not valid. Must be a non-negative integer.")
        balance_e = int(balance_e)

    if min_e > max_e:
        raise ValueError(f"min_e={min_e} must be less than or equal to max_e={max_e}.")

    # A bare string would otherwise be treated as a sequence of characters.
    if isinstance(clustervars, str):
        raise TypeError(f"clustervars must be a list of strings, not a string. Use clustervars=['{clustervars}'].")

    call_info = {
        "function": f"aggte(MP, type='{type}')",
    }

    # Translate the public (R-style) argument names to compute_aggte's keywords.
    result = compute_aggte(
        multi_period_result=MP,
        aggregation_type=type,
        balance_e=balance_e,
        min_e=min_e,
        max_e=max_e,
        dropna=na_rm,
        bootstrap=boot,
        bootstrap_iterations=biters,
        confidence_band=cband,
        alpha=alp,
        clustervars=clustervars,
        random_state=random_state,
    )
    # Record how the aggregation was requested on the result object.
    result.call_info.update(call_info)
    return result