Source code for moderndid.did.mboot

"""Multiplier bootstrap for multiple time period DiD estimators."""

import warnings

import numpy as np

from moderndid.core.numba_utils import multiplier_bootstrap
from moderndid.cupy.backend import get_backend
from moderndid.cupy.bootstrap import _multiplier_bootstrap_cupy



[docs]
def mboot(
    inf_func,
    n_units,
    biters=999,
    alp=0.05,
    cluster=None,
    random_state=None,
):
    """Compute multiplier bootstrap for DiD influence functions.

    Implements the multiplier bootstrap for computing standard errors and critical
    values for uniform confidence bands. It handles both individual and clustered
    data using Mammen weights.

    Parameters
    ----------
    inf_func : ndarray
        Influence function matrix of shape (n, k) where n is the number of
        observations and k is the number of parameters.
    n_units : int
        Number of cross-sectional units.
    biters : int, default=999
        Number of bootstrap iterations.
    alp : float, default=0.05
        Significance level for confidence intervals.
    cluster : ndarray, optional
        Cluster indicators for each unit. If provided, bootstrap is performed
        at the cluster level.
    random_state : int, Generator, optional
        Controls the randomness of the bootstrap. Pass an int for reproducible
        results across multiple function calls. Can also accept a NumPy
        ``Generator`` instance.

    Returns
    -------
    dict
        Dictionary containing:

        - 'bres': Bootstrap results matrix of shape (biters, k)
        - 'V': Variance-covariance matrix
        - 'se': Standard errors for each parameter
        - 'crit_val': Critical value for uniform confidence bands

    Notes
    -----
    The function uses Mammen (1993) weights for the multiplier bootstrap.
    When clustering is specified, the bootstrap is performed at the cluster
    level to preserve within-cluster dependence.

    References
    ----------
    .. [1] Mammen, E. (1993). "Bootstrap and wild bootstrap for high dimensional
           linear models". The Annals of Statistics, 21(1), 255-285.
    """
    inf_func = inf_func.reshape(-1, 1) if inf_func.ndim == 1 else np.atleast_2d(inf_func)

    n_obs, n_params = inf_func.shape

    if n_obs != len(inf_func):
        raise ValueError("Number of observations in inf_func must match its length.")

    if cluster is not None:
        if len(cluster) != n_units:
            raise ValueError("cluster must have length equal to n_units.")
        n_clusters = len(np.unique(cluster))
    else:
        n_clusters = n_units

    if cluster is None:
        bres = np.sqrt(n_units) * _run_multiplier_bootstrap(inf_func, biters, random_state)
    else:
        # Cluster-level bootstrap
        # Aggregate influence function to cluster level
        _, cluster_inverse, cluster_counts = np.unique(cluster, return_inverse=True, return_counts=True)

        cluster_sum_inf_func = np.zeros((n_clusters, n_params))
        for i in range(n_params):
            cluster_sum_inf_func[:, i] = np.bincount(cluster_inverse, weights=inf_func[:, i])

        cluster_inf_func = cluster_sum_inf_func / cluster_counts[:, np.newaxis]
        bres = np.sqrt(n_clusters) * _run_multiplier_bootstrap(cluster_inf_func, biters, random_state)

    col_sums_sq = np.sum(bres**2, axis=0)
    ndg_dim = (~np.isnan(col_sums_sq)) & (col_sums_sq > np.sqrt(np.finfo(float).eps) * 10)

    bres_clean = bres[:, ndg_dim]

    V = np.cov(bres_clean.T)
    if V.ndim == 0:
        V = np.array([[V]])

    se_full = np.full(n_params, np.nan)
    if bres_clean.shape[1] > 0:
        q75 = np.percentile(bres_clean, 75, axis=0, method="inverted_cdf")
        q25 = np.percentile(bres_clean, 25, axis=0, method="inverted_cdf")
        se_bootstrap = (q75 - q25) / (1.3489795)
        se_full[ndg_dim] = se_bootstrap / np.sqrt(n_clusters)

    crit_val = np.nan
    if bres_clean.shape[1] > 0:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            bT = np.max(np.abs(bres_clean / se_bootstrap), axis=1)

        bT_finite = bT[np.isfinite(bT)]
        if len(bT_finite) > 0:
            crit_val = np.percentile(bT_finite, 100 * (1 - alp), method="inverted_cdf")

    return {
        "bres": bres,
        "V": V,
        "se": se_full,
        "crit_val": crit_val,
    }



def _run_multiplier_bootstrap(
    inf_func,
    biters,
    random_state=None,
):
    """Run the core multiplier bootstrap using Mammen weights.

    Parameters
    ----------
    inf_func : ndarray
        Influence function matrix of shape (n, k).
    biters : int
        Number of bootstrap iterations.
    random_state : int, Generator, optional
        Controls the randomness of the bootstrap. Pass an int for reproducible
        results across multiple function calls. Can also accept a NumPy
        ``Generator`` instance.

    Returns
    -------
    ndarray
        Bootstrap results of shape (biters, k).
    """
    xp = get_backend()
    if xp is not np:
        return _multiplier_bootstrap_cupy(inf_func, biters, random_state)

    return multiplier_bootstrap(inf_func, biters, random_state)