"""Andrews-Roth-Pakes (ARP) confidence intervals with nuisance parameters."""
import warnings
from functools import partial
from typing import NamedTuple
import numpy as np
import scipy.optimize as opt
from scipy import stats
from .conditional import _norminvp_generalized
from .numba import compute_hybrid_dbar, prepare_theta_grid_y_values
from .utils import basis_vector
[docs]
class ARPNuisanceCIResult(NamedTuple):
"""Container for ARP confidence interval results with nuisance parameters.
Attributes
----------
ci_lb : float
Lower bound of confidence interval.
ci_ub : float
Upper bound of confidence interval.
accept_grid : np.ndarray
Grid of values tested (1st column) and acceptance indicators (2nd column).
length : float
Length of the confidence interval.
"""
#: Lower bound of confidence interval.
ci_lb: float
#: Upper bound of confidence interval.
ci_ub: float
#: Grid of values tested and acceptance indicators.
accept_grid: np.ndarray
#: Length of the confidence interval.
length: float
[docs]
def compute_arp_nuisance_ci(
betahat,
sigma,
l_vec,
a_matrix,
d_vec,
num_pre_periods,
num_post_periods,
alpha=0.05,
hybrid_flag="ARP",
hybrid_list=None,
grid_lb=None,
grid_ub=None,
grid_points=1000,
rows_for_arp=None,
):
r"""Compute Andrews-Roth-Pakes (ARP) confidence interval with nuisance parameters.
Computes confidence interval for :math:`\theta = l'\tau_{post}` subject to the constraint
that :math:`\delta \in \Delta`, where :math:`\Delta = \{\delta : A\delta \leq d\}`.
This implements the conditional inference approach from Andrews, Roth & Pakes (2023)
that provides uniformly valid inference over the identified set.
The method tests the composite null hypothesis from equation (12) in [2]_
.. math::
H_0: \exists \tau_{post} \in \mathbb{R}^{\bar{T}} \text{ s.t. } l'\tau_{post} = \bar{\theta}
\text{ and } \mathbb{E}_{\hat{\beta}_n \sim
\mathcal{N}(\delta+\tau, \Sigma_n)}[Y_n - AL_{post}\tau_{post}] \leq 0,
where :math:`Y_n = A\hat{\beta}_n - d` and :math:`L_{post} = [0, I]'`. After a change of
basis using matrix :math:`\Gamma` with :math:`l'` as its first row, this becomes
equation (13) in [2]_
.. math::
H_0: \exists \tilde{\tau} \in \mathbb{R}^{\bar{T}-1} \text{ s.t. }
\mathbb{E}[\tilde{Y}_n(\bar{\theta}) - \tilde{X}\tilde{\tau}] \leq 0,
where :math:`\tilde{Y}(\bar{\theta}) = Y_n - \tilde{A}_{(\cdot,1)}\bar{\theta}` and
:math:`\tilde{X} = \tilde{A}_{(\cdot,-1)}`.
Parameters
----------
betahat : ndarray
Vector of estimated event study coefficients :math:`\hat{\beta}`.
sigma : ndarray
Covariance matrix :math:`\Sigma` of betahat.
l_vec : ndarray
Vector :math:`l` defining parameter of interest :math:`\theta = l'\tau_{post}`.
a_matrix : ndarray
Constraint matrix :math:`A` defining the set :math:`\Delta`.
d_vec : ndarray
Constraint bounds :math:`d` such that :math:`\Delta = \{\delta : A\delta \leq d\}`.
num_pre_periods : int
Number of pre-treatment periods :math:`T_{pre}`.
num_post_periods : int
Number of post-treatment periods :math:`T_{post}`.
alpha : float, default=0.05
Significance level :math:`\alpha` for confidence interval.
hybrid_flag : {'ARP', 'LF', 'FLCI'}, default='ARP'
Type of test to use. 'ARP' is the standard conditional test, 'LF' uses
a least favorable critical value for the first stage, and 'FLCI' uses
fixed-length confidence intervals for improved power.
hybrid_list : dict, optional
Parameters for hybrid tests, including hybrid_kappa (first-stage size),
lf_cv (least favorable critical value), or FLCI parameters.
grid_lb : float, optional
Lower bound for grid search. If None, uses :math:`-20 \cdot SE(\theta)`.
grid_ub : float, optional
Upper bound for grid search. If None, uses :math:`20 \cdot SE(\theta)`.
grid_points : int, default=1000
Number of grid points to test.
rows_for_arp : ndarray, optional
Subset of moments to use for ARP test. Useful when some moments are
uninformative about post-treatment effects.
Returns
-------
ARPNuisanceCIResult
NamedTuple containing CI bounds, acceptance grid, and length.
Notes
-----
The method handles nuisance parameters by reparametrizing the problem using
an invertible transformation :math:`\Gamma` with :math:`l` as its first row.
This allows expressing the constraints in terms of :math:`(\theta, \xi)` where
:math:`\xi` are nuisance parameters. The test then profiles over :math:`\xi` for each
value of :math:`\theta`, using either primal or dual optimization depending on the conditioning
set's geometry.
The test controls size uniformly without requiring the linear independence constraint
qualification (LICQ). However, when LICQ holds (i.e., gradients of binding constraints
are linearly independent), Proposition 3.3 shows the conditional test achieves optimal
local asymptotic power converging to the power envelope for tests controlling size in
the finite-sample normal model.
References
----------
.. [1] Andrews, I., Roth, J., & Pakes, A. (2023). Inference for Linear
Conditional Moment Inequalities. Review of Economic Studies.
.. [2] Rambachan, A., & Roth, J. (2023). A more credible approach to
parallel trends. Review of Economic Studies, 90(5), 2555-2591.
"""
if hybrid_list is None:
hybrid_list = {}
if grid_lb is None or grid_ub is None:
sd_theta = np.sqrt(l_vec.flatten() @ sigma[num_pre_periods:, num_pre_periods:] @ l_vec.flatten())
if grid_lb is None:
grid_lb = -20.0 * sd_theta
if grid_ub is None:
grid_ub = 20.0 * sd_theta
theta_grid = np.linspace(grid_lb, grid_ub, grid_points)
# Construct invertible transformation matrix Gamma with l_vec as first row
# This allows us to reparametrize the problem in terms of theta = l'*beta
gamma = _construct_gamma(l_vec)
# Transform constraint matrix A using Gamma^(-1) to work in theta-space
# Extract columns corresponding to post-treatment periods and transform
a_gamma_inv = a_matrix[:, num_pre_periods : num_pre_periods + num_post_periods] @ np.linalg.inv(gamma)
# First column corresponds to theta, remaining columns to nuisance parameters
a_gamma_inv_one = a_gamma_inv[:, 0]
a_gamma_inv_minus_one = a_gamma_inv[:, 1:]
y = a_matrix @ betahat - d_vec
sigma_y = a_matrix @ sigma @ a_matrix.T
# Least favorable CV if needed
if hybrid_flag == "LF":
hybrid_list["lf_cv"] = compute_least_favorable_cv(
a_gamma_inv_minus_one,
sigma_y,
hybrid_list["hybrid_kappa"],
rows_for_arp=rows_for_arp,
)
y_t_matrix = prepare_theta_grid_y_values(y, a_gamma_inv_one, theta_grid)
test_fn = partial(
_test_theta_accepted,
y_t_matrix=y_t_matrix,
hybrid_flag=hybrid_flag,
hybrid_list=hybrid_list,
d_vec=d_vec,
a_gamma_inv_one=a_gamma_inv_one,
theta_grid=theta_grid,
a_gamma_inv_minus_one=a_gamma_inv_minus_one,
sigma_y=sigma_y,
alpha=alpha,
rows_for_arp=rows_for_arp,
)
lb_idx, ub_idx = _binary_search_ci_bounds(test_fn, grid_points)
accept_grid = np.zeros(grid_points)
if lb_idx is not None:
accept_grid[lb_idx : ub_idx + 1] = 1.0
results_grid = np.column_stack([theta_grid, accept_grid])
if lb_idx is not None:
ci_lb = theta_grid[lb_idx]
ci_ub = theta_grid[ub_idx]
else:
ci_lb = np.nan
ci_ub = np.nan
grid_spacing = np.diff(theta_grid)
grid_lengths = 0.5 * np.concatenate([[grid_spacing[0]], grid_spacing[:-1] + grid_spacing[1:], [grid_spacing[-1]]])
length = np.sum(accept_grid * grid_lengths) if lb_idx is not None else np.nan
if accept_grid[0] == 1 or accept_grid[-1] == 1:
warnings.warn("CI is open at one of the endpoints; CI bounds may not be accurate.", UserWarning)
return ARPNuisanceCIResult(
ci_lb=ci_lb,
ci_ub=ci_ub,
accept_grid=results_grid,
length=length,
)
[docs]
def lp_conditional_test(
y_t,
x_t=None,
sigma=None,
alpha=0.05,
hybrid_flag="ARP",
hybrid_list=None,
rows_for_arp=None,
):
r"""Perform Andrews-Roth-Pakes (ARP) test of moment inequality with nuisance parameters.
Tests the null hypothesis :math:`H_0: \exists \tilde{\tau} \in \mathbb{R}^{\bar{T}-1} \text{ s.t. }
\mathbb{E}[\tilde{Y}_n(\bar{\theta}) - \tilde{X}\tilde{\tau}] \leq 0`, where
:math:`\tilde{Y}_n(\bar{\theta}) = Y_n - \tilde{A}_{(\cdot,1)}\bar{\theta}` has been adjusted
for the hypothesized value of :math:`\theta`. This is the core testing problem in the ARP framework.
The test statistic from equation (14) in [2]_ is
.. math::
\hat{\eta} = \min_{\eta, \tilde{\tau}} \eta \text{ s.t. }
\tilde{Y}_n(\bar{\theta}) - \tilde{X}\tilde{\tau} \leq \tilde{\sigma}_n \cdot \eta,
where :math:`\tilde{\sigma}_n = \sqrt{\text{diag}(\tilde{\Sigma}_n)}` and
:math:`\tilde{\Sigma}_n = A\Sigma_n A'`. The test conditions on the binding moments at
the optimum, leading to a truncated normal critical value.
The conditional distribution of :math:`\hat{\eta}` given :math:`\gamma_* \in \hat{V}_n` and
:math:`S_n = s` is
.. math::
\hat{\eta} | \{\gamma_* \in \hat{V}_n, S_n = s\} \sim \xi | \xi \in [v^{lo}, v^{up}],
where
.. math::
\xi \sim \mathcal{N}(\gamma_*'\tilde{\mu}(\bar{\theta}), \gamma_*'\tilde{\Sigma}_n\gamma_*),
.. math::
S_n = \left(I - \frac{\tilde{\Sigma}_n\gamma_*\gamma_*'}
{\gamma_*'\tilde{\Sigma}_n\gamma_*}\right)\tilde{Y}_n(\bar{\theta}),
and :math:`[v^{lo}, v^{up}]` are truncation bounds. The conditional test uses critical value
:math:`\max\{0, c_{C,\alpha}\}` where :math:`c_{C,\alpha}` is the :math:`(1-\alpha)` quantile of the
truncated normal under :math:`\gamma_*'\tilde{\mu}(\bar{\theta}) = 0`.
When the optimization problem is degenerate or the binding moments don't have
full rank, the method switches to a dual approach that works directly with
the Lagrange multipliers. This ensures numerical stability and correct inference
even in challenging cases.
Parameters
----------
y_t : ndarray
Outcome vector :math:`Y_T = A\hat{\beta} - d` (already adjusted by :math:`\theta`).
x_t : ndarray or None
Covariate matrix :math:`X_T` for nuisance parameters. If None, no nuisance
parameters are present.
sigma : ndarray
Covariance matrix :math:`\Sigma_Y = A\Sigma A'` of y_t.
alpha : float
Significance level :math:`\alpha` for the test.
hybrid_flag : {'ARP', 'LF', 'FLCI'}
Type of test to perform. 'ARP' is standard conditional test, 'LF' adds
least favorable first stage, 'FLCI' adds fixed-length CI constraints.
hybrid_list : dict, optional
Additional parameters for hybrid tests including hybrid_kappa, lf_cv,
flci_halflength, vbar, and dbar.
rows_for_arp : ndarray, optional
Subset of rows to use for ARP test, allowing focus on informative moments.
Returns
-------
dict
Dictionary containing:
- reject: bool, whether test rejects the null
- eta: float, test statistic value :math:`\eta^*`
- delta: ndarray, optimal nuisance parameters :math:`\xi^*`
- lambda: ndarray, Lagrange multipliers :math:`\lambda^*` at optimum
Notes
-----
The test constructs the least favorable distribution by finding the value of
:math:`\tilde{\tau}` that minimizes the test statistic. Under the null hypothesis,
:math:`\gamma_*'\tilde{\mu}(\bar{\theta}) \leq 0` since :math:`\gamma_* \geq 0`,
:math:`\gamma_*'\tilde{X} = 0`, and there exists :math:`\tilde{\tau}` such that
:math:`\tilde{\mu}(\bar{\theta}) - \tilde{X}\tilde{\tau} \leq 0`.
For the hybrid test, if the first-stage LF test with size :math:`\kappa` rejects
(i.e., :math:`\hat{\eta} > c_{LF,\kappa}`), the test rejects. Otherwise, it applies
a modified conditional test with size :math:`(\alpha-\kappa)/(1-\kappa)` that
conditions on :math:`\hat{\eta} \leq c_{LF,\kappa}`, using :math:`v_H^{up} = \min\{v^{up}, c_{LF,\kappa}\}`.
Under LICQ, the LF-hybrid test's local asymptotic power is at least as good as the
power of the optimal size-:math:`(\alpha-\kappa)/(1-\kappa)` test (Corollary 3.1).
References
----------
.. [1] Andrews, I., Roth, J., & Pakes, A. (2023). Inference for Linear
Conditional Moment Inequalities. Review of Economic Studies.
.. [2] Rambachan, A., & Roth, J. (2023). A more credible approach to
parallel trends. Review of Economic Studies, 90(5), 2555-2591.
"""
if hybrid_list is None:
hybrid_list = {}
if rows_for_arp is None:
rows_for_arp = np.arange(len(y_t))
y_t_arp = y_t[rows_for_arp]
sigma_arp = sigma[np.ix_(rows_for_arp, rows_for_arp)]
x_t_arp = x_t[rows_for_arp] if x_t is not None else None
# No nuisance parameter case
if x_t_arp is None:
sd_vec = np.sqrt(np.diag(sigma_arp))
eta_star = np.max(y_t_arp / sd_vec)
# Hybrid tests
if hybrid_flag == "LF":
mod_size = (alpha - hybrid_list["hybrid_kappa"]) / (1 - hybrid_list["hybrid_kappa"])
if eta_star > hybrid_list.get("lf_cv", np.inf):
return {"reject": True, "eta": eta_star, "delta": np.array([]), "lambda": np.array([])}
elif hybrid_flag == "FLCI":
mod_size = (alpha - hybrid_list["hybrid_kappa"]) / (1 - hybrid_list["hybrid_kappa"])
vbar_mat = np.vstack([hybrid_list["vbar"].T, -hybrid_list["vbar"].T])
if np.max(vbar_mat @ y_t - hybrid_list["dbar"]) > 0:
return {"reject": True, "eta": eta_star, "delta": np.array([]), "lambda": np.array([])}
else:
mod_size = alpha
# Simple case: compare to standard normal
cval = stats.norm.ppf(1 - mod_size)
reject = eta_star > cval
return {
"reject": bool(reject),
"eta": eta_star,
"delta": np.array([]),
"lambda": np.array([]),
}
# Compute eta and argmin delta
lin_soln = _test_delta_lp(y_t_arp, x_t_arp, sigma_arp)
if not lin_soln["success"]:
return {
"reject": False,
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
# First-stage hybrid tests
if hybrid_flag == "LF":
mod_size = (alpha - hybrid_list["hybrid_kappa"]) / (1 - hybrid_list["hybrid_kappa"])
if lin_soln["eta_star"] > hybrid_list.get("lf_cv", np.inf):
return {
"reject": True,
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
elif hybrid_flag == "FLCI":
mod_size = (alpha - hybrid_list["hybrid_kappa"]) / (1 - hybrid_list["hybrid_kappa"])
vbar_mat = np.vstack([hybrid_list["vbar"].T, -hybrid_list["vbar"].T])
if np.max(vbar_mat @ y_t - hybrid_list["dbar"]) > 0:
return {
"reject": True,
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
elif hybrid_flag == "ARP":
mod_size = alpha
else:
raise ValueError(f"Invalid hybrid_flag: {hybrid_flag}")
tol_lambda = 1e-6
k = x_t_arp.shape[1] if x_t_arp.ndim > 1 else 1
degenerate_flag = np.sum(lin_soln["lambda"] > tol_lambda) != (k + 1)
b_index = lin_soln["lambda"] > tol_lambda
bc_index = ~b_index
if x_t_arp.ndim == 1:
x_t_arp = x_t_arp.reshape(-1, 1)
x_tb = x_t_arp[b_index]
if x_tb.size == 0 or (x_tb.ndim == 1 and len(x_tb) < k):
full_rank_flag = False
else:
if x_tb.ndim == 1:
x_tb = x_tb.reshape(-1, 1)
full_rank_flag = np.linalg.matrix_rank(x_tb) == min(x_tb.shape)
# Use dual approach if degenerate or not full rank
# The dual approach handles cases where the primal problem is ill-conditioned
# by working with the Lagrangian dual formulation
if not full_rank_flag or degenerate_flag:
# Work with Lagrange multipliers directly
lp_dual_soln = _lp_dual_wrapper(y_t_arp, x_t_arp, lin_soln["eta_star"], lin_soln["lambda"], sigma_arp)
sigma_b_dual2 = float(lp_dual_soln["gamma_tilde"].T @ sigma_arp @ lp_dual_soln["gamma_tilde"])
if abs(sigma_b_dual2) < np.finfo(float).eps:
reject = lin_soln["eta_star"] > 0
return {
"reject": bool(reject),
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
if sigma_b_dual2 < 0:
raise ValueError("Negative variance in dual approach")
sigma_b_dual = np.sqrt(sigma_b_dual2)
maxstat = lp_dual_soln["eta"] / sigma_b_dual
if hybrid_flag == "LF":
zlo_dual = lp_dual_soln["vlo"] / sigma_b_dual
zup_dual = min(lp_dual_soln["vup"], hybrid_list.get("lf_cv", np.inf)) / sigma_b_dual
elif hybrid_flag == "FLCI":
gamma_full = np.zeros(len(y_t))
gamma_full[rows_for_arp] = lp_dual_soln["gamma_tilde"]
sigma_gamma = (sigma @ gamma_full) / float(gamma_full.T @ sigma @ gamma_full)
s_vec = y_t - sigma_gamma * float(gamma_full.T @ y_t)
v_flci = _compute_flci_vlo_vup(hybrid_list["vbar"], hybrid_list["dbar"], s_vec, sigma_gamma)
zlo_dual = max(lp_dual_soln["vlo"], v_flci["vlo"]) / sigma_b_dual
zup_dual = min(lp_dual_soln["vup"], v_flci["vup"]) / sigma_b_dual
else:
zlo_dual = lp_dual_soln["vlo"] / sigma_b_dual
zup_dual = lp_dual_soln["vup"] / sigma_b_dual
if not zlo_dual <= maxstat <= zup_dual:
return {
"reject": False,
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
cval = max(0.0, _norminvp_generalized(1 - mod_size, zlo_dual, zup_dual))
reject = maxstat > cval
else:
# Construct test statistic using binding constraints
# This approach leverages the KKT conditions at the optimum
size_b = np.sum(b_index)
sd_vec = np.sqrt(np.diag(sigma_arp))
sd_vec_b = sd_vec[b_index]
sd_vec_bc = sd_vec[bc_index]
x_tbc = x_t_arp[bc_index]
# Selection matrices for binding and non-binding constraints
s_b = np.eye(len(y_t_arp))[b_index]
s_bc = np.eye(len(y_t_arp))[bc_index]
# Construct matrix that relates binding and non-binding constraints
# W matrices combine standard deviations and covariates
w_b = np.column_stack([sd_vec_b.reshape(-1, 1), x_tb])
w_bc = np.column_stack([sd_vec_bc.reshape(-1, 1), x_tbc])
# Project non-binding constraints onto the space of binding constraints
gamma_b = w_bc @ np.linalg.inv(w_b) @ s_b - s_bc
# Efficient score direction for eta
# e1 is first basis vector, which selects eta from (eta, delta)
e1 = basis_vector(1, size_b)
v_b_short = np.linalg.inv(w_b).T @ e1
# Project back to full space of all constraints
v_b = s_b.T @ v_b_short
# Variance of the test statistic
sigma2_b = float((v_b.T @ sigma_arp @ v_b).item())
sigma_b = np.sqrt(sigma2_b)
# Correlation vector between non-binding and binding constraints
rho = gamma_b @ sigma_arp @ v_b / sigma2_b
# Bounds for the test stat under the null
# These bounds arise from the constraint that non-binding constraints remain non-binding
numerator = -gamma_b @ y_t_arp
denominator = rho.flatten()
v_b_y = float((v_b.T @ y_t_arp).item())
# Each constraint gives either an upper or lower bound depending on sign of rho
maximand_or_minimand = numerator / denominator + v_b_y
vlo = np.max(maximand_or_minimand[denominator > 0]) if np.any(denominator > 0) else -np.inf
vup = np.min(maximand_or_minimand[denominator < 0]) if np.any(denominator < 0) else np.inf
if hybrid_flag == "LF":
zlo = vlo / sigma_b
zup = min(vup, hybrid_list.get("lf_cv", np.inf)) / sigma_b
elif hybrid_flag == "FLCI":
gamma_full = np.zeros(len(y_t))
gamma_full[rows_for_arp] = v_b.flatten()
sigma_gamma = (sigma @ gamma_full) / float(gamma_full.T @ sigma @ gamma_full)
s_vec = y_t - sigma_gamma * float(gamma_full.T @ y_t)
v_flci = _compute_flci_vlo_vup(hybrid_list["vbar"], hybrid_list["dbar"], s_vec, sigma_gamma)
zlo = max(vlo, v_flci["vlo"]) / sigma_b
zup = min(vup, v_flci["vup"]) / sigma_b
else:
zlo = vlo / sigma_b
zup = vup / sigma_b
maxstat = lin_soln["eta_star"] / sigma_b
if not zlo <= maxstat <= zup:
return {
"reject": False,
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
cval = max(0.0, _norminvp_generalized(1 - mod_size, zlo, zup))
reject = maxstat > cval
return {
"reject": bool(reject),
"eta": lin_soln["eta_star"],
"delta": lin_soln["delta_star"],
"lambda": lin_soln["lambda"],
}
[docs]
def compute_vlo_vup_dual(
eta,
s_t,
gamma_tilde,
sigma,
w_t,
):
r"""Compute the truncation bounds for the test statistic using dual approach with bisection.
Computes the truncation bounds for the test statistic :math:`\tilde{\gamma}'Y`
under the conditional distribution using the dual formulation. The bounds
:math:`[v_{lo}, v_{up}]` are determined by the requirement that :math:`\tilde{\gamma}`
remains the optimal dual solution.
The method uses a hybrid approach that first attempts a shortcut based on
the first-order optimality conditions, then falls back to bisection if needed.
The shortcut exploits the fact that at the boundary :math:`v = v_{lo}` or
:math:`v = v_{up}`, a new constraint becomes binding, allowing direct computation
in many cases.
Parameters
----------
eta : float
Optimal value :math:`\eta^*` from the linear program.
s_t : ndarray
Residual vector :math:`s = (I - b\gamma')Y` where :math:`b = \Sigma\gamma/(\gamma'\Sigma\gamma)`.
gamma_tilde : ndarray
Normalized dual solution :math:`\tilde{\gamma}` with :math:`\sum_i \tilde{\gamma}_i = 1`.
sigma : ndarray
Covariance matrix :math:`\Sigma_Y`.
w_t : ndarray
Constraint matrix :math:`W = [\text{diag}(\sigma)^{1/2}, X_T]`.
Returns
-------
dict
Dictionary with:
- vlo: float, lower bound of conditional support
- vup: float, upper bound of conditional support
Notes
-----
The bounds are found by solving :math:`\max_{\lambda \geq 0} \lambda's` subject to
:math:`W'\lambda = e_1` and :math:`\lambda'\mathbf{1} = 1`, where :math:`e_1` is the
first standard basis vector. The value :math:`v` where this maximum equals :math:`v`
itself determines the boundary of the support.
References
----------
.. [1] Andrews, I., Roth, J., & Pakes, A. (2023). Inference for Linear
Conditional Moment Inequalities. Review of Economic Studies.
.. [2] Rambachan, A., & Roth, J. (2023). A more credible approach to
parallel trends. Review of Economic Studies, 90(5), 2555-2591.
"""
tol_c = 1e-6
tol_equality = 1e-6
sigma_b = np.sqrt(float(gamma_tilde.T @ sigma @ gamma_tilde))
low_initial = min(-100.0, eta - 20 * sigma_b)
high_initial = max(100.0, eta + 20 * sigma_b)
max_iters = 10000
switch_iters = 10
_, is_solution = _check_if_solution(eta, tol_equality, s_t, gamma_tilde, sigma, w_t)
if not is_solution:
return {"vlo": eta, "vup": np.inf}
# Upper bound for the test stat support
result, is_solution = _check_if_solution(high_initial, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
vup = np.inf
else:
# Try shortcut method first: use LP solution to get better initial guess
# This exploits the structure of the problem to converge faster than bisection
iters = 1
sigma_gamma = float(gamma_tilde.T @ sigma @ gamma_tilde)
b = (sigma @ gamma_tilde) / sigma_gamma
# Use first-order approximation from LP solution if successful
mid = _round_eps(float(result.x @ s_t)) / (1 - float(result.x @ b)) if result.success else high_initial
# Iterate shortcut method for a few steps
while iters < switch_iters:
result, is_solution = _check_if_solution(mid, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
break
iters += 1
if result.success:
mid = _round_eps(float(result.x @ s_t)) / (1 - float(result.x @ b))
else:
break
# Bisection method: guaranteed to converge but slower
# Use when shortcut method hasn't found the boundary
low, high = eta, mid
diff = tol_c + 1
while diff > tol_c and iters < max_iters:
iters += 1
mid = (high + low) / 2
_, is_solution = _check_if_solution(mid, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
low = mid
else:
high = mid
diff = high - low
vup = mid
# Compute vlo using bisection method
result, is_solution = _check_if_solution(low_initial, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
vlo = -np.inf
else:
# Try shortcut method first
iters = 1
sigma_gamma = float(gamma_tilde.T @ sigma @ gamma_tilde)
b = (sigma @ gamma_tilde) / sigma_gamma
mid = _round_eps(float(result.x @ s_t)) / (1 - float(result.x @ b)) if result.success else low_initial
while iters < switch_iters:
result, is_solution = _check_if_solution(mid, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
break
iters += 1
if result.success:
mid = _round_eps(float(result.x @ s_t)) / (1 - float(result.x @ b))
else:
break
# Bisection method now that shortcut method failed
low, high = mid, eta
diff = tol_c + 1
while diff > tol_c and iters < max_iters:
iters += 1
mid = (low + high) / 2
_, is_solution = _check_if_solution(mid, tol_equality, s_t, gamma_tilde, sigma, w_t)
if is_solution:
high = mid
else:
low = mid
diff = high - low
vlo = mid
return {"vlo": vlo, "vup": vup}
[docs]
def compute_least_favorable_cv(
x_t=None,
sigma=None,
hybrid_kappa=0.05,
sims=1000,
rows_for_arp=None,
seed=0,
):
r"""Compute least favorable critical value.
Computes the critical value :math:`c_{LF,\kappa}` for the least favorable (LF) hybrid test,
which uses a data-dependent first stage that rejects for large values of the test
statistic :math:`\hat{\eta}`. The LF critical value is the :math:`(1-\kappa)` quantile of
:math:`\max_{\gamma \in V(\Sigma)} \gamma'\xi` where :math:`\xi \sim \mathcal{N}(0, \tilde{\Sigma}_n)`.
The least favorable distribution assumes :math:`\tilde{\mu}(\bar{\theta}) = 0`, which
maximizes the rejection probability under the null. Since the distribution of :math:`\hat{\eta}`
under the null is bounded above (in first-order stochastic dominance) by its distribution
when :math:`\tilde{\mu}(\bar{\theta}) = 0`, this ensures size control.
For the hybrid test, if :math:`\hat{\eta} > c_{LF,\kappa}`, the first stage rejects.
Otherwise, the second stage applies a modified conditional test with size
:math:`(\alpha - \kappa)/(1 - \kappa)` that conditions on :math:`\hat{\eta} \leq c_{LF,\kappa}`.
Parameters
----------
x_t : ndarray or None
Covariate matrix :math:`X_T` for nuisance parameters. If None, the test
has no nuisance parameters.
sigma : ndarray
Covariance matrix :math:`\Sigma_Y` of the moments.
hybrid_kappa : float
First-stage size :math:`\kappa`, typically :math:`\alpha/10`.
sims : int
Number of Monte Carlo simulations for critical value computation.
rows_for_arp : ndarray, optional
Subset of rows to use, focusing on informative moments.
seed : int or None
Random seed for reproducibility.
Returns
-------
float
Least favorable critical value :math:`c_{LF}` such that
:math:`\mathbb{P}(\eta^* > c_{LF}) = \kappa` under the least favorable distribution.
Notes
-----
Without nuisance parameters, the least favorable distribution is standard
multivariate normal and :math:`\eta^* = \max_i Z_i/\sigma_i` where :math:`Z_i`
are the standardized moments. With nuisance parameters, each simulation
requires solving the linear program to account for optimization over :math:`\xi`.
References
----------
.. [1] Andrews, I., Roth, J., & Pakes, A. (2023). Inference for Linear
Conditional Moment Inequalities. Review of Economic Studies.
.. [2] Rambachan, A., & Roth, J. (2023). A more credible approach to
parallel trends. Review of Economic Studies, 90(5), 2555-2591.
"""
rng = np.random.default_rng(seed)
if rows_for_arp is not None:
if x_t is not None:
x_t = x_t[rows_for_arp]
sigma = sigma[np.ix_(rows_for_arp, rows_for_arp)]
if x_t is None:
# No nuisance parameter case: simulate max of standardized normal vector
xi_draws = rng.multivariate_normal(mean=np.zeros(sigma.shape[0]), cov=sigma, size=sims)
sd_vec = np.sqrt(np.diag(sigma))
xi_draws = xi_draws / sd_vec
eta_vec = np.max(xi_draws, axis=1)
return float(np.quantile(eta_vec, 1 - hybrid_kappa))
# Nuisance parameter case: need to solve LP for each simulation
# This finds the least favorable distribution that maximizes size
if x_t.ndim == 1:
x_t = x_t.reshape(-1, 1)
sd_vec = np.sqrt(np.diag(sigma))
dim_delta = x_t.shape[1]
c = np.concatenate([[1.0], np.zeros(dim_delta)])
C = -np.column_stack([sd_vec, x_t])
xi_draws = rng.multivariate_normal(mean=np.zeros(sigma.shape[0]), cov=sigma, size=sims)
eta_vec = []
for xi in xi_draws:
# Dual simplex
result = opt.linprog(
c=c,
A_ub=C,
b_ub=-xi,
bounds=[(None, None) for _ in range(len(c))],
method="highs-ds",
)
if result.success:
eta_vec.append(result.x[0])
if len(eta_vec) == 0:
raise RuntimeError("Failed to compute any valid eta values")
return float(np.quantile(eta_vec, 1 - hybrid_kappa))
def _test_delta_lp(y_t, x_t, sigma):
r"""Solve linear program for delta test.
Solves the primal optimization problem from equation (14) in [2]_
.. math::
\hat{\eta} = \min_{\eta, \tilde{\tau}} \eta \quad \text{s.t.} \quad
\tilde{Y}_n(\bar{\theta}) - \tilde{X}\tilde{\tau} \leq \tilde{\sigma}_n \cdot \eta.
This linear program finds the smallest scaling :math:`\eta` such that there exists
a nuisance parameter vector :math:`\tilde{\tau}` making all moment inequalities satisfied.
The solution characterizes whether the null hypothesis can be rejected and identifies
which moments bind at the optimum.
By duality (equation (15) in [2]_), this equals
.. math::
\hat{\eta} = \max_{\gamma} \gamma'\tilde{Y}_n(\bar{\theta}) \text{ s.t. }
\gamma'\tilde{X} = 0, \gamma'\tilde{\sigma}_n = 1, \gamma \geq 0.
The dual solution :math:`\gamma_*` (Lagrange multipliers) is crucial for the conditional
inference approach as it determines the conditioning event. The vertices :math:`\hat{V}_n \subset V(\Sigma_n)`
correspond to basic feasible solutions of the dual program, and conditioning on :math:`\gamma_* \in \hat{V}_n`
ensures correct coverage.
Parameters
----------
y_t : ndarray
Outcome vector :math:`y_T = A\hat{\beta} - d` adjusted for hypothesized :math:`\theta`.
x_t : ndarray
Covariate matrix :math:`X_T` corresponding to nuisance parameters.
sigma : ndarray
Covariance matrix :math:`\Sigma_Y` of y_t.
Returns
-------
dict
Dictionary containing:
- eta_star: float, optimal value :math:`\eta^*`
- delta_star: ndarray, optimal nuisance parameters :math:`\xi^*`
- lambda: ndarray, Lagrange multipliers :math:`\lambda^*` (dual solution)
- success: bool, whether optimization succeeded
Notes
-----
The constraint :math:`y_T - X_T\xi \leq \eta \cdot \text{diag}(\Sigma)^{1/2}` normalizes
each moment by its standard deviation, ensuring scale invariance. The optimal
:math:`\eta^*` can be interpreted as the maximum standardized violation of the
moment inequalities under the least favorable choice of :math:`\xi`.
References
----------
.. [1] Andrews, I., Roth, J., & Pakes, A. (2023). Inference for Linear
Conditional Moment Inequalities. Review of Economic Studies.
.. [2] Rambachan, A., & Roth, J. (2023). A more credible approach to
parallel trends. Review of Economic Studies, 90(5), 2555-2591.
"""
if x_t.ndim == 1:
x_t = x_t.reshape(-1, 1)
dim_delta = x_t.shape[1]
sd_vec = np.sqrt(np.diag(sigma))
# Minimize eta
c = np.concatenate([[1.0], np.zeros(dim_delta)])
# Constraints are -sd_vec*eta - X_T*delta <= -y_T (y_T = a_matrix @ betahat - d_vec)
A_ub = -np.column_stack([sd_vec, x_t])
b_ub = -y_t
# Bounds: eta and delta unbounded
bounds = [(None, None) for _ in range(len(c))]
# Use dual simplex approach
result = opt.linprog(
c=c,
A_ub=A_ub,
b_ub=b_ub,
bounds=bounds,
method="highs-ds",
)
if result.success:
eta_star = result.x[0]
delta_star = result.x[1:]
# Get dual variables (Lagrange multipliers)
dual_vars = -result.ineqlin.marginals if hasattr(result, "ineqlin") else np.zeros(len(b_ub))
else:
eta_star = np.nan
delta_star = np.full(dim_delta, np.nan)
dual_vars = np.zeros(len(b_ub))
return {
"eta_star": eta_star,
"delta_star": delta_star,
"lambda": dual_vars,
"success": result.success,
}
def _lp_dual_wrapper(
y_t,
x_t,
eta,
gamma_tilde,
sigma,
):
r"""Wrap vlo and vup computation using bisection approach.
Computes the support bounds :math:`[v_{lo}, v_{up}]` for the test statistic
under the conditional distribution when using the dual approach. The dual
approach is necessary when the primal problem is degenerate or when the
binding constraints don't have full rank.
The key here is that we can work directly with the Lagrange multipliers
:math:`\gamma` (normalized to sum to 1) rather than the primal variables.
The test statistic :math:`\gamma'Y` has a truncated normal distribution
with truncation bounds determined by the requirement that :math:`\gamma`
remains optimal.
Parameters
----------
y_t : ndarray
Outcome vector :math:`Y_T = A\hat{\beta} - d`.
x_t : ndarray
Covariate matrix :math:`X_T` (may not have full column rank).
eta : float
Optimal value :math:`\eta^*` from the linear program.
gamma_tilde : ndarray
Normalized Lagrange multipliers :math:`\tilde{\gamma} = \lambda / \sum_i \lambda_i`.
sigma : ndarray
Covariance matrix :math:`\Sigma_Y` of y_t.
Returns
-------
dict
Dictionary containing:
- vlo: float, lower bound of support
- vup: float, upper bound of support
- eta: float, optimal value (passed through)
- gamma_tilde: ndarray, normalized multipliers (passed through)
Notes
-----
The dual approach constructs a valid test by working with the optimality
conditions of the linear program. Even when the primal problem is ill-conditioned,
the dual formulation provides a well-defined test statistic with computable
truncation bounds.
"""
if x_t.ndim == 1:
x_t = x_t.reshape(-1, 1)
sd_vec = np.sqrt(np.diag(sigma))
w_t = np.column_stack([sd_vec, x_t])
# Residual after projecting out gamma_tilde direction
# This is the component of y_t orthogonal to gamma_tilde under the metric sigma
gamma_sigma_gamma = float(gamma_tilde.T @ sigma @ gamma_tilde)
# This handles the degenerate case where gamma_sigma_gamma is zero or negative
# This can occur when constraints are linear combinations of each other
# (e.g., row i and row j are negatives with equal Lagrange multipliers)
# Return special values that the caller can detect and handle
if gamma_sigma_gamma <= np.finfo(float).eps:
return {
"vlo": -np.inf,
"vup": np.inf,
"eta": eta,
"gamma_tilde": gamma_tilde,
}
# Projection matrix is I - (sigma * gamma * gamma') / (gamma' * sigma * gamma)
s_t = (np.eye(len(y_t)) - (sigma @ np.outer(gamma_tilde, gamma_tilde)) / gamma_sigma_gamma) @ y_t
v_dict = compute_vlo_vup_dual(eta, s_t, gamma_tilde, sigma, w_t)
return {
"vlo": v_dict["vlo"],
"vup": v_dict["vup"],
"eta": eta,
"gamma_tilde": gamma_tilde,
}
def _solve_max_program(
s_t,
gamma_tilde,
sigma,
w_t,
c,
):
r"""Solve linear program for maximum.
Solves: :math:`\\max f'x \\text{ s.t. } W_T'x = b_{eq}`
Parameters
----------
s_t : ndarray
Modified outcome vector.
gamma_tilde : ndarray
Dual solution vector.
sigma : ndarray
Covariance matrix.
w_t : ndarray
Constraint matrix.
c : float
Scalar parameter.
Returns
-------
OptimizeResult
Linear programming solution.
"""
sigma_gamma = float(gamma_tilde.T @ sigma @ gamma_tilde)
if sigma_gamma <= 0:
raise ValueError("gamma'*sigma*gamma must be positive")
f = s_t + (sigma @ gamma_tilde) * c / sigma_gamma
A_eq = w_t.T
b_eq = np.zeros(A_eq.shape[0])
b_eq[0] = 1.0
n_vars = len(f)
bounds = [(0, None) for _ in range(n_vars)]
result = opt.linprog(
c=-f,
A_eq=A_eq,
b_eq=b_eq,
bounds=bounds,
method="highs",
)
result.objective_value = -result.fun if result.success else np.nan
return result
def _check_if_solution(
c,
tol,
s_t,
gamma_tilde,
sigma,
w_t,
):
"""Check if :math:`c` is a solution to the dual problem.
Parameters
----------
c : float
Value to test.
tol : float
Tolerance for equality check.
s_t : ndarray
Modified outcome vector.
gamma_tilde : ndarray
Dual solution vector.
sigma : ndarray
Covariance matrix.
w_t : ndarray
Constraint matrix.
Returns
-------
result : OptimizeResult
Linear programming result.
is_solution : bool
Whether c is a solution.
"""
result = _solve_max_program(s_t, gamma_tilde, sigma, w_t, c)
is_solution = result.success and abs(c - result.objective_value) <= tol
return result, is_solution
def _compute_flci_vlo_vup(vbar, dbar, s_vec, c_vec):
r"""Compute vlo and vup for FLCI hybrid.
Computes the truncation bounds :math:`[v_{lo}, v_{up}]` that arise from
imposing the FLCI constraints :math:`|\bar{v}'Y| \leq \bar{d}` in the
hybrid test. These bounds ensure that the FLCI constraints remain satisfied
under the conditional distribution.
The FLCI constraints can be written as :math:`\bar{v}'Y \leq \bar{d}` and
:math:`-\bar{v}'Y \leq \bar{d}`. Under the conditional distribution where
:math:`Y = s + cv` for scalar :math:`v`, these become linear constraints
on :math:`v`, yielding the truncation bounds.
Parameters
----------
vbar : ndarray
FLCI coefficient vector :math:`\bar{v}` from the FLCI optimization.
dbar : ndarray
FLCI bounds :math:`\bar{d}`, typically the FLCI half-length.
s_vec : ndarray
Residual vector :math:`s` after projecting out the test direction.
c_vec : ndarray
Scaling vector :math:`c` for the test statistic direction.
Returns
-------
dict
Dictionary with:
- vlo: float, lower truncation bound
- vup: float, upper truncation bound
Notes
-----
Each FLCI constraint :math:`\pm\bar{v}'(s + cv) \leq \bar{d}` gives either
an upper or lower bound on :math:`v` depending on the sign of :math:`\bar{v}'c`.
Constraints with :math:`\bar{v}'c > 0` yield upper bounds, while those with
:math:`\bar{v}'c < 0` yield lower bounds.
"""
vbar_mat = np.vstack([vbar.T, -vbar.T])
vbar_c = vbar_mat @ c_vec
vbar_s = vbar_mat @ s_vec
# Solve for critical values where linear constraints become binding
# Each constraint vbar'(s + c*v) <= d gives bound on v (v = vbar)
max_or_min = (dbar - vbar_s) / vbar_c
# Constraints with negative coefficients give lower bounds (vlo)
vlo = np.max(max_or_min[vbar_c < 0]) if np.any(vbar_c < 0) else -np.inf
# Constraints with positive coefficients give upper bounds (vup)
vup = np.min(max_or_min[vbar_c > 0]) if np.any(vbar_c > 0) else np.inf
return {"vlo": vlo, "vup": vup}
def _construct_gamma(l_vec):
r"""Construct invertible matrix Gamma with l_vec as first row.
Constructs an invertible matrix :math:`\Gamma` such that its first row equals
:math:`l'`. This transformation is central to the ARP approach as it allows
reparametrizing :math:`\beta_{post} = \Gamma^{-1}(\theta, \xi)'` where
:math:`\theta = l'\beta_{post}` is the parameter of interest and :math:`\xi`
are nuisance parameters.
The construction places :math:`l` as the first row and fills the remaining
rows with standard basis vectors, dropping the one corresponding to the
largest absolute component of :math:`l` for numerical stability.
Parameters
----------
l_vec : ndarray
Vector :math:`l` defining the parameter of interest :math:`\theta = l'\beta_{post}`.
Must have length equal to the number of post-treatment periods.
Returns
-------
ndarray
Invertible matrix :math:`\Gamma` with :math:`l'` as its first row.
Shape is :math:`(T_{post}, T_{post})` where :math:`T_{post}` is the
number of post-treatment periods.
Raises
------
ValueError
If construction fails to produce an invertible matrix.
Notes
-----
This transformation enables the ARP test to separate the parameter of interest
:math:`\theta` from nuisance parameters :math:`\xi`. After transformation,
the constraints become :math:`A\Gamma^{-1}(\theta, \xi)' \leq d`, allowing
the test to profile over :math:`\xi` for each value of :math:`\theta`.
"""
bar_t = len(l_vec)
l_flat = l_vec.flatten()
drop_idx = int(np.argmax(np.abs(l_flat)))
gamma = np.zeros((bar_t, bar_t))
gamma[0] = l_flat
row = 1
for i in range(bar_t):
if i != drop_idx:
gamma[row, i] = 1.0
row += 1
if abs(np.linalg.det(gamma)) < 1e-10:
raise ValueError("Failed to construct invertible Gamma matrix")
return gamma
def _binary_search_ci_bounds(test_fn, n_points):
"""Find CI boundaries using binary search assuming contiguous acceptance.
Parameters
----------
test_fn : callable
Function that takes an index i and returns True if accepted.
n_points : int
Number of grid points.
Returns
-------
tuple
(lb_idx, ub_idx) or (None, None) if no accepted points.
"""
mid = n_points // 2
seed_idx = None
if test_fn(mid):
seed_idx = mid
else:
for offset in range(1, n_points):
left = mid - offset
right = mid + offset
if left >= 0 and test_fn(left):
seed_idx = left
break
if right < n_points and test_fn(right):
seed_idx = right
break
if left < 0 and right >= n_points:
break
if seed_idx is None:
return None, None
if seed_idx == 0 or not test_fn(0):
lo, hi = 0, seed_idx
while lo < hi:
m = (lo + hi) // 2
if test_fn(m):
hi = m
else:
lo = m + 1
lb_idx = lo
else:
lb_idx = 0
if seed_idx == n_points - 1 or not test_fn(n_points - 1):
lo, hi = seed_idx, n_points - 1
while lo < hi:
m = (lo + hi + 1) // 2
if test_fn(m):
lo = m
else:
hi = m - 1
ub_idx = lo
else:
ub_idx = n_points - 1
return lb_idx, ub_idx
def _round_eps(x, eps=None):
r"""Round value to zero if within machine epsilon.
Parameters
----------
x : float
Value to round.
eps : float, optional
Epsilon threshold. If None, uses machine epsilon :math:`\\epsilon^{3/4}`.
Returns
-------
float
Rounded value.
"""
if eps is None:
eps = np.finfo(float).eps ** (3 / 4)
return 0.0 if abs(x) < eps else x
def _test_theta_accepted(
i,
y_t_matrix,
hybrid_flag,
hybrid_list,
d_vec,
a_gamma_inv_one,
theta_grid,
a_gamma_inv_minus_one,
sigma_y,
alpha,
rows_for_arp,
):
"""Test whether theta_grid[i] is accepted.
Parameters
----------
i : int
Index into theta_grid to test.
y_t_matrix : ndarray
Pre-computed y vectors for each theta grid point.
hybrid_flag : str
Type of test ('ARP', 'LF', 'FLCI').
hybrid_list : dict
Parameters for hybrid tests.
d_vec : ndarray
Constraint bounds.
a_gamma_inv_one : ndarray
First column of A @ Gamma^(-1).
theta_grid : ndarray
Grid of theta values.
a_gamma_inv_minus_one : ndarray
Remaining columns of A @ Gamma^(-1).
sigma_y : ndarray
Covariance matrix of y.
alpha : float
Significance level.
rows_for_arp : ndarray or None
Subset of rows for ARP test.
Returns
-------
bool
True if theta_grid[i] is accepted (not rejected).
"""
y_t = y_t_matrix[i]
if hybrid_flag == "FLCI":
hybrid_list["dbar"] = compute_hybrid_dbar(
hybrid_list["flci_halflength"],
hybrid_list["vbar"],
d_vec,
a_gamma_inv_one,
theta_grid[i],
)
result = lp_conditional_test(
y_t=y_t,
x_t=a_gamma_inv_minus_one,
sigma=sigma_y,
alpha=alpha,
hybrid_flag=hybrid_flag,
hybrid_list=hybrid_list,
rows_for_arp=rows_for_arp,
)
return not result["reject"]