Source code for moderndid.didhonest.fixed_length_ci

"""Functions for constructing fixed-length confidence intervals (FLCI)."""

from typing import NamedTuple

import cvxpy as cp
import numpy as np
from scipy import stats
from scipy.optimize import brentq

from .utils import basis_vector, validate_conformable



[docs]
class FLCIResult(NamedTuple):
    """Container for fixed-length confidence interval results.

    Attributes
    ----------
    flci : tuple[float, float]
        The fixed-length confidence interval as (lower, upper).
    optimal_vec : ndarray
        The optimal weight vector over all periods used to construct
        the affine estimator.
    optimal_pre_period_vec : ndarray
        The optimal weight vector restricted to pre-treatment periods.
    optimal_half_length : float
        The half-length of the confidence interval.
    smoothness_bound : float
        The smoothness bound M used in the computation.
    status : str
        Optimization status from the solver.
    """

    #: Fixed-length confidence interval as (lower, upper).
    flci: tuple[float, float]
    #: Optimal weight vector over all periods.
    optimal_vec: np.ndarray
    #: Optimal weight vector restricted to pre-treatment periods.
    optimal_pre_period_vec: np.ndarray
    #: Half-length of the confidence interval.
    optimal_half_length: float
    #: Smoothness bound M used in the computation.
    smoothness_bound: float
    #: Optimization status from the solver.
    status: str




[docs]
def compute_flci(
    beta_hat,
    sigma,
    smoothness_bound,
    n_pre_periods,
    n_post_periods,
    post_period_weights=None,
    num_points=100,
    alpha=0.05,
    seed=0,
):
    r"""Compute fixed-length confidence intervals under smoothness restrictions.

    Constructs fixed-length confidence intervals (FLCIs) based on affine estimators
    that are valid for the linear combination :math:`l'\tau_{post}` under the
    restriction that the underlying trend :math:`\delta` lies in the smoothness
    constraint set :math:`\Delta^{SD}(M)`.

    The FLCI takes the form

    .. math::

        \mathcal{C}_{\alpha,n}(a, v, \chi) = (a + v'\hat{\beta}_n) \pm \chi,

    where :math:`a` is a scalar, :math:`v \in \mathbb{R}^{\underline{T}+\bar{T}}` is a
    weight vector, and :math:`\chi` is the half-length of the confidence interval.

    The optimization minimizes :math:`\chi` subject to the coverage requirement
    in the finite-sample normal model. The smallest value of :math:`\chi` that
    satisfies coverage is

    .. math::

        \chi_n(a, v; \alpha) = \sigma_{v,n} \cdot cv_{\alpha}(\bar{b}(a, v) / \sigma_{v,n}),

    where :math:`\sigma_{v,n} = \sqrt{v'\Sigma_n v}` and :math:`cv_{\alpha}(t)` denotes
    the :math:`1-\alpha` quantile of the folded normal distribution :math:`|N(t, 1)|`.

    Parameters
    ----------
    beta_hat : ndarray
        Vector of estimated event study coefficients :math:`\hat{\beta}`.
        First `n_pre_periods` elements are pre-treatment, remainder are post-treatment.
    sigma : ndarray
        Covariance matrix of estimated coefficients :math:`\Sigma`.
    smoothness_bound : float
        Smoothness parameter :math:`M` for the restriction set :math:`\Delta^{SD}(M)`.
        Bounds the second differences: :math:`|\delta_{t-1} - 2\delta_t + \delta_{t+1}| \leq M`.
    n_pre_periods : int
        Number of pre-treatment periods :math:`T_{pre}`.
    n_post_periods : int
        Number of post-treatment periods :math:`T_{post}`.
    post_period_weights : ndarray, optional
        Weight vector :math:`\ell_{post}` for post-treatment periods. Default is the
        first post-period (i.e., :math:`\ell_{post} = e_1`).
    num_points : int, default=100
        Number of points for grid search in optimization.
    alpha : float, default=0.05
        Significance level for confidence interval.
    seed : int, default=0
        Random seed for reproducibility.

    Returns
    -------
    FLCIResult
        NamedTuple containing:

        - flci: Tuple of (lower, upper) confidence interval bounds
        - optimal_vec: Optimal weight vector :math:`(\ell_{pre}, \ell_{post})` for all periods
        - optimal_pre_period_vec: Optimal weights :math:`\ell_{pre}` for pre-periods
        - optimal_half_length: Half-length of the confidence interval
        - smoothness_bound: Smoothness parameter :math:`M` used
        - status: Optimization status

    Notes
    -----
    The FLCI is computed by solving a nested optimization problem. For each
    candidate standard deviation :math:`h`, we find the worst-case bias under
    :math:`\Delta^{SD}(M)`, then choose :math:`h` to minimize the resulting
    confidence interval length.

    For :math:`\Delta^{SD}(M)` with :math:`\theta = \tau_1`, the affine estimator
    used by the optimal FLCI takes the form in [1]_

    .. math::

        a + v'\hat{\beta}_n = \hat{\beta}_{n,1} -
            \sum_{s=-\underline{T}+1}^{0} w_s(\hat{\beta}_{n,s} - \hat{\beta}_{n,s-1}),

    where the weights :math:`w_s` sum to one (but may be negative). This estimator
    adjusts the event-study coefficient for :math:`t=1` by an estimate of the
    differential trend between :math:`t=0` and :math:`t=1` formed by taking a
    weighted average of the differential trends in periods prior to treatment.

    Under convexity and centrosymmetry conditions on the identified set, FLCIs achieve near-optimal
    expected length in finite samples. When :math:`\alpha = 0.05`, the expected
    length of the shortest possible confidence set that satisfies coverage is at
    most 28% shorter than the FLCI.

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2023). A more credible approach to
        parallel trends. Review of Economic Studies, 90(5), 2555-2591.
    """
    if post_period_weights is None:
        post_period_weights = basis_vector(index=1, size=n_post_periods).flatten()
    else:
        post_period_weights = np.asarray(post_period_weights).flatten()

    beta_hat = np.asarray(beta_hat).flatten()
    sigma = np.asarray(sigma)

    validate_conformable(beta_hat, sigma, n_pre_periods, n_post_periods, post_period_weights)

    flci_results = _optimize_flci_params(
        sigma=sigma,
        smoothness_bound=smoothness_bound,
        n_pre_periods=n_pre_periods,
        n_post_periods=n_post_periods,
        post_period_weights=post_period_weights,
        num_points=num_points,
        alpha=alpha,
        seed=seed,
    )

    point_estimate = flci_results["optimal_vec"] @ beta_hat
    flci_lower = point_estimate - flci_results["optimal_half_length"]
    flci_upper = point_estimate + flci_results["optimal_half_length"]

    return FLCIResult(
        flci=(flci_lower, flci_upper),
        optimal_vec=flci_results["optimal_vec"],
        optimal_pre_period_vec=flci_results["optimal_pre_period_vec"],
        optimal_half_length=flci_results["optimal_half_length"],
        smoothness_bound=flci_results["smoothness_bound"],
        status=flci_results["status"],
    )




[docs]
def maximize_bias(
    h,
    sigma,
    n_pre_periods,
    n_post_periods,
    post_period_weights,
    smoothness_bound=1.0,
):
    r"""Find worst-case bias subject to standard deviation constraint :math:`h`.

    Computes the affine estimator's worst-case bias, which for :math:`\Delta^{SD}(M)`
    is found by solving the following Second-Order Cone Program (SOCP)

    .. math::

        \min_{w, t} \quad & C_{bias} + \sum_{s=-\underline{T}+1}^{0} t_s \\
        \text{s.t.} \quad & -t_s \leq \sum_{j=-\underline{T}+1}^{s} w_j \leq t_s, \quad \forall s \\
                          & \sum_{s=-\underline{T}+1}^{0} w_s = \sum_{s=1}^{\bar{T}} s \cdot \ell_{post,s} \\
                          & \text{Var}(\ell'_{pre}\hat{\beta}_{pre} + \ell'_{post}\hat{\beta}_{post}) \leq h^2.

    Here, the optimization is over first-difference weights :math:`w` and slack
    variables :math:`t`. The vector :math:`\ell_{pre}` contains the cumulative sums
    of :math:`w`. The quadratic variance constraint is reformulated as a second-order
    cone, and the problem is solved using an interior-point method from [2]_.

    Parameters
    ----------
    h : float
        Standard deviation constraint for the affine estimator.
    sigma : ndarray
        Covariance matrix :math:`\Sigma` of event study coefficients.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods.
    post_period_weights : ndarray
        Post-treatment weight vector :math:`\ell_{post}`.
    smoothness_bound : float
        Smoothness parameter :math:`M` (not directly used in optimization,
        applied as scaling factor to result).

    Returns
    -------
    dict
        Dictionary with optimization results:

        - status: 'optimal' if successful, 'failed' or error message otherwise
        - value: Maximum bias value (scaled by smoothness_bound)
        - optimal_l: Optimal pre-period weights :math:`\ell_{pre}`
        - optimal_w: Optimal weights in :math:`w` parameterization
        - optimal_x: Full solution vector from optimization

    Notes
    -----
    This implementation is specific to :math:`\Delta^{SD}(M)`. For other restriction
    sets, the worst-case bias computation differs significantly. For :math:`\Delta^{SDPB}(M)`
    and :math:`\Delta^{SDI}(M)`, the worst-case bias of any affine estimator equals
    its worst-case bias over :math:`\Delta^{SD}(M)`, meaning sign and monotonicity
    restrictions provide no benefit for FLCIs. For :math:`\Delta^{RM}(\bar{M})`,
    the worst-case bias is infinite whenever :math:`\bar{M} > 0`, as pre-treatment
    violations can be arbitrarily scaled up.

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2023). A more credible approach to
        parallel trends. Review of Economic Studies, 90(5), 2555-2591.

    .. [2] Goulart, P. J., & Chen, Y. (2024). Clarabel: An interior-point solver
        for conic programs with quadratic objectives. *arXiv preprint arXiv:2405.13033*.
    """
    stacked_vars = cp.Variable(2 * n_pre_periods)

    bias_constant = sum(
        abs(np.dot(np.arange(1, s + 1), post_period_weights[(n_post_periods - s) : n_post_periods]))
        for s in range(1, n_post_periods + 1)
    ) - np.dot(np.arange(1, n_post_periods + 1), post_period_weights)

    objective = cp.Minimize(bias_constant + cp.sum(stacked_vars[:n_pre_periods]))

    constraints = []

    absolute_values = stacked_vars[:n_pre_periods]
    weight_vector = stacked_vars[n_pre_periods:]
    lower_triangular = np.tril(np.ones((n_pre_periods, n_pre_periods)))

    constraints.extend(
        [-absolute_values <= lower_triangular @ weight_vector, lower_triangular @ weight_vector <= absolute_values]
    )

    target_sum = np.dot(np.arange(1, n_post_periods + 1), post_period_weights)
    constraints.append(cp.sum(weight_vector) == target_sum)

    weights_to_levels_matrix = _create_diff_matrix(n_pre_periods)

    stacked_transform_matrix = np.hstack([np.zeros((n_pre_periods, n_pre_periods)), weights_to_levels_matrix])

    sigma_pre = sigma[:n_pre_periods, :n_pre_periods]
    sigma_pre_post = sigma[:n_pre_periods, n_pre_periods:]
    sigma_post = post_period_weights @ sigma[n_pre_periods:, n_pre_periods:] @ post_period_weights

    A_quadratic = stacked_transform_matrix.T @ sigma_pre @ stacked_transform_matrix
    A_linear = 2 * stacked_transform_matrix.T @ sigma_pre_post @ post_period_weights

    variance_expr = cp.quad_form(stacked_vars, A_quadratic) + A_linear @ stacked_vars + sigma_post
    constraints.append(variance_expr <= h**2)

    problem = cp.Problem(objective, constraints)

    try:
        problem.solve(solver=cp.CLARABEL, verbose=False)

        if problem.status in ["optimal", "optimal_inaccurate"]:
            optimal_w = stacked_vars.value[n_pre_periods:]
            optimal_l_pre = _weights_to_l(optimal_w)

            bias_value = problem.value * smoothness_bound

            return {
                "status": "optimal",
                "value": bias_value,
                "optimal_x": stacked_vars.value,
                "optimal_w": optimal_w,
                "optimal_l": optimal_l_pre,
            }

        return {
            "status": "failed",
            "value": np.inf,
            "optimal_x": None,
            "optimal_w": None,
            "optimal_l": None,
        }
    except (ValueError, RuntimeError, cp.error.SolverError) as e:
        return {
            "status": f"error: {e!s}",
            "value": np.inf,
            "optimal_x": None,
            "optimal_w": None,
            "optimal_l": None,
        }




[docs]
def minimize_variance(
    sigma,
    n_pre_periods,
    n_post_periods,
    post_period_weights,
):
    r"""Find the minimum achievable standard deviation :math:`h`.

    Solves a Quadratic Program (QP) to find the minimum variance of an affine
    estimator subject to bias constraints arising from :math:`\Delta^{SD}(M)`.
    The optimization problem is formulated as

    .. math::

        \min_{w, t} \quad & \text{Var}(\ell'_{pre}\hat{\beta}_{pre} + \ell'_{post}\hat{\beta}_{post}) \\
        \text{s.t.} \quad & -t_s \leq \sum_{j=-\underline{T}+1}^{s} w_j \leq t_s, \quad \forall s \\
                          & \sum_{s=-\underline{T}+1}^{0} w_s = \sum_{s=1}^{\bar{T}} s \cdot \ell_{post,s}.

    The variance is a quadratic function of the first-difference weights :math:`w`,
    making this a QP. The problem is solved using an interior-point method from [2]_.
    The solution provides a lower bound for the feasible values of :math:`h` in
    the FLCI optimization.

    Parameters
    ----------
    sigma : ndarray
        Covariance matrix :math:`\Sigma` of event study coefficients.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods.
    post_period_weights : ndarray
        Post-treatment weight vector :math:`\ell_{post}`.

    Returns
    -------
    float
        Minimum achievable standard deviation :math:`h_{min}`.

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2023). A more credible approach to
        parallel trends. Review of Economic Studies, 90(5), 2555-2591.

    .. [2] Goulart, P. J., & Chen, Y. (2024). Clarabel: An interior-point solver
        for conic programs with quadratic objectives. *arXiv preprint arXiv:2405.13033*.
    """
    stacked_vars = cp.Variable(2 * n_pre_periods)

    absolute_values = stacked_vars[:n_pre_periods]
    weight_vector = stacked_vars[n_pre_periods:]

    weights_to_levels_matrix = _create_diff_matrix(n_pre_periods)
    stacked_transform_matrix = np.hstack([np.zeros((n_pre_periods, n_pre_periods)), weights_to_levels_matrix])

    sigma_pre = sigma[:n_pre_periods, :n_pre_periods]
    sigma_pre_post = sigma[:n_pre_periods, n_pre_periods:]
    sigma_post = post_period_weights @ sigma[n_pre_periods:, n_pre_periods:] @ post_period_weights

    A_quadratic = stacked_transform_matrix.T @ sigma_pre @ stacked_transform_matrix
    A_linear = 2 * stacked_transform_matrix.T @ sigma_pre_post @ post_period_weights

    variance_expr = cp.quad_form(stacked_vars, A_quadratic) + A_linear @ stacked_vars + sigma_post
    objective = cp.Minimize(variance_expr)

    constraints = []

    lower_triangular = np.tril(np.ones((n_pre_periods, n_pre_periods)))
    constraints.extend(
        [-absolute_values <= lower_triangular @ weight_vector, lower_triangular @ weight_vector <= absolute_values]
    )

    target_sum = np.dot(np.arange(1, n_post_periods + 1), post_period_weights)
    constraints.append(cp.sum(weight_vector) == target_sum)

    problem = cp.Problem(objective, constraints)

    try:
        problem.solve(solver=cp.CLARABEL, verbose=False)

        if problem.status in ["optimal", "optimal_inaccurate"]:
            return np.sqrt(problem.value)

        for scale_factor in [10, 100, 1000]:
            scaled_A_quadratic = A_quadratic * scale_factor
            scaled_A_linear = A_linear * scale_factor
            scaled_sigma_post = sigma_post * scale_factor

            scaled_variance_expr = (
                cp.quad_form(stacked_vars, scaled_A_quadratic) + scaled_A_linear @ stacked_vars + scaled_sigma_post
            )
            scaled_objective = cp.Minimize(scaled_variance_expr)
            scaled_problem = cp.Problem(scaled_objective, constraints)

            scaled_problem.solve(solver=cp.CLARABEL, verbose=False)

            if scaled_problem.status in ["optimal", "optimal_inaccurate"]:
                return np.sqrt(scaled_problem.value / scale_factor)

        raise ValueError("Error in optimization for minimum variance")
    except (ValueError, RuntimeError, cp.error.SolverError) as e:
        raise ValueError(f"Error in optimization for minimum variance: {e!s}") from e



def affine_variance(
    l_pre,
    l_post,
    sigma,
    n_pre_periods,
):
    r"""Compute variance of affine estimator.

    Computes the variance of the affine estimator

    .. math::

        \hat{\theta} = \ell'_{pre}\hat{\beta}_{pre} + \ell'_{post}\hat{\beta}_{post}.

    Under standard asymptotics, this has variance

    .. math::

        \text{Var}(\hat{\theta}) = \begin{pmatrix} \ell_{pre} \\ \ell_{post} \end{pmatrix}'
        \begin{pmatrix} \Sigma_{pre,pre} & \Sigma_{pre,post} \\
        \Sigma_{post,pre} & \Sigma_{post,post} \end{pmatrix}
        \begin{pmatrix} \ell_{pre} \\ \ell_{post} \end{pmatrix}.

    Parameters
    ----------
    l_pre : ndarray
        Pre-treatment weight vector :math:`\ell_{pre}`.
    l_post : ndarray
        Post-treatment weight vector :math:`\ell_{post}`.
    sigma : ndarray
        Full covariance matrix :math:`\Sigma` of event study coefficients.
    n_pre_periods : int
        Number of pre-treatment periods.

    Returns
    -------
    float
        Variance of the affine estimator.

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2023). A more credible approach to
        parallel trends. Review of Economic Studies, 90(5), 2555-2591.
    """
    sigma_pre = sigma[:n_pre_periods, :n_pre_periods]
    sigma_pre_post = sigma[:n_pre_periods, n_pre_periods:]
    sigma_post = l_post @ sigma[n_pre_periods:, n_pre_periods:] @ l_post

    variance = l_pre @ sigma_pre @ l_pre + 2 * l_pre @ sigma_pre_post @ l_post + sigma_post

    return variance



[docs]
def folded_normal_quantile(
    p,
    mu=0.0,
    sd=1.0,
    seed=0,
):
    r"""Compute quantile of folded normal distribution :math:`cv_{\alpha}(t)`.

    Computes the :math:`1-\alpha` quantile of the folded normal distribution
    :math:`|N(t, 1)|`, denoted :math:`cv_{\alpha}(t)` in the paper. This function
    arises in the FLCI construction through equation (18) in [1]_

    .. math::

        \chi_n(a, v; \alpha) = \sigma_{v,n} \cdot cv_{\alpha}(\bar{b}(a, v) / \sigma_{v,n}).

    The folded normal is the distribution of :math:`|X|` where :math:`X \sim N(\mu, \sigma^2)`.
    For the FLCI, we need this because the affine estimator has distribution

    .. math::

        a + v'\hat{\beta}_n \sim N(a + v'\beta, v'\Sigma_n v),

    and thus :math:`|a + v'\hat{\beta}_n - \theta| \sim |N(b, v'\Sigma_n v)|` where
    :math:`b = a + v'\beta - \theta` is the bias.

    Parameters
    ----------
    p : float
        Probability level (between 0 and 1), typically :math:`1 - \alpha`.
    mu : float
        Mean parameter :math:`t` of the underlying normal distribution, equal to
        :math:`\bar{b}(a, v) / \sigma_{v,n}` in the FLCI context.
    sd : float
        Standard deviation of underlying normal (typically 1).
    seed : int
        Random seed for Monte Carlo approximation.

    Returns
    -------
    float
        The value :math:`cv_p(t)`, the p-th quantile of :math:`|N(t, 1)|`.

    Notes
    -----
    When :math:`t = 0`, this reduces to the half-normal distribution.
    For non-zero :math:`t`, we use Monte Carlo simulation to approximate
    the quantile as no closed-form expression exists.

    If :math:`t = \infty`, we define :math:`cv_{\alpha}(t) = \infty` as noted
    in the paper (footnote 22).

    References
    ----------

    .. [1] Rambachan, A., & Roth, J. (2023). A more credible approach to
        parallel trends. Review of Economic Studies, 90(5), 2555-2591.
    """
    if sd <= 0:
        raise ValueError("Standard deviation must be positive")

    mu_abs = abs(mu)

    if mu_abs == 0:
        return sd * stats.halfnorm.ppf(p)

    upper_bracket = mu_abs + 8 * sd
    while _folded_normal_cdf(upper_bracket, mu_abs, sd) - p < 0:
        upper_bracket *= 2

    return brentq(_folded_normal_cdf, 0, upper_bracket, args=(mu_abs, sd, p))



def get_min_bias_h(
    sigma,
    n_pre_periods,
    n_post_periods,
    post_period_weights,
):
    r"""Compute :math:`h` that yields minimum bias.

    Finds the standard deviation :math:`h` corresponding to the estimator that
    minimizes worst-case bias under :math:`\Delta^{SD}(M)`. This occurs when
    all pre-treatment weight is placed on the last pre-treatment period.

    The minimum bias estimator uses

    .. math::

        \ell_{pre} = (0, ..., 0, \sum_{s=1}^{T_{post}} s \cdot \ell_{post,s}).

    This choice minimizes bias because it uses only the pre-treatment coefficient
    closest to the treatment period, reducing extrapolation error.

    Parameters
    ----------
    sigma : ndarray
        Covariance matrix :math:`\Sigma` of event study coefficients.
    n_pre_periods : int
        Number of pre-treatment periods :math:`T_{pre}`.
    n_post_periods : int
        Number of post-treatment periods :math:`T_{post}`.
    post_period_weights : ndarray
        Post-treatment weight vector :math:`\ell_{post}`.

    Returns
    -------
    float
        Standard deviation :math:`h_{max}` for minimum bias configuration.

    Notes
    -----
    This provides an upper bound for the feasible values of :math:`h` in the
    FLCI optimization. For :math:`h > h_{max}`, the bias constraint becomes
    slack and further increases in :math:`h` do not improve the confidence
    interval length.
    """
    weights = np.zeros(n_pre_periods)
    weights[-1] = np.dot(np.arange(1, n_post_periods + 1), post_period_weights)

    l_pre = _weights_to_l(weights)
    variance = affine_variance(l_pre, post_period_weights, sigma, n_pre_periods)

    return np.sqrt(variance)


def _optimize_flci_params(
    sigma,
    smoothness_bound,
    n_pre_periods,
    n_post_periods,
    post_period_weights,
    num_points,
    alpha,
    seed,
):
    r"""Compute optimal FLCI parameters.

    Solves the FLCI optimization problem to minimize the confidence interval
    half-length :math:`\chi_n(a, v; \alpha)` defined as:

    .. math::

        \chi_n(a, v; \alpha) = \sigma_{v,n} \cdot cv_{\alpha}(\bar{b}(a, v) / \sigma_{v,n}),

    where :math:`\sigma_{v,n} = \sqrt{v'\Sigma_n v}` is the standard deviation
    of the affine estimator, :math:`\bar{b}(a, v)` is the worst-case bias from
    equation (17), and :math:`cv_{\alpha}(t)` denotes the :math:`1-\alpha` quantile
    of the folded normal distribution :math:`|N(t, 1)|`.

    The optimization is performed over :math:`(a, v)` pairs, which for
    :math:`\Delta^{SD}(M)` reduces to optimizing over :math:`h \in [h_{min}, h_{max}]`
    where :math:`h_{min}` minimizes variance and :math:`h_{max}` minimizes bias.

    Parameters
    ----------
    sigma : ndarray
        Covariance matrix of coefficients.
    smoothness_bound : float
        Smoothness parameter :math:`M`.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods.
    post_period_weights : ndarray
        Weight vector for post-treatment periods.
    num_points : int
        Number of grid points for search.
    alpha : float
        Significance level.
    seed : int
        Random seed.

    Returns
    -------
    dict
        Dictionary containing optimal parameters:

        - optimal_vec: Optimal weight vector :math:`(\ell_{pre}, \ell_{post})`
        - optimal_pre_period_vec: Optimal pre-period weights :math:`\ell_{pre}`
        - optimal_half_length: Optimal CI half-length
        - smoothness_bound: Smoothness parameter used
        - status: Optimization status

    Notes
    -----
    The optimization uses golden section search (bisection) when possible,
    falling back to grid search if the bisection method fails. The search
    is over :math:`h \in [h_{min}, h_{max}]` where :math:`h_{min}` minimizes
    variance and :math:`h_{max}` minimizes bias.
    """
    h_min_bias = get_min_bias_h(sigma, n_pre_periods, n_post_periods, post_period_weights)
    h_min_variance = minimize_variance(sigma, n_pre_periods, n_post_periods, post_period_weights)

    h_optimal = _optimize_h_bisection(
        h_min_variance,
        h_min_bias,
        smoothness_bound,
        num_points,
        alpha,
        sigma,
        n_pre_periods,
        n_post_periods,
        post_period_weights,
        seed,
    )

    if np.isnan(h_optimal):
        # Fall back to grid search if bisection fails
        h_grid = np.linspace(h_min_variance, h_min_bias, num_points)
        ci_half_lengths = []

        for h in h_grid:
            bias_result = maximize_bias(h, sigma, n_pre_periods, n_post_periods, post_period_weights, smoothness_bound)
            if bias_result["status"] == "optimal":
                max_bias = bias_result["value"]
                ci_half_length = folded_normal_quantile(1 - alpha, mu=max_bias / h, sd=1.0, seed=seed) * h
                ci_half_lengths.append(
                    {
                        "h": h,
                        "ci_half_length": ci_half_length,
                        "optimal_l": bias_result["optimal_l"],
                        "status": bias_result["status"],
                    }
                )

        if ci_half_lengths:
            optimal_result = min(ci_half_lengths, key=lambda x: x["ci_half_length"])
        else:
            raise ValueError("Optimization failed for all values of h")
    else:
        bias_result = maximize_bias(
            h_optimal, sigma, n_pre_periods, n_post_periods, post_period_weights, smoothness_bound
        )
        optimal_result = {
            "optimal_l": bias_result["optimal_l"],
            "ci_half_length": folded_normal_quantile(1 - alpha, mu=bias_result["value"] / h_optimal, sd=1.0, seed=seed)
            * h_optimal,
            "status": bias_result["status"],
        }

    return {
        "optimal_vec": np.concatenate([optimal_result["optimal_l"], post_period_weights]),
        "optimal_pre_period_vec": optimal_result["optimal_l"],
        "optimal_half_length": optimal_result["ci_half_length"],
        "smoothness_bound": smoothness_bound,
        "status": optimal_result["status"],
    }


def _optimize_h_bisection(
    h_min,
    h_max,
    smoothness_bound,
    num_points,
    alpha,
    sigma,
    n_pre_periods,
    n_post_periods,
    post_period_weights,
    seed=0,
):
    r"""Find optimal h using golden section search.

    Implements golden section search to find the value of :math:`h` that
    minimizes the confidence interval half-length. The objective function is

    .. math::

        f(h) = h \cdot q_{1-\alpha}\left(\left|N\left(\frac{M \cdot b^*(h)}{h}, 1\right)\right|\right),

    where :math:`b^*(h)` is the maximum bias achievable with standard deviation :math:`h`.

    Golden section search is used because the objective is unimodal in :math:`h`
    and it converges faster than grid search.

    Parameters
    ----------
    h_min : float
        Lower bound for :math:`h` (minimum variance solution).
    h_max : float
        Upper bound for :math:`h` (minimum bias solution).
    smoothness_bound : float
        Smoothness parameter :math:`M`.
    num_points : int
        Number of points for tolerance calculation.
    alpha : float
        Significance level :math:`\alpha`.
    sigma : ndarray
        Covariance matrix :math:`\Sigma`.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods.
    post_period_weights : ndarray
        Post-treatment weight vector :math:`\ell_{post}`.
    seed : int
        Random seed for folded normal quantile computation.

    Returns
    -------
    float
        Optimal :math:`h` value, or NaN if optimization fails.
    """

    def _compute_ci_half_length(h):
        bias_result = maximize_bias(h, sigma, n_pre_periods, n_post_periods, post_period_weights, smoothness_bound)

        if bias_result["status"] == "optimal" and bias_result["value"] < np.inf:
            max_bias = bias_result["value"]
            return folded_normal_quantile(1 - alpha, mu=max_bias / h, sd=1.0, seed=seed) * h

        return np.nan

    tolerance = min((h_max - h_min) / num_points, abs(h_max) * 1e-6)
    golden_ratio = (1 + np.sqrt(5)) / 2

    h_lower = h_min
    h_upper = h_max
    h_mid_low = h_upper - (h_upper - h_lower) / golden_ratio
    h_mid_high = h_lower + (h_upper - h_lower) / golden_ratio

    ci_mid_low = _compute_ci_half_length(h_mid_low)
    ci_mid_high = _compute_ci_half_length(h_mid_high)

    if np.isnan(ci_mid_low) or np.isnan(ci_mid_high):
        return np.nan

    while abs(h_upper - h_lower) > tolerance:
        if ci_mid_low < ci_mid_high:
            h_upper = h_mid_high
            h_mid_high = h_mid_low
            ci_mid_high = ci_mid_low
            h_mid_low = h_upper - (h_upper - h_lower) / golden_ratio
            ci_mid_low = _compute_ci_half_length(h_mid_low)
            if np.isnan(ci_mid_low):
                return np.nan
        else:
            h_lower = h_mid_low
            h_mid_low = h_mid_high
            ci_mid_low = ci_mid_high
            h_mid_high = h_lower + (h_upper - h_lower) / golden_ratio
            ci_mid_high = _compute_ci_half_length(h_mid_high)
            if np.isnan(ci_mid_high):
                return np.nan

    return (h_lower + h_upper) / 2


def _weights_to_l(weights):
    r"""Convert from weight parameterization to :math:`\ell` parameterization.

    Applies the first-difference transformation to convert from the weight
    parameterization :math:`w` to the levels parameterization :math:`\ell`:

    .. math::

        \ell_1 = w_1, \quad \ell_t = w_t - w_{t-1} \text{ for } t > 1.

    This is equivalent to multiplying by the lower bidiagonal matrix with
    1s on the diagonal and -1s on the subdiagonal.

    Parameters
    ----------
    weights : ndarray
        Weight vector :math:`w`.

    Returns
    -------
    ndarray
        Level vector :math:`\ell`.
    """
    result = np.empty_like(weights)
    result[0] = weights[0]
    result[1:] = np.diff(weights)
    return result


def _create_diff_matrix(size):
    mat = np.eye(size)
    if size > 1:
        for i in range(1, size):
            mat[i, i - 1] = -1
    return mat


def _folded_normal_cdf(x, mu_abs, sd, p=0.0):
    """CDF of the folded normal distribution minus p, for root-finding."""
    return stats.norm.cdf((x - mu_abs) / sd) - stats.norm.cdf((-x - mu_abs) / sd) - p