#!/usr/bin/env python3
"""Math verification suite for paper_v3 (task_frontier_v3.tex).

Every numbered claim in the paper must have a check here.
Part 1: core derivations (pre-writing verification).
Run: python3 math_check_v3.py
"""
import math
import numpy as np

PASS, FAIL = 0, 0
FAILURES = []

def check(name, cond, detail=""):
    global PASS, FAIL
    if cond:
        PASS += 1
        print(f"  PASS  {name}")
    else:
        FAIL += 1
        FAILURES.append((name, detail))
        print(f"  FAIL  {name}  {detail}")

# ----------------------------------------------------------------------
# Section T1: O-ring with retries, reduced threshold, Gumbel kernel
# ----------------------------------------------------------------------
print("== T1: sequential task model ==")

def logistic(x):
    return 1.0 / (1.0 + math.exp(-x))

def logit(p):
    return math.log(p / (1.0 - p))

def stage_success(pi, ell):
    """Stage succeeds if any of 1+ell attempts succeeds."""
    return 1.0 - (1.0 - pi) ** (1 + ell)

def task_success(qF, phi, n, ell):
    """Numerically stable: 1-pi = logistic(-(qF-phi)) exactly; work in logs."""
    one_minus_pi = logistic(-(qF - phi))
    y = one_minus_pi ** (1 + ell)          # stage failure prob after retries
    return math.exp(n * math.log1p(-y))    # S^n = (1-y)^n

def reduced_threshold(n, ell, eps, phi):
    """Exact threshold: minimal qF s.t. task success >= 1-eps (logistic G_F).

    Numerically stable: g = 1-(1-eps)^(1/n) via expm1/log1p, and
    logit(1-y) = log1p(-y) - log(y) for y = g^(1/(1+ell)).
    """
    g = -math.expm1(math.log1p(-eps) / n)
    y = g ** (1.0 / (1 + ell))
    return phi + (math.log1p(-y) - math.log(y))

# C1: threshold identity  S^n >= 1-eps  <=>  qF >= f(n,ell,eps,phi)
ok = True
rng = np.random.default_rng(0)
for _ in range(400):
    n = int(rng.integers(1, 2000))
    ell = int(rng.integers(0, 6))
    eps = float(rng.uniform(0.001, 0.6))
    phi = float(rng.uniform(-2, 2))
    f = reduced_threshold(n, ell, eps, phi)
    # just above threshold -> success >= 1-eps; just below -> < 1-eps
    hi = task_success(f + 1e-7, phi, n, ell)
    lo = task_success(f - 1e-4, phi, n, ell)
    if not (hi >= 1 - eps - 1e-9 and lo < 1 - eps):
        ok = False
        break
check("C1 threshold identity (400 random draws)", ok)

# C2: sandwich  eps/n <= 1-(1-eps)^(1/n) <= (eps/n)/(1-eps)
ok = True
for eps in [1e-4, 1e-3, 0.01, 0.05, 0.1, 0.3, 0.5, 0.8]:
    for n in [1, 2, 5, 10, 100, 1000, 10**5]:
        g = 1.0 - (1.0 - eps) ** (1.0 / n)
        if not (eps / n - 1e-15 <= g <= (eps / n) / (1 - eps) + 1e-15):
            ok = False
check("C2 sandwich bounds on 1-(1-eps)^(1/n)", ok)

# C3: log-linear limit  f - phi -> (1/(1+ell)) * ln(n/eps)  as eps/n -> 0
ok = True
for ell in [0, 1, 3]:
    errs = []
    for n, eps in [(10**3, 1e-2), (10**5, 1e-3), (10**7, 1e-4)]:
        f = reduced_threshold(n, ell, eps, 0.0)
        approx = (math.log(n / eps)) / (1 + ell)
        errs.append(abs(f - approx))
    # errors should shrink toward 0
    if not (errs[-1] < errs[0] and errs[-1] < 5e-3):
        ok = False
check("C3 log-linear threshold limit f ~ phi + ln(n/eps)/(1+ell)", ok)

# C3b: slope in h = ln n converges to 1/(1+ell); correction is O((eps/n)^{1/(1+ell)})
ok = True
for ell in [0, 1, 4]:
    eps = 1e-4
    n1, n2 = 10**8, 10**12
    f1 = reduced_threshold(n1, ell, eps, 0.0)
    f2 = reduced_threshold(n2, ell, eps, 0.0)
    slope = (f2 - f1) / (math.log(n2) - math.log(n1))
    if abs(slope - 1.0 / (1 + ell)) > 1e-3:
        ok = False
    # two-term error bound: |f - ln(n/eps)/(1+ell)| <= 2*eps + 3*(eps/n)^{1/(1+ell)}
    e1 = abs(f1 - math.log(n1 / eps) / (1 + ell))
    if e1 > 2.0 * eps + 3.0 * (eps / n1) ** (1.0 / (1 + ell)):
        ok = False
check("C3b threshold slope -> 1/(1+ell); error <= 2eps + 3(eps/n)^{1/(1+ell)}", ok)

# C3c: monotonicities of the exact threshold
f0 = reduced_threshold(100, 1, 0.05, 0.0)
check("C3c f increasing in n", reduced_threshold(200, 1, 0.05, 0.0) > f0)
check("C3c f decreasing in ell", reduced_threshold(100, 2, 0.05, 0.0) < f0)
check("C3c f increasing as eps falls", reduced_threshold(100, 1, 0.01, 0.0) > f0)
check("C3c f additive in phi", abs(reduced_threshold(100, 1, 0.05, 0.7) - (f0 + 0.7)) < 1e-12)

# C4: Gumbel kernel limit.
# At scaling qF = phi + (ln n + u)/(1+ell):  S^n -> exp(-exp(-u)),
# with convergence rate O(n^{-1/(1+ell)}) (from the logistic-tail correction).
ok = True
worst_scaled = 0.0
for ell in [0, 1, 3]:
    for u in np.linspace(-2, 4, 25):
        target = math.exp(-math.exp(-u))
        errs = []
        for n in [10**3, 10**6, 10**9]:
            qF = 0.0 + (math.log(n) + u) / (1 + ell)
            errs.append(abs(task_success(qF, 0.0, n, ell) - target))
        # errors shrink with n, and the n=1e9 error is within the predicted rate
        # (floor at 1e-6 for double-precision noise)
        rate_bound = max(3.0 * (1 + ell) * (10**9) ** (-1.0 / (1 + ell)), 1e-6)
        worst_scaled = max(worst_scaled, errs[-1] / rate_bound)
        if not (errs[-1] <= errs[0] + 1e-12 and errs[-1] <= rate_bound):
            ok = False
check("C4 Gumbel limit S^n -> exp(-exp(-u)), rate O(n^{-1/(1+ell)})", ok,
      f"worst scaled err={worst_scaled:.2f}")

# C5: parameter-free horizon ratio.  n(eps) = ln(1-eps)/ln(S)  =>
#     n(0.5)/n(0.2) = ln(0.5)/ln(0.8) ~ 3.1063, independent of S.
ratio_theory = math.log(0.5) / math.log(0.8)
check("C5 ln(0.5)/ln(0.8) = 3.1063", abs(ratio_theory - 3.1063) < 1e-3)
ok = True
for S in [0.9, 0.99, 0.999, 0.99999]:
    n50 = math.log(0.5) / math.log(S)
    n80 = math.log(0.8) / math.log(S)
    if abs(n50 / n80 - ratio_theory) > 1e-9:
        ok = False
check("C5b ratio independent of per-stage success S", ok)
# retries change S but not the geometric shape => same ratio
ok = True
for ell in [0, 2, 5]:
    pi = 0.97
    S = stage_success(pi, ell)
    n50 = math.log(0.5) / math.log(S)
    n80 = math.log(0.8) / math.log(S)
    if abs(n50 / n80 - ratio_theory) > 1e-9:
        ok = False
check("C5c ratio invariant to retries ell", ok)

# ----------------------------------------------------------------------
# T1b: representation theorem logic (finite counterexample check)
# ----------------------------------------------------------------------
print("== T1b: scalar representability ==")
# Two unordered tasks: t1=(h=1,f=3), t2=(h=4,f=1).
# System A=(4,1) serves t2 not t1; system B=(1,3) serves t1 not t2.
def serves(q, t):
    return q[0] >= t[0] and q[1] >= t[1]
t1, t2 = (1.0, 3.0), (4.0, 1.0)
A, B = (4.0, 1.0), (1.0, 3.0)
cross = serves(A, t2) and not serves(A, t1) and serves(B, t1) and not serves(B, t2)
check("C6 unordered tasks: serving sets not nested (no scalar index)", cross)
# Chain (totally ordered) case: nested serving sets for all systems on a grid
ok = True
chain = [(1.0, 1.0), (2.0, 1.5), (3.0, 2.0), (4.0, 2.5)]  # f increasing in h
for qH in np.linspace(0, 5, 11):
    for qF in np.linspace(0, 3, 11):
        served = [i for i, t in enumerate(chain) if serves((qH, qF), t)]
        # served set must be an initial segment of the chain
        if served and served != list(range(max(served) + 1)):
            ok = False
check("C6b chain tasks: served sets are initial segments (scalar OK)", ok)

# ----------------------------------------------------------------------
# W, boundary values, profit = gap integral (Bertrand lemma)
# ----------------------------------------------------------------------
print("== Market: W, boundary values, profit-gap identity ==")

kH, kF = 1.5, 2.0
def PH(x): return np.exp(-np.exp(-kH * x))
def PF(x): return np.exp(-np.exp(-kF * x))
def gH(x): return kH * np.exp(-kH * x) * np.exp(-np.exp(-kH * x))
def gF(x): return kF * np.exp(-kF * x) * np.exp(-np.exp(-kF * x))

# task-value density: two families (A forgiving flat-f, B unforgiving steep-f)
Hg = np.linspace(0.0, 20.0, 401)
Fg = np.linspace(0.0, 14.0, 281)
HH, FF = np.meshgrid(Hg, Fg, indexing="ij")
dens_A = np.exp(-(((HH - 4.0) / 3.0) ** 2)) * np.exp(-(((FF - 1.0) / 0.5) ** 2))
dens_B = 2.0 * np.exp(-(((HH - 8.0) / 5.0) ** 2)) * np.exp(-(((FF - (4.0 + 0.8 * HH)) / 0.5) ** 2))
dens = dens_A + dens_B

def W(qH, qF):
    return np.trapezoid(np.trapezoid(dens * PH(qH - HH) * PF(qF - FF), Fg, axis=1), Hg)

def BH(qH, qF):
    return np.trapezoid(np.trapezoid(dens * gH(qH - HH) * PF(qF - FF), Fg, axis=1), Hg)

def BF(qH, qF):
    return np.trapezoid(np.trapezoid(dens * PH(qH - HH) * gF(qF - FF), Fg, axis=1), Hg)

def Ccross(qH, qF):
    return np.trapezoid(np.trapezoid(dens * gH(qH - HH) * gF(qF - FF), Fg, axis=1), Hg)

# C7: derivative identities  dW/dqH = BH etc. (numeric differentiation)
q0 = (6.0, 3.0)
h = 1e-4
dWH = (W(q0[0] + h, q0[1]) - W(q0[0] - h, q0[1])) / (2 * h)
dWF = (W(q0[0], q0[1] + h) - W(q0[0], q0[1] - h)) / (2 * h)
check("C7 dW/dqH = B_H", abs(dWH - BH(*q0)) < 1e-5 * max(1, abs(BH(*q0))))
check("C7 dW/dqF = B_F", abs(dWF - BF(*q0)) < 1e-5 * max(1, abs(BF(*q0))))
dBHdF = (BH(q0[0], q0[1] + h) - BH(q0[0], q0[1] - h)) / (2 * h)
dBFdH = (BF(q0[0] + h, q0[1]) - BF(q0[0] - h, q0[1])) / (2 * h)
check("C7b cross-partial identity dB_H/dqF = dB_F/dqH = C >= 0",
      abs(dBHdF - Ccross(*q0)) < 1e-4 and abs(dBFdH - Ccross(*q0)) < 1e-4 and Ccross(*q0) > 0)

# C8: finite-step path formula  W(q+D)-W(q) = int BH du + int BF dv  (H first, then F)
qL = (7.5, 4.0)
qm = (5.5, 2.5)
gap = W(*qL) - W(*qm)
us = np.linspace(qm[0], qL[0], 200)
seg1 = np.trapezoid([BH(u, qm[1]) for u in us], us)
vs = np.linspace(qm[1], qL[1], 200)
seg2 = np.trapezoid([BF(qL[0], v) for v in vs], vs)
check("C8 finite-step gap = path integral of boundary values",
      abs(gap - (seg1 + seg2)) < 1e-4 * max(1.0, abs(gap)))

# C8b: Bertrand profit identity: leader margin on each task = value gap;
# total profit = W(qL) - W(qm) when leader weakly better everywhere, equal costs
margins = dens * (PH(qL[0] - HH) * PF(qL[1] - FF) - PH(qm[0] - HH) * PF(qm[1] - FF))
check("C8b per-task margins nonnegative (qL >= qm componentwise)", float(margins.min()) >= -1e-12)
prof = np.trapezoid(np.trapezoid(margins, Fg, axis=1), Hg)
check("C8c aggregate Bertrand profit = W(qL)-W(qm)", abs(prof - gap) < 1e-8 * max(1.0, abs(gap)))
# saturation: profit -> 0 as fringe closes the gap
prof_close = W(*qL) - W(qL[0] - 0.05, qL[1] - 0.05)
check("C8d saturation: profit vanishes as gap closes", prof_close < 0.05 * gap)

# ----------------------------------------------------------------------
# T2: sample complexity of reliability imitation
# ----------------------------------------------------------------------
print("== T2: tail cannot be distilled ==")
def kl_bern(p, q):
    return p * math.log(p / q) + (1 - p) * math.log((1 - p) / (1 - q))

# C9: KL(Bern(eps/2) || Bern(eps)) / eps  ->  (1 - ln 2)/2  as eps -> 0
lim = (1 - math.log(2)) / 2
vals = [kl_bern(e / 2, e) / e for e in [1e-6, 1e-4, 1e-2]]
check("C9 KL/eps -> (1-ln2)/2 = 0.15343", abs(vals[0] - lim) < 1e-5 and abs(lim - 0.15343) < 1e-5)
# C9b: ratio increasing in eps on (0, 1/2]; global bound KL <= 0.27*eps there
es = np.linspace(1e-6, 0.5, 5000)
ratios = np.array([kl_bern(e / 2, e) / e for e in es])
check("C9b KL/eps increasing on (0,1/2]", bool(np.all(np.diff(ratios) > -1e-12)))
check("C9c KL <= 0.27*eps on (0,1/2] (max ratio = %.4f)" % ratios.max(), ratios.max() <= 0.27)
# C9d: Pinsker-based lower bound N >= 2(1-2*delta)^2 / KL  =>  N >= c_delta/eps
delta = 0.1
N_bound = 2 * (1 - 2 * delta) ** 2 / (0.27)  # coefficient of 1/eps
check("C9d N >= %.2f/eps with delta=0.1" % N_bound, N_bound > 4.7)
# Pinsker: TV <= sqrt(KL/2); after N iid samples KL_N = N*KL; sum of errors >= 1 - TV
# feasibility of test with both errors <= delta requires 1 - sqrt(N*KL/2) <= 2*delta
ok = True
for e in [1e-4, 1e-2, 0.2]:
    N_min = 2 * (1 - 2 * delta) ** 2 / kl_bern(e / 2, e)
    if N_min < 2 * (1 - 2 * delta) ** 2 / (0.27 * e):  # bound must be implied
        ok = False
check("C9e Pinsker bound implies N >= c/eps at each eps", ok)

# C9f: required reliability on marginal fragile task is exponentially small in qF
# eps_required(qF) ~ n * exp(-(1+ell)(qF - phi))  [from Gumbel tail]
ok = True
for ell in [0, 2]:
    n = 10**4
    for qF in [12.0 / (1 + ell), 14.0 / (1 + ell)]:
        eps_req = 1 - task_success(qF, 0.0, n, ell)
        approx = n * math.exp(-(1 + ell) * qF)
        if not (0.5 < eps_req / approx < 2.0):
            ok = False
check("C9f tail failure rate ~ n*exp(-(1+ell)(qF-phi))", ok)

# ----------------------------------------------------------------------
# T3: leader-fringe dynamics, HJB, rotation
# ----------------------------------------------------------------------
print("== T3: dynamics and rotation ==")
import sympy as sp

# C10: HJB linear model:  V(D) = mu.D + v0 with mu_i = B_i/(r+lam_i)
r_, lamH_, lamF_, BH_, BF_ = sp.symbols("r lamH lamF B_H B_F", positive=True)
muH = BH_ / (r_ + lamH_)
muF = BF_ / (r_ + lamF_)
DH, DF, xH, xF = sp.symbols("D_H D_F x_H x_F", nonnegative=True)
v0 = sp.symbols("v0")
V = muH * DH + muF * DF + v0
flow = BH_ * DH + BF_ * DF  # profit flow = boundary-value-weighted gaps (local regime)
# rV = flow - cost(x) + V'_H (xH - lamH DH) + V'_F (xF - lamF DF); check Delta-coefficients
lhs = sp.expand(r_ * V)
rhs = sp.expand(flow + muH * (xH - lamH_ * DH) + muF * (xF - lamF_ * DF))
diff = sp.simplify(lhs - rhs)
check("C10 HJB Delta-coefficients vanish with mu_i = B_i/(r+lam_i)",
      sp.simplify(sp.diff(diff, DH)) == 0 and sp.simplify(sp.diff(diff, DF)) == 0)

# C10b: quadratic cost c(x)=x^2/2 => x_i* = mu_i ; steady state D_i* = mu_i/lam_i
xstar = muF  # by FOC
Dstar_F = xstar / lamF_
dDdlam = sp.simplify(sp.diff(Dstar_F, lamF_))
check("C10b dD_F*/dlamF < 0 (verification thins the moat)",
      sp.simplify(dDdlam * (r_ + lamF_) ** 2 * lamF_ ** 2 / BF_) == -(r_ + 2 * lamF_))
theta = sp.symbols("theta", positive=True)
Dstar_F_theta = (theta * BF_ / (r_ + lamF_)) / lamF_
check("C10c dD_F*/dtheta > 0 (liability thickens the moat)",
      sp.simplify(sp.diff(Dstar_F_theta, theta)) == BF_ / ((r_ + lamF_) * lamF_))

# C11: rotation example: along a pure-H push at fixed qF, B_F/B_H rises and
# crosses the persistence-adjusted threshold exactly once.
qF_fix = 3.0
path = np.linspace(3.0, 12.0, 60)
ratio_path = np.array([BF(qh, qF_fix) / BH(qh, qF_fix) for qh in path])
check("C11 B_F/B_H strictly increasing along H-push (example)",
      bool(np.all(np.diff(ratio_path) > 0)),
      f"range=({ratio_path[0]:.3f},{ratio_path[-1]:.3f})")
r0, lamH0, lamF0, cH0, cF0 = 0.01, 0.17, 0.05, 1.0, 1.0
thr1 = ((r0 + lamF0) * cF0) / ((r0 + lamH0) * cH0)  # rotate when ratio >= thr
lamH1 = 0.30
thr2 = ((r0 + lamF0) * cF0) / ((r0 + lamH1) * cH0)
check("C11a thresholds interior to the ratio path",
      ratio_path[0] < thr2 < thr1 < ratio_path[-1],
      f"path=({ratio_path[0]:.3f},{ratio_path[-1]:.3f}) thr2={thr2:.3f} thr1={thr1:.3f}")
crossings = np.sum((ratio_path[:-1] < thr1) & (ratio_path[1:] >= thr1))
check("C11b single crossing of rotation threshold (example)", int(crossings) == 1)
# faster imitation on H lowers the threshold => earlier rotation
t1 = np.argmax(ratio_path >= thr1)
t2 = np.argmax(ratio_path >= thr2)
check("C11c dT*/dlamH < 0: higher lamH => earlier rotation", thr2 < thr1 and 0 < t2 < t1)
# C11d: dB_F/dqH = C > 0 is the unlock mechanism
check("C11d dB_F/dqH = C > 0 along path",
      all(Ccross(qh, qF_fix) > 0 for qh in [4.0, 8.0, 11.0]))

# ----------------------------------------------------------------------
# T4: direction game (herding vs differentiation)
# ----------------------------------------------------------------------
print("== T4: direction game ==")
VF, VH, sig = sp.symbols("V_F V_H sigma", positive=True)
# payoffs: same direction k -> sigma*V_k each; split -> own V
# BR to opponent F: F iff sigma*V_F >= V_H.  BR to opponent H: F iff V_F >= sigma*V_H.
# Case V_F >= V_H: F vs H when opp plays H always weakly better (V_F >= sigma*V_H).
check("C12 V_F>=V_H => F best-reply to H (V_F >= sig*V_H)",
      sp.simplify(VF - sig * VH).subs({VF: 2, VH: 1, sig: 0.6}) > 0)
# dominance-herding region: sigma*V_F >= V_H -> (F,F) unique via strict dominance
subs_h = {VF: 3.0, VH: 1.0, sig: 0.5}
check("C12b herding iff sigma >= V_H/V_F (numeric region check)",
      (0.5 * 3.0 >= 1.0) and not (0.2 * 3.0 >= 1.0))
# differentiation region sigma*V_F < V_H <= V_F: (F,H),(H,F) both equilibria
sV = 0.2
VFv, VHv = 3.0, 1.0
brF_given_F = sV * VFv >= VHv       # False -> deviate to H: (F,F) not eq
brH_given_F = VHv >= sV * VFv       # True: H is BR to F
brF_given_H = VFv >= sV * VHv       # True: F is BR to H
check("C12c differentiation region: (F,H) and (H,F) are equilibria, (F,F) is not",
      (not brF_given_F) and brH_given_F and brF_given_H)
# mixed equilibrium prob of F: p* = (V_F - sig V_H)/((V_F - sig V_H)+(V_H - sig V_F))
p_star = (VFv - sV * VHv) / ((VFv - sV * VHv) + (VHv - sV * VFv))
payoff_F = p_star * sV * VFv + (1 - p_star) * VFv
payoff_H = p_star * VHv + (1 - p_star) * sV * VHv
check("C12d mixed equilibrium indifference", abs(payoff_F - payoff_H) < 1e-12 and 0 < p_star < 1)
# comparative static: p*(F) increasing in V_F/V_H
def pstar(vf, vh, s):
    return (vf - s * vh) / ((vf - s * vh) + (vh - s * vf))
check("C12e herding prob increasing in V_F/V_H",
      pstar(4.0, 1.0, 0.2) > pstar(3.0, 1.0, 0.2) > pstar(2.0, 1.0, 0.2))

# ----------------------------------------------------------------------
# T5: welfare wedge
# ----------------------------------------------------------------------
print("== T5: welfare wedge ==")
# planner PDV per unit direction-i capability: B_i/r; market: B_i/(r+lam_i)
wedge = sp.simplify((BH_ / (r_ + lamH_)) / (BF_ / (r_ + lamF_)) / ((BH_ / r_) / (BF_ / r_)))
check("C13 relative direction wedge = (r+lamF)/(r+lamH)",
      sp.simplify(wedge - (r_ + lamF_) / (r_ + lamH_)) == 0)
# sign: lamH > lamF => market undervalues H relative to planner (wedge < 1)
check("C13b wedge < 1 iff lamH > lamF",
      wedge.subs({r_: 0.01, lamH_: 0.17, lamF_: 0.05}) < 1
      and wedge.subs({r_: 0.01, lamH_: 0.05, lamF_: 0.17}) > 1)

# ----------------------------------------------------------------------
# Calibration arithmetic
# ----------------------------------------------------------------------
print("== calibration arithmetic ==")
# lag half-life tau months => lam = ln2/tau per month
tauH, tauF = 4.0, 14.0
lamH_c, lamF_c = math.log(2) / tauH, math.log(2) / tauF
r_c = 0.01
wedge_c = (r_c + lamF_c) / (r_c + lamH_c)
check("C14 wedge magnitude with tauH=4m, tauF=14m, r=1%/m in (0.2, 0.5)",
      0.2 < wedge_c < 0.5)
check("C14b METR-benchmark ratio: pure serial model predicts 3.11",
      abs(math.log(0.5) / math.log(0.8) - 3.106) < 1e-2)

# ----------------------------------------------------------------------
# Final-text claims (added after the paper was drafted)
# ----------------------------------------------------------------------
print("== final-text claims ==")

# F1: chi^2 bound: KL(Bern(e/2)||Bern(e)) <= chi2 = e/(4(1-e)) <= e/2 on (0,1/2]
ok = True
for e in np.linspace(1e-6, 0.5, 2000):
    chi2 = e / (4 * (1 - e))
    if not (kl_bern(e / 2, e) <= chi2 + 1e-15 and chi2 <= e / 2 + 1e-15):
        ok = False
check("F1 KL <= chi2 = eps/(4(1-eps)) <= eps/2 on (0,1/2]", ok)
# chi2 formula itself: (e/2)^2/e + (e/2)^2/(1-e) = e/(4(1-e))
import sympy as sp2
e_ = sp2.symbols("epsilon", positive=True)
chi2_expr = (e_ / 2) ** 2 / e_ + (e_ / 2) ** 2 / (1 - e_)
check("F1b chi2 closed form", sp2.simplify(chi2_expr - e_ / (4 * (1 - e_))) == 0)

# F2: sample floor N >= 4(1-2delta)^2/eps ; sharp small-eps coeff 4/(1-ln2) ~ 13.0
delta = 0.1
for e in [1e-4, 1e-2, 0.5]:
    N_pinsker = 2 * (1 - 2 * delta) ** 2 / kl_bern(e / 2, e)
    if N_pinsker < 4 * (1 - 2 * delta) ** 2 / e - 1e-9:
        check("F2 N >= 4(1-2d)^2/eps implied at eps=%g" % e, False)
        break
else:
    check("F2 N >= 4(1-2delta)^2/eps implied by Pinsker+chi2", True)
sharp = 4 / (1 - math.log(2))
check("F2b sharp small-eps coefficient 4/(1-ln2) = 13.03", abs(sharp - 13.03) < 5e-2)
lim_ratio = 2 / ((1 - math.log(2)) / 2)  # 2/KL-slope
check("F2c 2/(KL/eps limit) equals 4/(1-ln2)", abs(lim_ratio - sharp) < 1e-12)

# F3: threshold error bound under stated condition (eps<=1/2, n >= 4^{1+ell} eps)
ok = True
rng2 = np.random.default_rng(7)
for _ in range(600):
    ell = int(rng2.integers(0, 5))
    eps = float(rng2.uniform(1e-6, 0.5))
    n_min = max(1, int(math.ceil(4 ** (1 + ell) * eps)))
    n = int(rng2.integers(n_min, n_min + 10**4))
    f = reduced_threshold(n, ell, eps, 0.0)
    err = abs(f - math.log(n / eps) / (1 + ell))
    if err > 2 * eps + 3 * (eps / n) ** (1.0 / (1 + ell)) + 1e-12:
        ok = False
        break
check("F3 |e(tau)| <= 2eps + 3(eps/n)^{1/(1+ell)} on condition region (600 draws)", ok)

# F4: rotation-ratio derivative identity  d ln(B_F/B_H)/dq_H = C/B_F - (dB_H/dq_H)/B_H
q0 = (6.0, 3.0)
hh = 1e-4
lhs = (math.log(BF(q0[0] + hh, q0[1]) / BH(q0[0] + hh, q0[1]))
       - math.log(BF(q0[0] - hh, q0[1]) / BH(q0[0] - hh, q0[1]))) / (2 * hh)
dBH_dqH = (BH(q0[0] + hh, q0[1]) - BH(q0[0] - hh, q0[1])) / (2 * hh)
rhs = Ccross(*q0) / BF(*q0) - dBH_dqH / BH(*q0)
check("F4 d ln(B_F/B_H)/dq_H = C/B_F - B_H'/B_H", abs(lhs - rhs) < 1e-4 * max(1, abs(rhs)))

# F5: mixed-equilibrium closed form p* = (rho-sigma)/((1-sigma)(rho+1)), dp*/drho > 0
rho_, sig_ = sp2.symbols("rho sigma", positive=True)
p_closed = (rho_ - sig_) / ((1 - sig_) * (rho_ + 1))
# equivalence with the two-term form (divide num and den of C12d formula by V_H)
p_two = (rho_ - sig_) / ((rho_ - sig_) + (1 - sig_ * rho_))
check("F5 p* closed form equals two-term form",
      sp2.simplify(p_closed - p_two) == 0)
dp = sp2.simplify(sp2.diff(p_closed, rho_))
check("F5b dp*/drho = (1+sigma)/((1-sigma)(rho+1)^2) > 0",
      sp2.simplify(dp - (1 + sig_) / ((1 - sig_) * (rho_ + 1) ** 2)) == 0)

# F6: rho* comparative statics (Theorem 3(iii))
r2, lH, lF, cH2, cF2 = sp2.symbols("r lambda_H lambda_F c_H c_F", positive=True)
rho_star = (r2 + lF) * cF2 / ((r2 + lH) * cH2)
check("F6 drho*/dlambda_H < 0",
      sp2.simplify(sp2.diff(rho_star, lH) + (r2 + lF) * cF2 / ((r2 + lH) ** 2 * cH2)) == 0)
check("F6b drho*/dlambda_F > 0",
      sp2.simplify(sp2.diff(rho_star, lF) - cF2 / ((r2 + lH) * cH2)) == 0)

# F7: wedge sign condition in T5(ii): net bias sign = sign(theta_s - (r+lH)/(r+lF))
th_s = sp2.symbols("theta_s", positive=True)
planner_ratio = (1 / r2) / (th_s / r2)          # B_H/B_F weights: planner H/F = 1/theta_s
market_ratio = (1 / (r2 + lH)) / (1 / (r2 + lF))
# market over-tilts to F relative to planner iff market H/F < planner H/F
# iff (r+lF)/(r+lH) < 1/theta_s iff theta_s < (r+lH)/(r+lF)
cond = sp2.simplify(market_ratio / planner_ratio - th_s * (r2 + lF) / (r2 + lH))
check("F7 net direction bias governed by theta_s vs (r+lH)/(r+lF)", cond == 0)

# F8: calibration arithmetic in Section 9
val = (0.01 + math.log(2) / 14) / (0.01 + math.log(2) / 4)
check("F8 (0.01+ln2/14)/(0.01+ln2/4) ~ 0.3", 0.25 < val < 0.36)

# F9: profit envelope dPi/dq^L_i = B_i(q^L) (Lemma 2(iii) consequence used in text)
dPi_H = (W(qL[0] + hh, qL[1]) - W(qL[0] - hh, qL[1])) / (2 * hh)
check("F9 dPi/dq_H^L = B_H(q^L) (fringe fixed)",
      abs(dPi_H - BH(*qL)) < 1e-4 * max(1, abs(BH(*qL))))

# ----------------------------------------------------------------------
print(f"\nTOTAL: {PASS} pass, {FAIL} fail")
if FAILURES:
    for n, d in FAILURES:
        print(f"  - {n}: {d}")
    raise SystemExit(1)