#!/usr/bin/env python3
"""
HV-RSI (SPY) development evaluation — two questions, one universe load:

  (B) SKILL vs RANDOM — cash-matched monkey. Same universe gates (above
      SMA150, price>$2, 25d avg vol>100k) and IDENTICAL mechanics (3%-below
      limit, exit close>prev-high or 5d, 20 slots). The ONLY thing randomized
      is the NDX10 selection + ranking — replaced by random draws from the
      eligible pool. Isolates the NDX10 signal. K seeds per window; HV-RSI's
      canonical Bandy CAR25 is scored against the monkey CAR25 distribution.

  (C) CAPITAL-EFFICIENCY LEVERS — the binding constraint is under-deployment
      (safe_f pins at the 3x ceiling because exposure is ~4%). Test one-
      parameter variants that should raise deployment: looser entry limit,
      ndx_consecutive=1, more slots. Metric is canonical Bandy CAR25/safe_f
      (depends only on the trade-return distribution + trades/year, so it is
      sizing-invariant and rewards more frequent good deployment). Each
      variant is run on BOTH IS and OOS so a lever that only helps in-sample
      is exposed.

Canonical risk engine: edgerisknorm.monkey.bandy_safe_f (RN-BANDY-001).
CAR figures are sizing-normalized and exclude cash interest. SBF universe.

Usage:
  python -u develop_eval.py                     # both parts, IS + OOS
  python -u develop_eval.py --monkey-seeds 40
"""
import argparse
import sys
from datetime import date
from pathlib import Path

import numpy as np
import polars as pl

sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared"))
from quote_loader import load_eod, load_indexcon, members_on_date
from edgerisknorm.monkey import bandy_safe_f

INITIAL_CAPITAL = 100_000.0

# window name -> (first date, last date, window length in years).
# The year count is what car25_of() divides the trade count by.
WINDOWS = {
    "IS": (date(2005, 1, 1), date(2017, 12, 31), 13.0),
    "OOS": (date(2018, 1, 1), date(2026, 3, 16), 8.2),
}

# Baseline engine parameters; every key is read by run_sim().
BASE = dict(limit_discount=0.03, max_hold_days=5, ndx_consecutive=2,
            max_positions=20, per_pos_pct=0.10, min_hold=2)


# ── indicators ──────────────────────────────────────────────────────
def _ndx10(close, high, low, n=10):
    """Return each close's position inside the PRIOR n bars' range, in percent.

    For bar i the range is [min(low[i-n:i]), max(high[i-n:i])] — bar i itself
    is excluded — so a value below 0 means the close is under the lowest low of
    the previous n bars. The first n entries, and bars whose prior range is
    flat (high == low), are NaN.
    """
    close = np.asarray(close, dtype=float)
    high = np.asarray(high, dtype=float)
    low = np.asarray(low, dtype=float)
    out = np.full(len(close), np.nan)
    if len(close) <= n:
        return out
    windows = np.lib.stride_tricks.sliding_window_view
    # Window j covers bars j..j+n-1 and belongs to bar j+n; the final window
    # has no following bar, hence [:-1].
    hh = windows(high, n)[:-1].max(axis=1)
    ll = windows(low, n)[:-1].min(axis=1)
    usable = hh != ll
    tail = out[n:]  # view: writes land in `out`
    tail[usable] = 100.0 * (close[n:][usable] - ll[usable]) / (hh - ll)[usable]
    return out


def _rolling_mean(values, n):
    """Trailing n-bar arithmetic mean via cumulative sums; NaN until n bars exist."""
    if n < 1:
        raise ValueError(f"window length must be >= 1, got {n}")
    values = np.asarray(values, dtype=float)
    out = np.full(len(values), np.nan)
    if len(values) < n:
        return out
    cs = np.cumsum(values)
    out[n - 1:] = (cs[n - 1:] - np.concatenate([[0], cs[:-n]])) / n
    return out


def _sma(close, n):
    """Simple moving average of `close` over n bars (NaN for the first n-1)."""
    return _rolling_mean(close, n)


def _avg_vol(volume, n=25):
    """Average volume over the trailing n bars (NaN for the first n-1)."""
    return _rolling_mean(volume, n)


def _as_date(d):
    """Collapse a datetime-like value to a plain date; dates pass through."""
    return d.date() if hasattr(d, "date") else d


def prepare(df):
    """Turn one symbol's EOD frame into the arrays the engine consumes.

    `df` needs `date`, `close`, `high`, `low` and `volume` columns. When an
    `adj_close` column is present, close/high/low are rescaled by
    adj_close/close before any indicator is computed.

    Returns a dict with `close`, `high`, `low`, `ndx10`, `sma150`, `avg_vol`
    (numpy arrays in ascending date order) and `date_idx` (date -> row index).
    """
    df = df.sort("date")
    close = df["close"].to_numpy().astype(float)
    high = df["high"].to_numpy().astype(float)
    low = df["low"].to_numpy().astype(float)
    volume = df["volume"].to_numpy()
    dates = df["date"].to_list()

    # Split/dividend-adjust OHLC from adj_close (ratio reconstruction) before
    # any indicator/price/P&L — same fix as prototype.prepare_symbol_data.
    if "adj_close" in df.columns:
        adj = df["adj_close"].to_numpy().astype(float)
        usable = (close > 0) & np.isfinite(adj)
        # Masked divide: rows with a zero/negative close or a non-finite
        # adj_close keep factor 1.0 and never reach the division, so no
        # divide-by-zero warnings are raised.
        factor = np.ones_like(close)
        np.divide(adj, close, out=factor, where=usable)
        close = close * factor
        high = high * factor
        low = low * factor

    date_idx = {_as_date(d): j for j, d in enumerate(dates)}

    return dict(close=close, high=high, low=low,
                ndx10=_ndx10(close, high, low, 10),
                sma150=_sma(close, 150),
                avg_vol=_avg_vol(volume, 25),
                date_idx=date_idx)


# ── engine ──────────────────────────────────────────────────────────
def run_sim(sym_data, sim_dates, membership, params, selector="ndx", rng=None):
    """Run the daily limit-order simulation over `sim_dates`.

    selector='ndx' -> candidates need NDX10 < 0 on each of the last
    `ndx_consecutive` bars and are ranked by NDX10 (lowest first). Any other
    value -> random draw from the eligible pool (NDX10 gate REMOVED; the
    indicator only has to be defined). All other mechanics are identical.

    Per day, in order:
      1. fills  — yesterday's candidates fill at their limit price when the
                  day's low trades at or below it and a slot is free;
      2. exits  — close > previous bar's high (once held `min_hold` days) or
                  `max_hold_days` reached; exit at the close;
      3. scan   — build tomorrow's limit orders from today's index members;
      4. mark   — equity and exposure at the close.

    Args:
        sym_data: symbol -> dict produced by prepare().
        sim_dates: ordered iterable of dates to simulate.
        membership: index-constituent table handed to members_on_date().
        params: dict with the keys of BASE.
        selector: 'ndx' or anything else for the random monkey.
        rng: numpy Generator for the random selector; a fresh, unseeded one
            is created if omitted.

    Returns:
        (trades, mean_exposure): trades is a list of dicts with `pnl_pct` and
        `exit_reason` ('target', 'time' or 'delisted'); mean_exposure is the
        average of position value / equity over the simulated days.
        Positions still open after the last date are not recorded as trades.
    """
    use_ndx = selector == "ndx"
    if not use_ndx and rng is None:
        # Previously this path crashed on `None.choice`.
        rng = np.random.default_rng()

    limit_disc = params["limit_discount"]
    max_hold = params["max_hold_days"]
    ndx_k = params["ndx_consecutive"]
    max_pos = params["max_positions"]
    per_pos = params["per_pos_pct"]
    min_hold = params["min_hold"]

    cash = equity = INITIAL_CAPITAL
    positions, trades, exposures, pending = [], [], [], []
    final_bar = {}  # symbol -> date of its last available bar (filled lazily)

    for today in sim_dates:
        # fills — `equity` is still the previous day's mark here
        for sym, lp, _ in pending:
            data = sym_data.get(sym)
            if not data:
                continue
            idx = data["date_idx"].get(today)
            if idx is None or len(positions) >= max_pos:
                continue
            if data["low"][idx] > lp:
                continue  # limit not touched
            alloc = min(equity * per_pos, cash)
            sh = int(alloc / lp)
            if sh < 1:
                continue
            cost = sh * lp
            cash -= cost
            positions.append(dict(symbol=sym, entry_date=today, entry_price=lp,
                                  shares=sh, cost=cost, hold_days=0,
                                  prev_high=data["high"][idx]))
        pending = []

        # exits — positions filled above are processed too, so the entry day
        # counts as hold day 1
        keep = []
        for p in positions:
            sym = p["symbol"]
            data = sym_data.get(sym)
            idx = data["date_idx"].get(today) if data else None
            p["hold_days"] += 1

            if idx is None:
                if data:
                    last = final_bar.get(sym)
                    if last is None:
                        last = final_bar[sym] = max(data["date_idx"])
                    if today > last:
                        # The symbol has no bars from here on, so neither exit
                        # rule could ever fire and the position would hold its
                        # slot and cash forever. Close it at the final
                        # available close instead.
                        c = data["close"][-1]
                        if not np.isfinite(c):
                            c = p["entry_price"]
                        proceeds = p["shares"] * c
                        cash += proceeds
                        trades.append(dict(
                            pnl_pct=(proceeds - p["cost"]) / p["cost"],
                            exit_reason="delisted"))
                        continue
                keep.append(p)  # gap inside the history: wait for the next bar
                continue

            c = data["close"][idx]
            ex_t = p["hold_days"] >= min_hold and c > p["prev_high"]
            ex_time = p["hold_days"] >= max_hold
            if ex_t or ex_time:
                proceeds = p["shares"] * c
                cash += proceeds
                trades.append(dict(
                    pnl_pct=(proceeds - p["cost"]) / p["cost"],
                    exit_reason="target" if ex_t else "time"))
            else:
                p["prev_high"] = data["high"][idx]
                keep.append(p)
        positions = keep

        # scan — eligible pool (shared gates), selection differs
        members = members_on_date(membership, today)
        held = {p["symbol"] for p in positions}
        elig = []
        for sym in members:
            data = sym_data.get(sym)
            if not data or sym in held:
                continue
            idx = data["date_idx"].get(today)
            if idx is None or idx < ndx_k:
                continue
            c = data["close"][idx]
            sma = data["sma150"][idx]
            vol = data["avg_vol"][idx]
            if (np.isnan(sma) or np.isnan(vol) or c < 2.0
                    or vol < 100_000 or c <= sma):
                continue
            lp = c * (1 - limit_disc)
            ndx = data["ndx10"]
            if use_ndx:
                # NaN < 0 is False, so undefined readings fail the gate too.
                if all(ndx[idx - k] < 0 for k in range(ndx_k)):
                    elig.append((sym, lp, ndx[idx]))
            elif not np.isnan(ndx[idx]):
                # random: pool = all eligible (NDX10 gate removed)
                elig.append((sym, lp, 0.0))

        slots = max(max_pos - len(positions), 0)
        if use_ndx:
            elig.sort(key=lambda x: x[2])
            pending = elig[:slots]
        elif elig and slots > 0:
            pick = rng.choice(len(elig), size=min(slots, len(elig)), replace=False)
            pending = [elig[i] for i in pick]
        else:
            pending = []

        # mtm + exposure — a position without a bar today is carried at cost
        pv = 0.0
        for p in positions:
            data = sym_data.get(p["symbol"])
            idx = data["date_idx"].get(today) if data else None
            pv += p["shares"] * data["close"][idx] if idx is not None else p["cost"]
        equity = cash + pv
        exposures.append(pv / equity if equity > 0 else 0.0)

    return trades, float(np.mean(exposures)) if exposures else 0.0


def car25_of(trades, years, max_pos):
    """Score a trade list with the canonical Bandy safe-f engine.

    Non-finite returns are dropped first. Returns None when fewer than 20
    trades remain; otherwise a dict with `n`, `wr` (share of trades with a
    positive return), `tpy` (trades per year, floored at 1.0), `safe_f`,
    `car25`, `car50` and `maxdd_p5` taken from bandy_safe_f's result.
    """
    r = np.array([t["pnl_pct"] for t in trades], dtype=float)
    r = r[np.isfinite(r)]
    if len(r) < 20:
        return None
    tpy = max(1.0, len(r) / years)
    # Fixed seed so repeated scoring of the same trades is reproducible.
    b = bandy_safe_f(r, n_trials=1500, trades_per_trial=500, trades_per_year=tpy,
                     max_positions=max_pos, dd_constraint=-0.20, dd_pctile=5,
                     seed=42)
    return dict(n=len(r), wr=float((r > 0).mean()), tpy=tpy, safe_f=b.safe_f,
                car25=b.car25, car50=b.car50,
                maxdd_p5=b.max_dd_at_constraint_pctile)
# ── main ────────────────────────────────────────────────────────────
def _load_universe(mem, sdates):
    """Load and prepare quotes for every symbol seen in the sampled membership."""
    members = set()
    # NOTE(review): membership is sampled on every 10th date only; a symbol
    # that was a member solely between two sampled dates is never loaded —
    # confirm this is acceptable.
    for d in sdates[::10]:
        members.update(members_on_date(mem, d))

    sym_data = {}
    for i, sym in enumerate(sorted(members)):
        df = load_eod(sym)
        # 160 rows: the 150-bar SMA needs history before it is defined.
        if df is not None and len(df) >= 160:
            sym_data[sym] = prepare(df)
        if (i + 1) % 300 == 0:
            print(f" {i+1}/{len(members)} ({len(sym_data)} loaded)", flush=True)
    print(f" Loaded {len(sym_data)} symbols.\n", flush=True)
    return sym_data


def _run_levers(sym_data, win_dates, mem):
    """Part C: score every one-parameter lever on each window; return result rows."""
    levers = [
        ("baseline (3% limit, k=2, 20 slots)", BASE),
        ("limit 2%", {**BASE, "limit_discount": 0.02}),
        ("limit 1%", {**BASE, "limit_discount": 0.01}),
        ("limit 0% (at close)", {**BASE, "limit_discount": 0.00}),
        ("ndx_consec=1", {**BASE, "ndx_consecutive": 1}),
        ("30 slots", {**BASE, "max_positions": 30}),
        ("40 slots", {**BASE, "max_positions": 40}),
        ("limit 1% + k=1", {**BASE, "limit_discount": 0.01, "ndx_consecutive": 1}),
        ("limit 1% + 40 slots", {**BASE, "limit_discount": 0.01, "max_positions": 40}),
    ]
    rows = []
    for label, p in levers:
        for w, (_, _, yrs) in WINDOWS.items():
            trades, expo = run_sim(sym_data, win_dates[w], mem, p, selector="ndx")
            c = car25_of(trades, yrs, p["max_positions"])
            if c is None:
                print(f" {label:32} {w:3} n<20 (skip)", flush=True)
                continue
            rows.append({"lever": label, "win": w, "n": c["n"],
                         "wr": round(c["wr"], 3), "tpy": round(c["tpy"], 1),
                         "exposure": round(expo, 4),
                         "safe_f": round(c["safe_f"], 2),
                         "CAR25": round(c["car25"], 4),
                         "CAR50": round(c["car50"], 4),
                         "maxDD_p5": round(c["maxdd_p5"], 4)})
            print(f" {label:32} {w:3} n={c['n']:4} tpy={c['tpy']:5.1f} "
                  f"expo={expo*100:4.1f}% WR={c['wr']*100:4.1f}% "
                  f"safe_f={c['safe_f']:4.2f} CAR25={c['car25']*100:+5.2f}% "
                  f"DD@p5={c['maxdd_p5']*100:+6.2f}%", flush=True)
    return rows


def _run_monkey(sym_data, win_dates, mem, n_seeds):
    """Part B: rank the baseline CAR25 against `n_seeds` random-selection runs per window."""
    rows = []
    for w, (_, _, yrs) in WINDOWS.items():
        # HV baseline on identical engine
        hv_trades, hv_expo = run_sim(sym_data, win_dates[w], mem, BASE, selector="ndx")
        hv = car25_of(hv_trades, yrs, BASE["max_positions"])
        if hv is None:
            # car25_of returns None below 20 trades; nothing to rank against.
            print(f"\n [{w}] HV-RSI: n<20 (skip)", flush=True)
            continue
        print(f"\n [{w}] HV-RSI: n={hv['n']} expo={hv_expo*100:.1f}% "
              f"WR={hv['wr']*100:.1f}% CAR25={hv['car25']*100:+.2f}%", flush=True)

        mc = []
        for seed in range(n_seeds):
            rng = np.random.default_rng(1000 + seed)  # one fixed stream per seed
            mt, mexpo = run_sim(sym_data, win_dates[w], mem, BASE,
                                selector="random", rng=rng)
            c = car25_of(mt, yrs, BASE["max_positions"])
            if c is not None:
                mc.append(dict(car25=c["car25"], wr=c["wr"], n=c["n"],
                               expo=mexpo, tpy=c["tpy"]))
            if (seed + 1) % 10 == 0:
                print(f" {seed+1}/{n_seeds} seeds...", flush=True)

        if not mc:
            # Percentiles/medians of an empty sample are undefined.
            print(f" [{w}] no monkey run produced >=20 trades (skip)", flush=True)
            continue

        car = np.array([m["car25"] for m in mc])
        pct = float((car < hv["car25"]).mean() * 100)
        mexpo_med = float(np.median([m["expo"] for m in mc]))
        mwr_med = float(np.median([m["wr"] for m in mc]))
        print(f" [{w}] monkey CAR25: p5={np.percentile(car,5)*100:+.2f}% "
              f"p50={np.percentile(car,50)*100:+.2f}% p95={np.percentile(car,95)*100:+.2f}% "
              f"| monkey expo~{mexpo_med*100:.1f}% WR~{mwr_med*100:.1f}%",
              flush=True)
        print(f" [{w}] >>> HV-RSI CAR25 {hv['car25']*100:+.2f}% beats "
              f"{pct:.0f}% of monkeys (n={len(mc)})", flush=True)
        rows.append({"win": w, "hv_car25": round(hv["car25"], 4),
                     "hv_expo": round(hv_expo, 4), "hv_wr": round(hv["wr"], 3),
                     "monkey_car25_p50": round(float(np.percentile(car, 50)), 4),
                     "monkey_expo_med": round(mexpo_med, 4),
                     "monkey_wr_med": round(mwr_med, 3),
                     "hv_percentile": round(pct, 1), "n_seeds": len(mc)})
    return rows


def main():
    """Load the universe once, then run Part C (levers) and Part B (monkey).

    Writes levers.parquet and monkey_matched.parquet to
    output/SPY_SandP_500 next to this file.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--monkey-seeds", type=int, default=40)
    args = ap.parse_args()

    # Create the output directory before the long run so a missing or
    # unwritable path fails immediately, not after all simulations finish.
    out_dir = Path(__file__).parent / "output" / "SPY_SandP_500"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Overall span = union of the evaluation windows.
    full_start = min(s for s, _, _ in WINDOWS.values())
    full_end = max(e for _, e, _ in WINDOWS.values())

    print("Loading SPY membership + quotes (2005-2026)...", flush=True)
    mem = load_indexcon("SPY_SandP_500")
    # De-duplicate: a repeated date would make run_sim process that day twice.
    all_dates = {d.date() if hasattr(d, "date") else d for d in mem["date"].to_list()}
    sdates = sorted(d for d in all_dates if full_start <= d <= full_end)

    sym_data = _load_universe(mem, sdates)
    win_dates = {w: [d for d in sdates if s <= d <= e]
                 for w, (s, e, _) in WINDOWS.items()}

    # ===== Part C — capital-efficiency levers =====
    print("=" * 70, flush=True)
    print("PART C — CAPITAL-EFFICIENCY LEVERS (canonical Bandy CAR25)", flush=True)
    print("=" * 70, flush=True)
    lever_rows = _run_levers(sym_data, win_dates, mem)
    pl.DataFrame(lever_rows).write_parquet(out_dir / "levers.parquet")

    # ===== Part B — cash-matched monkey (skill vs random) =====
    print("\n" + "=" * 70, flush=True)
    print(f"PART B — CASH-MATCHED MONKEY ({args.monkey_seeds} seeds/window)", flush=True)
    print("(same gates + mechanics; NDX10 selection -> random draw)", flush=True)
    print("=" * 70, flush=True)
    monkey_rows = _run_monkey(sym_data, win_dates, mem, args.monkey_seeds)
    pl.DataFrame(monkey_rows).write_parquet(out_dir / "monkey_matched.parquet")

    print("\n" + "=" * 70, flush=True)
    print("DONE. Saved levers.parquet + monkey_matched.parquet", flush=True)


if __name__ == "__main__":
    main()