First-Look Script

Runnable Python (matplotlib), analysis only: consumes the base table from the transform layer and renders the coverage panel, realization waterfall, realized-bps distribution, fee-code ladder, charged-at-all share, raw driver small-multiples, within-MG candidate pool, waived-pool Lorenz, and the return-on-AUM × custody quadrant. Descriptive; figures are generated corp-side and not committed.
GET/v2/hypothesis-testing/first-look-scriptRequires authentication
Code Example

python
# first-look-exhibits.py — descriptive exhibits for the stakeholder meeting
# -------------------------------------------------------------------------
# ANALYSIS ONLY. The base table is built by the transformation layer (transform.py); this file just
# VISUALISES it. Run AFTER your ingestion cells. Requires these frames in scope:
#   custody_2h25_hk, custody_2h25_sg, mkgid_clientid_mapping, clientdomicile_df, acctmgmt_df,
#   trustacct_df, mkg_info   (→ base table, via transform.build_base_table)
#   mkg_rev_history, mkg_aum_history   (→ E10 only)
# Produces exhibits E0–E17 (PNG + inline). No modelling.
#
# READ BEFORE PRESENTING — caveats baked in (the base-table logic + its caveats live in transform.py):
#   • Every cut is RAW / UNADJUSTED — descriptive, confounded (size ⟂ segment ⟂ domicile …).
#     These NOMINATE patterns; they are not conclusions. Say so out loud.
#   • Rates (bps, %) are unit-free → HK+SG pooled is fine. $ figures are kept PER-SITE
#     (local ccy) to avoid FX. Set FX_TO_SGD['HK'] to combine $ across sites.
#   • GST: SG fees may be GST-inclusive (HK not) → slightly inflates SG bps. Flagged, not stripped.
#   • Universe = client map (incl. the CDT099 pool); eligible-AUM is BLANK for CDT099 until the backend
#     request lands → eligible-AUM cuts describe the charged universe, the CDT099 gap is countable only.
#   • "Uncharged-but-eligible" is a CANDIDATE pool, NOT "should be charged"; legit-exempt (DPM/MyAdvisory)
#     is split from genuine-gap via waived_class. See transform.py for the full base-table contract.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

try:
    from transform import build_base_table     # transformation layer (file mode)
except ImportError:
    pass                                        # notebook-paste mode: run transform.py's cells first

OUTDIR = Path("figs"); OUTDIR.mkdir(exist_ok=True)        # data-derived PNGs — DO NOT COMMIT
FX_TO_SGD = {"HK": None, "SG": 1.0}                        # set HK (e.g. 0.17) only to combine $; None = per-site
DEANON = False    # E17 sponsor-only: real market names stay anonymised (`Market-NN`) until set True (plan.md §confidentiality)
plt.rcParams.update({"figure.dpi": 130, "font.size": 10, "axes.grid": True, "axes.axisbelow": True})

def _num(s): return pd.to_numeric(s, errors="coerce")     # local helper for the E10 revenue/AUM join
def _save(fig, name):
    fig.tight_layout()
    fig.savefig(OUTDIR / f"{name}.png", bbox_inches="tight")   # save for the deck
    plt.show()                                                 # AND render inline in the notebook
    print(f"  saved figs/{name}.png")

# ---------- 0. Base table (built by the transformation layer) ----------
acct = build_base_table(custody_2h25_hk, custody_2h25_sg, mkgid_clientid_mapping,
                        clientdomicile_df, acctmgmt_df, trustacct_df, mkg_info)

# ---------- 1. Coverage panel (credibility: the joins work + the CDT099 gap is visible) ----------
try:
    cov = {"→ MG": acct["MKG_ID"].notna().mean(),
           "→ domicile": acct["domicile_country"].notna().mean(),
           "→ mgmt type": acct["account_type"].notna().mean(),
           "in charging file\n(non-CDT099)": acct["in_charging_file"].mean(),
           "has eligible AUM": acct["eligible"].mean()}
    fig, ax = plt.subplots(figsize=(6.4, 3.0))
    ax.barh(list(cov)[::-1], [v * 100 for v in list(cov.values())[::-1]], color="#4C78A8")
    ax.set_xlim(0, 100); ax.set_xlabel("% of account universe (client map)")
    ax.set_title("E0 · Universe coverage — attributes link up; the charging-file gap = the CDT099 pool")
    for i, v in enumerate(list(cov.values())[::-1]): ax.text(v * 100 + 1, i, f"{v:.0%}", va="center")
    _n099 = int(acct["is_cdt099"].sum())
    ax.annotate(f"CDT099 / full-waiver pool: {_n099:,} accts ({acct['is_cdt099'].mean():.0%}) — eligible-AUM pending backend",
                xy=(0, -0.42), xycoords="axes fraction", fontsize=8, color="#B22222")
    _save(fig, "e0_coverage")
except Exception as e: print("E0 failed:", e)

# ---------- 2. Realization waterfall, per site (computed vs charged; waived = the gap) ----------
try:
    fig, axes = plt.subplots(1, 2, figsize=(9, 3.4))
    for ax, site in zip(axes, ["HK", "SG"]):
        s = acct[acct.site == site]
        comp, chg = s["custody_fee_computed"].sum(), s["custody_fee_charged"].sum()
        waived = comp - chg
        ax.bar(["Computed\n(theoretical)", "Waived/\ndiscounted", "Charged\n(realised)"],
               [comp, waived, chg], color=["#54A24B", "#E45756", "#4C78A8"])
        ax.set_title(f"{site}  ·  realised {chg/comp:.0%} of computed" if comp else site)
        ax.set_ylabel("fee (local ccy)")
    fig.suptitle("E1 · Realisation waterfall — what we compute vs what we collect", y=1.02)
    _save(fig, "e1_waterfall")
except Exception as e: print("E1 failed:", e)

# ---------- 2b. Realized-bps distribution (E2) — the rate landscape + the zero-fee spike ----------
try:
    r = acct.loc[acct["eligible"], "realized_bps"].dropna()
    charged_r = acct.loc[acct["charged"], "realized_bps"].dropna()
    pmax = charged_r.quantile(0.95) if len(charged_r) else np.nan          # empirical practical max
    fig, ax = plt.subplots(figsize=(7, 3.6))
    ax.hist(r.clip(upper=30), bins=40, color="#4C78A8")
    ymax = ax.get_ylim()[1]
    ax.axvline(25, color="grey", ls="--", lw=1); ax.text(25, ymax * 0.9, " 25 bps (mythical)", fontsize=8, color="grey")
    if np.isfinite(pmax):
        ax.axvline(pmax, color="#E45756", ls="--", lw=1.2)
        ax.text(pmax, ymax * 0.72, f" practical max ≈ {pmax:.0f} bps", fontsize=8, color="#E45756")
    ax.set_xlabel("realized bps (custody charged ÷ eligible AUM)"); ax.set_ylabel("accounts")
    ax.set_title("E2 · Realized-bps distribution — the zero-fee spike + the achievable rate")
    print(f"  E2: practical max (95th pct of charged) ≈ {pmax:.1f} bps · zero-fee (eligible) accounts = {(r == 0).sum():,} of {len(r):,}")
    _save(fig, "e2_rate_distribution")
except Exception as e: print("E2 failed:", e)

# ---------- 2c. Fee-code → discount ladder (E3) — what each CDT code actually encodes ----------
try:
    codes = acct.loc[acct["eligible"], ["cdt_code", "realized_bps"]].dropna(subset=["cdt_code"])
    order = codes.groupby("cdt_code")["realized_bps"].median().sort_values().index
    data = [codes.loc[codes.cdt_code == c, "realized_bps"].dropna().values for c in order]
    fig, ax = plt.subplots(figsize=(7, 3.6))
    ax.boxplot(data, showfliers=False)                                    # positions 1..N
    ax.set_xticks(range(1, len(order) + 1)); ax.set_xticklabels(list(order), rotation=40, ha="right", fontsize=8)
    ax.set_xlabel("custody fee code (CDT)"); ax.set_ylabel("realized bps")
    ax.set_title("E3 · Fee-code → empirical discount ladder")
    print(f"  E3: {len(order)} codes; median bps by code = " +
          ", ".join(f"{c}:{codes.loc[codes.cdt_code == c, 'realized_bps'].median():.0f}" for c in order))
    _save(fig, "e3_fee_code_ladder")
except Exception as e: print("E3 failed:", e)

# ---------- 3. Charged-at-all — by count and by eligible AUM (the never-asked share) ----------
try:
    fig, axes = plt.subplots(1, 2, figsize=(9, 3.4))
    by_cnt = acct.groupby("site")["charged"].agg(["sum", "count"])
    by_cnt["uncharged"] = by_cnt["count"] - by_cnt["sum"]
    axes[0].bar(by_cnt.index, by_cnt["sum"], label="charged", color="#4C78A8")
    axes[0].bar(by_cnt.index, by_cnt["uncharged"], bottom=by_cnt["sum"], label="uncharged", color="#BAB0AC")
    axes[0].set_title("by account count"); axes[0].legend()
    elig = acct.assign(elig_chg=np.where(acct.charged, acct.custody_eligible_aum, 0),
                       elig_unchg=np.where(~acct.charged, acct.custody_eligible_aum, 0)).groupby("site")[["elig_chg", "elig_unchg"]].sum()
    axes[1].bar(elig.index, elig["elig_chg"], label="charged", color="#4C78A8")
    axes[1].bar(elig.index, elig["elig_unchg"], bottom=elig["elig_chg"], label="uncharged (eligible)", color="#E45756")
    axes[1].set_title("by eligible AUM (local ccy)"); axes[1].legend()
    for site in elig.index:
        share = elig.loc[site, "elig_unchg"] / max(elig.loc[site].sum(), 1)
        print(f"  {site}: {share:.0%} of eligible AUM sits in zero-fee accounts")
    fig.suptitle("E4 · Charged-at-all — the never-asked share (count vs money)", y=1.02)
    _save(fig, "e4_charged_at_all")
except Exception as e: print("E4 failed:", e)

# ---------- 4. THE SLICES — charged-share & median realised bps by dimension (raw) ----------
try:
    top_dom = acct["domicile_country"].value_counts().head(6).index
    acct["domicile_top"] = np.where(acct["domicile_country"].isin(top_dom), acct["domicile_country"], "Other")
    dims = [("site", "Site"), ("is_trust", "Trust flag"), ("account_type", "Account type"),
            ("segment_current", "Segment"), ("domicile_top", "Domicile (top 6)")]
    fig, axes = plt.subplots(2, len(dims), figsize=(3.0 * len(dims), 6))
    for j, (col, label) in enumerate(dims):
        share = acct.groupby(col)["charged"].mean().sort_values(ascending=False)
        bps = acct[acct.charged].groupby(col)["realized_bps"].median().reindex(share.index)
        axes[0, j].bar(range(len(share)), share.values * 100, color="#4C78A8")
        axes[0, j].set_title(label); axes[0, j].set_xticks(range(len(share)))
        axes[0, j].set_xticklabels(share.index, rotation=40, ha="right", fontsize=8)
        axes[1, j].bar(range(len(bps)), bps.values, color="#54A24B")
        axes[1, j].set_xticks(range(len(bps))); axes[1, j].set_xticklabels(bps.index, rotation=40, ha="right", fontsize=8)
    axes[0, 0].set_ylabel("% charged (any fee)"); axes[1, 0].set_ylabel("median bps | charged")
    fig.suptitle("E7 · Slices — RAW / UNADJUSTED descriptive cuts (nominate, don't conclude)", y=1.01)
    _save(fig, "e7_slices")
except Exception as e: print("E7 failed:", e)

# ---------- 5. Within-MG teaser — eligible AUM sitting in uncharged accounts of charging MGs ----------
try:
    acct["_elig_unchg"] = np.where(~acct["charged"], acct["custody_eligible_aum"].fillna(0), 0.0)
    mg = (acct.groupby(["site", "MKG_ID"])
              .agg(n=("CLIENT_ID", "size"), n_chg=("charged", "sum"),
                   elig=("custody_eligible_aum", "sum"), elig_unchg=("_elig_unchg", "sum"))
              .reset_index())
    mg["mixed"] = (mg.n > 1) & (mg.n_chg > 0) & (mg.n_chg < mg.n)
    fig, ax = plt.subplots(figsize=(6.5, 3.4))
    pool = mg[mg.n_chg > 0].groupby("site")["elig_unchg"].sum()           # uncharged-eligible inside MGs that DO charge
    ax.bar(pool.index, pool.values, color="#E45756")
    ax.set_title("E8 · Candidate pool — eligible AUM in UNCHARGED accounts of MGs that already charge")
    ax.set_ylabel("eligible AUM in zero-fee accounts (local ccy)")
    for site in pool.index:
        mixed = mg[(mg.site == site) & mg.mixed].shape[0]
        print(f"  {site}: {mixed:,} mixed-status MGs · uncharged-eligible pool = {pool.get(site, 0):,.0f} (local ccy)")
    _save(fig, "e8_within_mg")
except Exception as e: print("E8 failed:", e)

# ---------- 5b. Waived-pool concentration (E9) — Lorenz/Pareto of foregone custody by MG ----------
try:
    mgw = acct.groupby("MKG_ID").agg(computed=("custody_fee_computed", "sum"),
                                     charged=("custody_fee_charged", "sum")).reset_index()
    waived = (mgw["computed"] - mgw["charged"]).clip(lower=0)               # foregone custody fee
    w = waived[waived > 0].sort_values(ascending=False).reset_index(drop=True)
    total = w.sum()
    cum = (w.cumsum() / total).values if (total > 0 and len(w)) else np.array([])
    x = (np.arange(1, len(w) + 1) / len(w)) if len(w) else np.array([])
    share10 = cum[int(0.1 * len(w)) - 1] if len(w) >= 10 else np.nan
    fig, ax = plt.subplots(figsize=(6.5, 4.5))
    ax.plot(x * 100, cum * 100, color="#E45756", lw=2, label="waived pool")
    ax.plot([0, 100], [0, 100], color="grey", ls="--", lw=0.8, label="perfect equality")
    if np.isfinite(share10):
        ax.axvline(10, color="grey", lw=0.6, ls=":")
        ax.annotate(f"top 10% of MGs ≈ {share10:.0%} of waived", xy=(10, share10 * 100),
                    xytext=(25, 45), fontsize=8, arrowprops=dict(arrowstyle="->", color="grey"))
    ax.set_xlabel("cumulative % of MGs (ranked by foregone custody)")
    ax.set_ylabel("cumulative % of waived pool"); ax.legend(loc="lower right", fontsize=8)
    ax.set_title("E9 · Waived-pool concentration (Lorenz) — mixed ccy, descriptive")
    print(f"  E9: {len(w):,} MGs with a waived pool" + (f"; top 10% hold ≈ {share10:.0%}" if np.isfinite(share10) else ""))
    _save(fig, "e9_waived_pareto")
except Exception as e: print("E9 failed:", e)

# ---------- 6. PRICING LANDSCAPE — return-on-AUM × custody rate (E10): the "get them interested" hook ----------
# Descriptive only: LOCATES the underpriced pool using the bank's pricing logic (low total return ⇒ no break
# justified). It does NOT explain why a given MG deviates — that's deliberately future-week work.
try:
    mg_c = acct.groupby("MKG_ID").agg(elig=("custody_eligible_aum", "sum"), chg=("custody_fee_charged", "sum"),
                                      segment=("segment_current", "first"), floor=("segment_min_roa", "first")).reset_index()
    rev = mkg_rev_history[["MKG_ID", "revenue_2025"]].assign(revenue_2025=lambda d: _num(d["revenue_2025"]))
    aum = mkg_aum_history[["MKG_ID", "aum_2025"]].assign(aum_2025=lambda d: _num(d["aum_2025"]))
    q = (mg_c.merge(rev, on="MKG_ID", how="left").merge(aum, on="MKG_ID", how="left")
             .query("aum_2025 > 0 and elig > 0"))
    q["return_on_aum_bps"] = q["revenue_2025"] / q["aum_2025"] * 1e4     # total revenue yield on AUM
    q["custody_bps"]       = q["chg"] / q["elig"] * 1e4                  # both rates → currency-safe to pool
    q = q[q["custody_bps"].le(60)]    # drop tiny-eligible divide-by-near-zero artifacts (no MG realizes >60 bps)
    ry = q["custody_bps"].median()
    q["below_floor"] = q["return_on_aum_bps"] < q["floor"]               # below the MG's SEGMENT Min RoA (policy), not a global median
    under = q[q["below_floor"] & (q.custody_bps < ry)]                   # below segment RoA floor AND low custody = realignment target
    fig, ax = plt.subplots(figsize=(7, 5))
    smax = q["elig"].max() or 1
    ax.scatter(q.return_on_aum_bps, q.custody_bps, s=20 + 200 * q.elig / smax, alpha=0.45, color="#4C78A8", edgecolor="none")
    ax.scatter(under.return_on_aum_bps, under.custody_bps, s=20 + 200 * under.elig / smax,
               facecolor="none", edgecolor="#E45756", linewidth=1.3, label="below segment RoA floor + low custody")
    ax.axhline(ry, color="grey", lw=0.8, ls="--")                       # low-custody reference (median; no policy custody floor)
    ax.set_xlim(0, np.nanpercentile(q.return_on_aum_bps, 97))           # clip right-tail outliers (tiny-AUM MGs)
    ax.set_ylim(0, max(np.nanpercentile(q.custody_bps, 98), 5))         # clip blow-out from tiny-eligible MGs
    ax.set_xlabel("total return-on-AUM (bps = revenue ÷ AUM)"); ax.set_ylabel("custody realized bps")
    ax.set_title("E10 · Pricing landscape — return-on-AUM × custody (red = below segment RoA floor + low custody)")
    ax.legend(loc="upper right", fontsize=8)
    print(f"  E10: {len(under):,} MGs below their segment RoA floor (Strategic 40/Key 65/Core 80/PB 100) AND low custody; "
          f"eligible AUM = {under['elig'].sum():,.0f} (mixed sites)")
    _save(fig, "e10_pricing_quadrant")
except Exception as e: print("E10 failed:", e)

# ---------- 7. FEE OUTCOME (DEF / Q1) — full-pay / partial-discount / deliberate full-waiver / default ----------
# Where does a less-than-full charge come from? Only DEFAULT (CDT099) is "never asked"; a non-CDT099 zero
# OR a partial discount had a fee computed → an explicit/deliberate give-away (degree = discount_depth).
try:
    order = ["full-pay", "partial-discount", "full-waiver", "default (CDT099)"]
    colors = {"full-pay": "#4C78A8", "partial-discount": "#F58518", "full-waiver": "#E45756", "default (CDT099)": "#BAB0AC"}
    ct = acct.groupby(["site", "fee_outcome"]).size().unstack("fee_outcome").reindex(columns=order).fillna(0)
    fig, ax = plt.subplots(figsize=(6.8, 3.6))
    bottom = np.zeros(len(ct))
    for src in order:
        ax.bar(ct.index, ct[src].values, bottom=bottom, label=src, color=colors[src]); bottom += ct[src].values
    ax.set_ylabel("accounts"); ax.legend(fontsize=8)
    ax.set_title("E11 · Fee outcome — full-pay / partial-discount / deliberate full-waiver / default (CDT099)")
    print("  E11: " + " | ".join(f"{s} {int(acct['fee_outcome'].eq(s).sum()):,}" for s in order))
    _save(fig, "e11_fee_outcome")
except Exception as e: print("E11 failed:", e)

# ---------- 9. DELIBERATE-WAIVER opportunity (DEF / OBJ-E10) — where partial+full give-aways concentrate ----------
# Partial discounts AND non-CDT099 full waivers are both INTENTIONAL give-aways → group them. Foregone fee =
# computed − charged. Sizable NOW (computed + eligible in hand), unlike the CDT099 pool. $ are mixed-ccy.
try:
    dw = acct[acct["deliberate_waiver"]].copy()
    dw["foregone"] = (dw["custody_fee_computed"].fillna(0) - dw["custody_fee_charged"].fillna(0)).clip(lower=0)
    dw["kind"] = np.where(dw["fee_outcome"].eq("partial-discount"), "partial", "full")
    segs = dw.groupby("segment_current")["foregone"].sum().sort_values(ascending=False).index
    fig, axes = plt.subplots(1, 2, figsize=(11, 4))
    for ax, col, lab in zip(axes, ["foregone", "custody_eligible_aum"], ["foregone fee (computed − charged)", "eligible AUM"]):
        piv = dw.groupby(["segment_current", "kind"])[col].sum().unstack("kind").reindex(segs).fillna(0)
        bottom = np.zeros(len(piv))
        for kind, color in [("partial", "#F58518"), ("full", "#E45756")]:
            if kind in piv:
                ax.bar(range(len(piv)), piv[kind].values, bottom=bottom, label=kind, color=color); bottom += piv[kind].values
        ax.set_xticks(range(len(piv))); ax.set_xticklabels(piv.index, rotation=40, ha="right", fontsize=8)
        ax.set_title(lab); ax.legend(fontsize=8)
    tot = dw["foregone"].sum() or 1
    bk = dw.groupby("kind").agg(foregone=("foregone", "sum"), elig=("custody_eligible_aum", "sum"), n=("CLIENT_ID", "size"))
    pf = {k: (bk.loc[k, "foregone"] / tot if k in bk.index else 0.0) for k in ("partial", "full")}
    fig.suptitle(f"E13 · Deliberate-waiver opportunity by segment — foregone fee: {pf['partial']:.0%} partial / "
                 f"{pf['full']:.0%} full  (non-CDT099, mixed ccy, descriptive)", y=1.04)
    print(f"  E13: deliberate waivers = {len(dw):,} accts · foregone fee {dw['foregone'].sum():,.0f} · "
          f"eligible AUM {dw['custody_eligible_aum'].sum():,.0f} (mixed ccy)")
    for k in ("partial", "full"):                         # ← where the opportunity sits: tighten partials vs reverse full waivers
        if k in bk.index:
            r = bk.loc[k]
            print(f"    {k}: foregone {r['foregone']:,.0f} ({r['foregone']/tot:.0%}) · eligible AUM {r['elig']:,.0f} · {int(r['n']):,} accts")
    _save(fig, "e13_deliberate_waiver")
except Exception as e: print("E13 failed:", e)

# ---------- 8. CROSS-BOOKING (Q2›BC1) — within MGs spanning HK+SG, is the SG leg waived while HK pays? ----------
# Tests the diversification/cross-subsidy read: same relationship (MG) booked in both centres; compare
# the charged-share of the HK leg vs the SG leg. Sub-hypothesis of within-MG inconsistency.
try:
    xb = acct[acct["mg_cross_booking"]]
    n = xb["MKG_ID"].nunique()
    share = xb.groupby("site")["charged"].mean().reindex(["HK", "SG"]).dropna()
    fig, ax = plt.subplots(figsize=(5.5, 3.4))
    ax.bar(share.index, share.values * 100, color=["#4C78A8", "#E45756"][:len(share)])
    ax.set_ylabel("% charged (any fee)"); ax.set_ylim(0, 100)
    ax.set_title(f"E12 · Cross-booking MGs (n={n:,}) — charged-share by leg (HK-pays / SG-waived?)")
    for i, v in enumerate(share.values): ax.text(i, v * 100 + 1, f"{v:.0%}", ha="center", fontsize=9)
    print(f"  E12: {n:,} cross-booking MGs · charged-share " + ", ".join(f"{s}:{v:.0%}" for s, v in share.items()))
    _save(fig, "e12_cross_booking")
except Exception as e: print("E12 failed:", e)

# ---------- 10. PB/PB Ex POST-CODE WAIVERS (OBJ-E10 / H-W2) — not allowed → direct recovery ----------
# Policy: PB & PB Ex permit no waivers on the COMPUTED (post-code) fee. So an IN-FILE (non-099) PB/PB Ex
# account charged BELOW its computed fee is a violation. Uplift = foregone (computed − charged), sizable now.
# (CDT099 PB/PB Ex accounts compute zero — the code waives — and are NOT an anomaly on their own: many are
#  legit-exempt DPM/MyAdvisory. They sit in the normal CDT099 pool, sized via the CDT099 re-exam, not here.
#  $ per the fee currency — confirm USD.)
try:
    an = acct[acct["pb_waiver_anomaly"]].copy()
    an["foregone"] = (an["custody_fee_computed"].fillna(0) - an["custody_fee_charged"].fillna(0)).clip(lower=0)
    order = sorted(an["segment_current"].dropna().unique())
    cnt = an.groupby(["segment_current", "site"]).size().unstack("site").reindex(order).fillna(0)
    fig, ax = plt.subplots(figsize=(6.5, 3.6))
    bottom = np.zeros(len(cnt))
    for s, c in [("HK", "#4C78A8"), ("SG", "#E45756")]:
        if s in cnt:
            ax.bar(range(len(cnt)), cnt[s].values, bottom=bottom, label=s, color=c); bottom += cnt[s].values
    ax.set_xticks(range(len(cnt))); ax.set_xticklabels(cnt.index, fontsize=8); ax.set_ylabel("anomaly accounts"); ax.legend(fontsize=8)
    ax.set_title("E14 · PB/PB Ex post-code waivers (in-file, charged < computed) — direct recovery")
    print(f"  E14: {len(an):,} PB/PB Ex in-file waiver anomalies (charged < computed) · recoverable uplift ≈ "
          f"{an['foregone'].sum():,.0f} (foregone fee; currency per fee schedule, confirm USD)")
    for seg in order:
        sub = an[an.segment_current == seg]
        print(f"    {seg}: {len(sub):,} accts · foregone {sub['foregone'].sum():,.0f}")
    _save(fig, "e14_pb_anomalies")
except Exception as e: print("E14 failed:", e)

# ---------- 11. CDT099 POOL BY MANDATE (CDT099 re-exam / DEF) — legit-exempt vs genuine candidate ----------
# The ~76% CDT099 (absent) pool isn't one thing. The mandate (account_type) splits it: Free (DPM) and
# Advisory (MyAdvisory) are legit-exempt (not custody-charged by design); RESTRICTED (Self-Directed) is the
# genuine custody-fee candidate — the real "never-asked" subset. Countable NOW; $ awaits the chargeable-AUM.
try:
    c99 = acct[acct["is_cdt099"]].copy()
    order = ["Restricted (Self-Directed)", "Advisory (MyAdvisory)", "Free (DPM)", "unknown"]
    by = c99.groupby(["mandate", "site"]).size().unstack("site").fillna(0)
    by = by.reindex([m for m in order if m in by.index])
    fig, ax = plt.subplots(figsize=(7, 3.4))
    left = np.zeros(len(by))
    for s, col in [("HK", "#4C78A8"), ("SG", "#E45756")]:
        if s in by:
            ax.barh(range(len(by)), by[s].values, left=left, label=s, color=col); left += by[s].values
    ax.set_yticks(range(len(by))); ax.set_yticklabels(by.index, fontsize=8); ax.invert_yaxis()
    ax.set_xlabel("CDT099 accounts"); ax.legend(fontsize=8)
    ax.set_title("E15 · CDT099 pool by mandate — Restricted (Self-Directed) = the custody-fee candidate")
    cand = int(c99["mandate"].eq("Restricted (Self-Directed)").sum())
    print(f"  E15: CDT099 pool {len(c99):,} · candidate Restricted/Self-Directed {cand:,} ({cand / max(len(c99), 1):.0%}) "
          f"· legit-exempt Free/Advisory {int((c99['mgmt_exempt']).sum()):,}. $ sizing awaits chargeable-AUM.")
    _save(fig, "e15_cdt099_by_mandate")
except Exception as e: print("E15 failed:", e)

# ---------- 12. POLICY COMPLIANCE & REVENUE IMPACT (no-waiver + min-charge approval grid) ----------
# Classify every in-file waiver (charged < computed) by policy status: PB/PB Ex → NO waivers = VIOLATION;
# S/K/C → allowed *if approved*, binned by the waived amount's required approval tier (RM <500 · TH <1k ·
# MH <5k · MH+SH/COO >5k). Plus the min-charge breaches (under_min_charge). Flags the foregone revenue.
# Can't verify whether a waiver WAS approved (no approver field) — the tiers flag where senior sign-off is due.
try:
    BANDS = [(0, 500, "RM (<500)"), (500, 1000, "TH (<1k)"), (1000, 5000, "MH (<5k)"), (5000, np.inf, "MH+SH/COO (>5k)")]
    w = acct[acct["in_charging_file"] & (acct["custody_fee_charged"].fillna(0) < acct["custody_fee_computed"].fillna(0))].copy()
    w["waived"] = (w["custody_fee_computed"].fillna(0) - w["custody_fee_charged"].fillna(0)).clip(lower=0)
    def _status(row):
        if row["no_waiver_segment"]:
            return "PB/PB Ex — NOT ALLOWED"
        for lo, hi, lab in BANDS:
            if lo <= row["waived"] < hi:
                return f"S/K/C — needs {lab}"
        return "S/K/C — other"
    w["compliance"] = w.apply(_status, axis=1)
    g = w.groupby("compliance")["waived"].sum().sort_values(ascending=False)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.barh(range(len(g)), g.values, color=["#E45756" if "NOT ALLOWED" in i else "#4C78A8" for i in g.index])
    ax.set_yticks(range(len(g))); ax.set_yticklabels(g.index, fontsize=8); ax.invert_yaxis()
    ax.set_xlabel("foregone revenue (computed − charged; mixed ccy, confirm USD)")
    ax.set_title("E16 · Policy compliance — waived revenue by status (PB/PB Ex = violation; S/K/C by approval tier)")
    viol = w.loc[w["no_waiver_segment"], "waived"].sum()
    print(f"  E16: PB/PB Ex no-waiver VIOLATIONS {int(w['no_waiver_segment'].sum()):,} accts · revenue impact {viol:,.0f} (clear)")
    for lo, hi, lab in BANDS:
        b = w[~w["no_waiver_segment"] & (w["waived"] >= lo) & (w["waived"] < hi)]
        if len(b):
            print(f"    S/K/C waivers needing {lab}: {len(b):,} accts · {b['waived'].sum():,.0f}")
    um = acct[acct["under_min_charge"]]
    um_short = (150.0 - um["custody_fee_charged"].fillna(0)).clip(lower=0).sum()
    print(f"  E16: min-charge breaches (in-file, computed ≥ 150, charged < 150): {len(um):,} accts · "
          f"shortfall-to-floor {um_short:,.0f} (currency per fee schedule, confirm USD)")
    _save(fig, "e16_policy_compliance")
except Exception as e: print("E16 failed:", e)

# ---------- 13. PRICING DISCIPLINE — waived $ by APPROVING MARKET (governance read / OBJ-COMPLIANCE) ----------
# The governance pivot of E16: SAME in-file waiver pool (charged < computed — the discretionary give-aways the
# approval grid governs), but attributed to the APPROVING UNIT, proxied by `market` (≈ the account's market
# head's domain; there's no approver field). Each market's bar is STACKED by the approval tier the waived
# amount REQUIRES — so a market whose pool skews to the >5k / NOT-ALLOWED tiers is a pricing-DISCIPLINE flag
# even when each waiver was individually signed off (compliant ≠ disciplined — Marc 2026-06-19). Kept at
# MARKET level by design: a per-team/RM league table is the most sensitive cut (plan.md) and stays verbal.
# ANONYMISED BY DEFAULT (`Market-NN`, rank order); set DEANON=True for the sponsor-only named mapping. Mixed ccy.
try:
    BANDS = [(0, 500, "RM (<500)"), (500, 1000, "TH (<1k)"), (1000, 5000, "MH (<5k)"), (5000, np.inf, "MH+SH/COO (>5k)")]
    TIERS = ["PB/PB Ex — NOT ALLOWED", "MH+SH/COO (>5k)", "MH (<5k)", "TH (<1k)", "RM (<500)"]   # senior → junior
    TIER_COLOR = {"PB/PB Ex — NOT ALLOWED": "#7F1313", "MH+SH/COO (>5k)": "#E45756", "MH (<5k)": "#F58518",
                  "TH (<1k)": "#54A24B", "RM (<500)": "#4C78A8"}
    w = acct[acct["in_charging_file"] & (acct["custody_fee_charged"].fillna(0) < acct["custody_fee_computed"].fillna(0))].copy()
    w["waived"] = (w["custody_fee_computed"].fillna(0) - w["custody_fee_charged"].fillna(0)).clip(lower=0)
    def _tier(row):
        if row["no_waiver_segment"]:
            return "PB/PB Ex — NOT ALLOWED"
        for lo, hi, lab in BANDS:
            if lo <= row["waived"] < hi:
                return lab
        return "RM (<500)"
    w["tier"] = w.apply(_tier, axis=1)
    w["market_"] = w["market"].fillna("unknown")
    rank = w.groupby("market_")["waived"].sum().sort_values(ascending=False)
    top = rank.head(12).index                                  # focus the bars on where the money is
    piv = (w[w["market_"].isin(top)].groupby(["market_", "tier"])["waived"].sum()
             .unstack("tier").reindex(index=top, columns=TIERS).fillna(0))
    labels = list(piv.index) if DEANON else [f"Market-{i:02d}" for i in range(1, len(piv) + 1)]   # rank-ordered anon
    fig, ax = plt.subplots(figsize=(8.5, 4.8))
    left = np.zeros(len(piv))
    for t in TIERS:
        if t in piv:
            ax.barh(range(len(piv)), piv[t].values, left=left, label=t, color=TIER_COLOR[t]); left += piv[t].values
    ax.set_yticks(range(len(piv))); ax.set_yticklabels(labels, fontsize=8); ax.invert_yaxis()
    ax.set_xlabel("waived custody fee (computed − charged; mixed ccy, confirm USD)")
    ax.legend(fontsize=7, ncol=2, loc="lower right"); ax.margins(y=0.01)
    ax.set_title("E17 · Waived $ by approving market — pricing-discipline read (anonymised; stacked by required tier)")
    tot = rank.sum() or 1
    top1 = rank.iloc[0] / tot if len(rank) else 0.0
    top3 = rank.head(3).sum() / tot if len(rank) else 0.0
    senior = w.loc[w["tier"].isin(["MH+SH/COO (>5k)", "PB/PB Ex — NOT ALLOWED"]), "waived"].sum() / tot
    ax.annotate(f"top market ≈ {top1:.0%} · top 3 ≈ {top3:.0%} of waived $ · "
                f"{senior:.0%} sits in >5k / not-allowed tiers", xy=(0, -0.18), xycoords="axes fraction",
                fontsize=8, color="#B22222")
    print(f"  E17: {rank.shape[0]:,} markets carry a waived pool · total {tot:,.0f} (mixed ccy) · "
          f"top market {top1:.0%} · top 3 {top3:.0%} · {senior:.0%} in >5k/not-allowed tiers")
    print("       (anonymised Market-NN by rank — set DEANON=True for the sponsor-only named mapping)")
    if DEANON:
        for i, (m, v) in enumerate(rank.head(12).items(), 1):
            print(f"    Market-{i:02d} = {m}: waived {v:,.0f} ({v / tot:.0%})")
    _save(fig, "e17_waived_by_market")
except Exception as e: print("E17 failed:", e)

print("\nDONE → figs/  (read the headline numbers above for talking points). "
      "All cuts are descriptive/unadjusted; the E8/E10 pools are CANDIDATE pools, not 'should-charge'.")
Last updated: June 22, 2026python