First-Look Script
Runnable Python (matplotlib), analysis only: consumes the base table from the transform layer and renders the coverage panel, realization waterfall, realized-bps distribution, fee-code ladder, charged-at-all share, raw driver small-multiples, within-MG candidate pool, waived-pool Lorenz, and the return-on-AUM × custody quadrant. Descriptive; figures are generated corp-side and not committed.
GET
/v2/hypothesis-testing/first-look-scriptRequires authenticationCode Example
python
# first-look-exhibits.py — descriptive exhibits for the stakeholder meeting
# -------------------------------------------------------------------------
# ANALYSIS ONLY. The base table is built by the transformation layer (transform.py); this file just
# VISUALISES it. Run AFTER your ingestion cells. Requires these frames in scope:
# custody_2h25_hk, custody_2h25_sg, mkgid_clientid_mapping, clientdomicile_df, acctmgmt_df,
# trustacct_df, mkg_info (→ base table, via transform.build_base_table)
# mkg_rev_history, mkg_aum_history (→ E10 only)
# Produces exhibits E0–E17 (PNG + inline). No modelling.
#
# READ BEFORE PRESENTING — caveats baked in (the base-table logic + its caveats live in transform.py):
# • Every cut is RAW / UNADJUSTED — descriptive, confounded (size ⟂ segment ⟂ domicile …).
# These NOMINATE patterns; they are not conclusions. Say so out loud.
# • Rates (bps, %) are unit-free → HK+SG pooled is fine. $ figures are kept PER-SITE
# (local ccy) to avoid FX. Set FX_TO_SGD['HK'] to combine $ across sites.
# • GST: SG fees may be GST-inclusive (HK not) → slightly inflates SG bps. Flagged, not stripped.
# • Universe = client map (incl. the CDT099 pool); eligible-AUM is BLANK for CDT099 until the backend
# request lands → eligible-AUM cuts describe the charged universe, the CDT099 gap is countable only.
# • "Uncharged-but-eligible" is a CANDIDATE pool, NOT "should be charged"; legit-exempt (DPM/MyAdvisory)
# is split from genuine-gap via waived_class. See transform.py for the full base-table contract.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
try:
from transform import build_base_table # transformation layer (file mode)
except ImportError:
pass # notebook-paste mode: run transform.py's cells first
OUTDIR = Path("figs"); OUTDIR.mkdir(exist_ok=True) # data-derived PNGs — DO NOT COMMIT
FX_TO_SGD = {"HK": None, "SG": 1.0} # set HK (e.g. 0.17) only to combine $; None = per-site
DEANON = False # E17 sponsor-only: real market names stay anonymised (`Market-NN`) until set True (plan.md §confidentiality)
plt.rcParams.update({"figure.dpi": 130, "font.size": 10, "axes.grid": True, "axes.axisbelow": True})
def _num(s): return pd.to_numeric(s, errors="coerce") # local helper for the E10 revenue/AUM join
def _save(fig, name):
fig.tight_layout()
fig.savefig(OUTDIR / f"{name}.png", bbox_inches="tight") # save for the deck
plt.show() # AND render inline in the notebook
print(f" saved figs/{name}.png")
# ---------- 0. Base table (built by the transformation layer) ----------
acct = build_base_table(custody_2h25_hk, custody_2h25_sg, mkgid_clientid_mapping,
clientdomicile_df, acctmgmt_df, trustacct_df, mkg_info)
# ---------- 1. Coverage panel (credibility: the joins work + the CDT099 gap is visible) ----------
try:
cov = {"→ MG": acct["MKG_ID"].notna().mean(),
"→ domicile": acct["domicile_country"].notna().mean(),
"→ mgmt type": acct["account_type"].notna().mean(),
"in charging file\n(non-CDT099)": acct["in_charging_file"].mean(),
"has eligible AUM": acct["eligible"].mean()}
fig, ax = plt.subplots(figsize=(6.4, 3.0))
ax.barh(list(cov)[::-1], [v * 100 for v in list(cov.values())[::-1]], color="#4C78A8")
ax.set_xlim(0, 100); ax.set_xlabel("% of account universe (client map)")
ax.set_title("E0 · Universe coverage — attributes link up; the charging-file gap = the CDT099 pool")
for i, v in enumerate(list(cov.values())[::-1]): ax.text(v * 100 + 1, i, f"{v:.0%}", va="center")
_n099 = int(acct["is_cdt099"].sum())
ax.annotate(f"CDT099 / full-waiver pool: {_n099:,} accts ({acct['is_cdt099'].mean():.0%}) — eligible-AUM pending backend",
xy=(0, -0.42), xycoords="axes fraction", fontsize=8, color="#B22222")
_save(fig, "e0_coverage")
except Exception as e: print("E0 failed:", e)
# ---------- 2. Realization waterfall, per site (computed vs charged; waived = the gap) ----------
try:
fig, axes = plt.subplots(1, 2, figsize=(9, 3.4))
for ax, site in zip(axes, ["HK", "SG"]):
s = acct[acct.site == site]
comp, chg = s["custody_fee_computed"].sum(), s["custody_fee_charged"].sum()
waived = comp - chg
ax.bar(["Computed\n(theoretical)", "Waived/\ndiscounted", "Charged\n(realised)"],
[comp, waived, chg], color=["#54A24B", "#E45756", "#4C78A8"])
ax.set_title(f"{site} · realised {chg/comp:.0%} of computed" if comp else site)
ax.set_ylabel("fee (local ccy)")
fig.suptitle("E1 · Realisation waterfall — what we compute vs what we collect", y=1.02)
_save(fig, "e1_waterfall")
except Exception as e: print("E1 failed:", e)
# ---------- 2b. Realized-bps distribution (E2) — the rate landscape + the zero-fee spike ----------
try:
r = acct.loc[acct["eligible"], "realized_bps"].dropna()
charged_r = acct.loc[acct["charged"], "realized_bps"].dropna()
pmax = charged_r.quantile(0.95) if len(charged_r) else np.nan # empirical practical max
fig, ax = plt.subplots(figsize=(7, 3.6))
ax.hist(r.clip(upper=30), bins=40, color="#4C78A8")
ymax = ax.get_ylim()[1]
ax.axvline(25, color="grey", ls="--", lw=1); ax.text(25, ymax * 0.9, " 25 bps (mythical)", fontsize=8, color="grey")
if np.isfinite(pmax):
ax.axvline(pmax, color="#E45756", ls="--", lw=1.2)
ax.text(pmax, ymax * 0.72, f" practical max ≈ {pmax:.0f} bps", fontsize=8, color="#E45756")
ax.set_xlabel("realized bps (custody charged ÷ eligible AUM)"); ax.set_ylabel("accounts")
ax.set_title("E2 · Realized-bps distribution — the zero-fee spike + the achievable rate")
print(f" E2: practical max (95th pct of charged) ≈ {pmax:.1f} bps · zero-fee (eligible) accounts = {(r == 0).sum():,} of {len(r):,}")
_save(fig, "e2_rate_distribution")
except Exception as e: print("E2 failed:", e)
# ---------- 2c. Fee-code → discount ladder (E3) — what each CDT code actually encodes ----------
try:
codes = acct.loc[acct["eligible"], ["cdt_code", "realized_bps"]].dropna(subset=["cdt_code"])
order = codes.groupby("cdt_code")["realized_bps"].median().sort_values().index
data = [codes.loc[codes.cdt_code == c, "realized_bps"].dropna().values for c in order]
fig, ax = plt.subplots(figsize=(7, 3.6))
ax.boxplot(data, showfliers=False) # positions 1..N
ax.set_xticks(range(1, len(order) + 1)); ax.set_xticklabels(list(order), rotation=40, ha="right", fontsize=8)
ax.set_xlabel("custody fee code (CDT)"); ax.set_ylabel("realized bps")
ax.set_title("E3 · Fee-code → empirical discount ladder")
print(f" E3: {len(order)} codes; median bps by code = " +
", ".join(f"{c}:{codes.loc[codes.cdt_code == c, 'realized_bps'].median():.0f}" for c in order))
_save(fig, "e3_fee_code_ladder")
except Exception as e: print("E3 failed:", e)
# ---------- 3. Charged-at-all — by count and by eligible AUM (the never-asked share) ----------
try:
fig, axes = plt.subplots(1, 2, figsize=(9, 3.4))
by_cnt = acct.groupby("site")["charged"].agg(["sum", "count"])
by_cnt["uncharged"] = by_cnt["count"] - by_cnt["sum"]
axes[0].bar(by_cnt.index, by_cnt["sum"], label="charged", color="#4C78A8")
axes[0].bar(by_cnt.index, by_cnt["uncharged"], bottom=by_cnt["sum"], label="uncharged", color="#BAB0AC")
axes[0].set_title("by account count"); axes[0].legend()
elig = acct.assign(elig_chg=np.where(acct.charged, acct.custody_eligible_aum, 0),
elig_unchg=np.where(~acct.charged, acct.custody_eligible_aum, 0)).groupby("site")[["elig_chg", "elig_unchg"]].sum()
axes[1].bar(elig.index, elig["elig_chg"], label="charged", color="#4C78A8")
axes[1].bar(elig.index, elig["elig_unchg"], bottom=elig["elig_chg"], label="uncharged (eligible)", color="#E45756")
axes[1].set_title("by eligible AUM (local ccy)"); axes[1].legend()
for site in elig.index:
share = elig.loc[site, "elig_unchg"] / max(elig.loc[site].sum(), 1)
print(f" {site}: {share:.0%} of eligible AUM sits in zero-fee accounts")
fig.suptitle("E4 · Charged-at-all — the never-asked share (count vs money)", y=1.02)
_save(fig, "e4_charged_at_all")
except Exception as e: print("E4 failed:", e)
# ---------- 4. THE SLICES — charged-share & median realised bps by dimension (raw) ----------
try:
top_dom = acct["domicile_country"].value_counts().head(6).index
acct["domicile_top"] = np.where(acct["domicile_country"].isin(top_dom), acct["domicile_country"], "Other")
dims = [("site", "Site"), ("is_trust", "Trust flag"), ("account_type", "Account type"),
("segment_current", "Segment"), ("domicile_top", "Domicile (top 6)")]
fig, axes = plt.subplots(2, len(dims), figsize=(3.0 * len(dims), 6))
for j, (col, label) in enumerate(dims):
share = acct.groupby(col)["charged"].mean().sort_values(ascending=False)
bps = acct[acct.charged].groupby(col)["realized_bps"].median().reindex(share.index)
axes[0, j].bar(range(len(share)), share.values * 100, color="#4C78A8")
axes[0, j].set_title(label); axes[0, j].set_xticks(range(len(share)))
axes[0, j].set_xticklabels(share.index, rotation=40, ha="right", fontsize=8)
axes[1, j].bar(range(len(bps)), bps.values, color="#54A24B")
axes[1, j].set_xticks(range(len(bps))); axes[1, j].set_xticklabels(bps.index, rotation=40, ha="right", fontsize=8)
axes[0, 0].set_ylabel("% charged (any fee)"); axes[1, 0].set_ylabel("median bps | charged")
fig.suptitle("E7 · Slices — RAW / UNADJUSTED descriptive cuts (nominate, don't conclude)", y=1.01)
_save(fig, "e7_slices")
except Exception as e: print("E7 failed:", e)
# ---------- 5. Within-MG teaser — eligible AUM sitting in uncharged accounts of charging MGs ----------
try:
acct["_elig_unchg"] = np.where(~acct["charged"], acct["custody_eligible_aum"].fillna(0), 0.0)
mg = (acct.groupby(["site", "MKG_ID"])
.agg(n=("CLIENT_ID", "size"), n_chg=("charged", "sum"),
elig=("custody_eligible_aum", "sum"), elig_unchg=("_elig_unchg", "sum"))
.reset_index())
mg["mixed"] = (mg.n > 1) & (mg.n_chg > 0) & (mg.n_chg < mg.n)
fig, ax = plt.subplots(figsize=(6.5, 3.4))
pool = mg[mg.n_chg > 0].groupby("site")["elig_unchg"].sum() # uncharged-eligible inside MGs that DO charge
ax.bar(pool.index, pool.values, color="#E45756")
ax.set_title("E8 · Candidate pool — eligible AUM in UNCHARGED accounts of MGs that already charge")
ax.set_ylabel("eligible AUM in zero-fee accounts (local ccy)")
for site in pool.index:
mixed = mg[(mg.site == site) & mg.mixed].shape[0]
print(f" {site}: {mixed:,} mixed-status MGs · uncharged-eligible pool = {pool.get(site, 0):,.0f} (local ccy)")
_save(fig, "e8_within_mg")
except Exception as e: print("E8 failed:", e)
# ---------- 5b. Waived-pool concentration (E9) — Lorenz/Pareto of foregone custody by MG ----------
try:
mgw = acct.groupby("MKG_ID").agg(computed=("custody_fee_computed", "sum"),
charged=("custody_fee_charged", "sum")).reset_index()
waived = (mgw["computed"] - mgw["charged"]).clip(lower=0) # foregone custody fee
w = waived[waived > 0].sort_values(ascending=False).reset_index(drop=True)
total = w.sum()
cum = (w.cumsum() / total).values if (total > 0 and len(w)) else np.array([])
x = (np.arange(1, len(w) + 1) / len(w)) if len(w) else np.array([])
share10 = cum[int(0.1 * len(w)) - 1] if len(w) >= 10 else np.nan
fig, ax = plt.subplots(figsize=(6.5, 4.5))
ax.plot(x * 100, cum * 100, color="#E45756", lw=2, label="waived pool")
ax.plot([0, 100], [0, 100], color="grey", ls="--", lw=0.8, label="perfect equality")
if np.isfinite(share10):
ax.axvline(10, color="grey", lw=0.6, ls=":")
ax.annotate(f"top 10% of MGs ≈ {share10:.0%} of waived", xy=(10, share10 * 100),
xytext=(25, 45), fontsize=8, arrowprops=dict(arrowstyle="->", color="grey"))
ax.set_xlabel("cumulative % of MGs (ranked by foregone custody)")
ax.set_ylabel("cumulative % of waived pool"); ax.legend(loc="lower right", fontsize=8)
ax.set_title("E9 · Waived-pool concentration (Lorenz) — mixed ccy, descriptive")
print(f" E9: {len(w):,} MGs with a waived pool" + (f"; top 10% hold ≈ {share10:.0%}" if np.isfinite(share10) else ""))
_save(fig, "e9_waived_pareto")
except Exception as e: print("E9 failed:", e)
# ---------- 6. PRICING LANDSCAPE — return-on-AUM × custody rate (E10): the "get them interested" hook ----------
# Descriptive only: LOCATES the underpriced pool using the bank's pricing logic (low total return ⇒ no break
# justified). It does NOT explain why a given MG deviates — that's deliberately future-week work.
try:
mg_c = acct.groupby("MKG_ID").agg(elig=("custody_eligible_aum", "sum"), chg=("custody_fee_charged", "sum"),
segment=("segment_current", "first"), floor=("segment_min_roa", "first")).reset_index()
rev = mkg_rev_history[["MKG_ID", "revenue_2025"]].assign(revenue_2025=lambda d: _num(d["revenue_2025"]))
aum = mkg_aum_history[["MKG_ID", "aum_2025"]].assign(aum_2025=lambda d: _num(d["aum_2025"]))
q = (mg_c.merge(rev, on="MKG_ID", how="left").merge(aum, on="MKG_ID", how="left")
.query("aum_2025 > 0 and elig > 0"))
q["return_on_aum_bps"] = q["revenue_2025"] / q["aum_2025"] * 1e4 # total revenue yield on AUM
q["custody_bps"] = q["chg"] / q["elig"] * 1e4 # both rates → currency-safe to pool
q = q[q["custody_bps"].le(60)] # drop tiny-eligible divide-by-near-zero artifacts (no MG realizes >60 bps)
ry = q["custody_bps"].median()
q["below_floor"] = q["return_on_aum_bps"] < q["floor"] # below the MG's SEGMENT Min RoA (policy), not a global median
under = q[q["below_floor"] & (q.custody_bps < ry)] # below segment RoA floor AND low custody = realignment target
fig, ax = plt.subplots(figsize=(7, 5))
smax = q["elig"].max() or 1
ax.scatter(q.return_on_aum_bps, q.custody_bps, s=20 + 200 * q.elig / smax, alpha=0.45, color="#4C78A8", edgecolor="none")
ax.scatter(under.return_on_aum_bps, under.custody_bps, s=20 + 200 * under.elig / smax,
facecolor="none", edgecolor="#E45756", linewidth=1.3, label="below segment RoA floor + low custody")
ax.axhline(ry, color="grey", lw=0.8, ls="--") # low-custody reference (median; no policy custody floor)
ax.set_xlim(0, np.nanpercentile(q.return_on_aum_bps, 97)) # clip right-tail outliers (tiny-AUM MGs)
ax.set_ylim(0, max(np.nanpercentile(q.custody_bps, 98), 5)) # clip blow-out from tiny-eligible MGs
ax.set_xlabel("total return-on-AUM (bps = revenue ÷ AUM)"); ax.set_ylabel("custody realized bps")
ax.set_title("E10 · Pricing landscape — return-on-AUM × custody (red = below segment RoA floor + low custody)")
ax.legend(loc="upper right", fontsize=8)
print(f" E10: {len(under):,} MGs below their segment RoA floor (Strategic 40/Key 65/Core 80/PB 100) AND low custody; "
f"eligible AUM = {under['elig'].sum():,.0f} (mixed sites)")
_save(fig, "e10_pricing_quadrant")
except Exception as e: print("E10 failed:", e)
# ---------- 7. FEE OUTCOME (DEF / Q1) — full-pay / partial-discount / deliberate full-waiver / default ----------
# Where does a less-than-full charge come from? Only DEFAULT (CDT099) is "never asked"; a non-CDT099 zero
# OR a partial discount had a fee computed → an explicit/deliberate give-away (degree = discount_depth).
try:
order = ["full-pay", "partial-discount", "full-waiver", "default (CDT099)"]
colors = {"full-pay": "#4C78A8", "partial-discount": "#F58518", "full-waiver": "#E45756", "default (CDT099)": "#BAB0AC"}
ct = acct.groupby(["site", "fee_outcome"]).size().unstack("fee_outcome").reindex(columns=order).fillna(0)
fig, ax = plt.subplots(figsize=(6.8, 3.6))
bottom = np.zeros(len(ct))
for src in order:
ax.bar(ct.index, ct[src].values, bottom=bottom, label=src, color=colors[src]); bottom += ct[src].values
ax.set_ylabel("accounts"); ax.legend(fontsize=8)
ax.set_title("E11 · Fee outcome — full-pay / partial-discount / deliberate full-waiver / default (CDT099)")
print(" E11: " + " | ".join(f"{s} {int(acct['fee_outcome'].eq(s).sum()):,}" for s in order))
_save(fig, "e11_fee_outcome")
except Exception as e: print("E11 failed:", e)
# ---------- 9. DELIBERATE-WAIVER opportunity (DEF / OBJ-E10) — where partial+full give-aways concentrate ----------
# Partial discounts AND non-CDT099 full waivers are both INTENTIONAL give-aways → group them. Foregone fee =
# computed − charged. Sizable NOW (computed + eligible in hand), unlike the CDT099 pool. $ are mixed-ccy.
try:
dw = acct[acct["deliberate_waiver"]].copy()
dw["foregone"] = (dw["custody_fee_computed"].fillna(0) - dw["custody_fee_charged"].fillna(0)).clip(lower=0)
dw["kind"] = np.where(dw["fee_outcome"].eq("partial-discount"), "partial", "full")
segs = dw.groupby("segment_current")["foregone"].sum().sort_values(ascending=False).index
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
for ax, col, lab in zip(axes, ["foregone", "custody_eligible_aum"], ["foregone fee (computed − charged)", "eligible AUM"]):
piv = dw.groupby(["segment_current", "kind"])[col].sum().unstack("kind").reindex(segs).fillna(0)
bottom = np.zeros(len(piv))
for kind, color in [("partial", "#F58518"), ("full", "#E45756")]:
if kind in piv:
ax.bar(range(len(piv)), piv[kind].values, bottom=bottom, label=kind, color=color); bottom += piv[kind].values
ax.set_xticks(range(len(piv))); ax.set_xticklabels(piv.index, rotation=40, ha="right", fontsize=8)
ax.set_title(lab); ax.legend(fontsize=8)
tot = dw["foregone"].sum() or 1
bk = dw.groupby("kind").agg(foregone=("foregone", "sum"), elig=("custody_eligible_aum", "sum"), n=("CLIENT_ID", "size"))
pf = {k: (bk.loc[k, "foregone"] / tot if k in bk.index else 0.0) for k in ("partial", "full")}
fig.suptitle(f"E13 · Deliberate-waiver opportunity by segment — foregone fee: {pf['partial']:.0%} partial / "
f"{pf['full']:.0%} full (non-CDT099, mixed ccy, descriptive)", y=1.04)
print(f" E13: deliberate waivers = {len(dw):,} accts · foregone fee {dw['foregone'].sum():,.0f} · "
f"eligible AUM {dw['custody_eligible_aum'].sum():,.0f} (mixed ccy)")
for k in ("partial", "full"): # ← where the opportunity sits: tighten partials vs reverse full waivers
if k in bk.index:
r = bk.loc[k]
print(f" {k}: foregone {r['foregone']:,.0f} ({r['foregone']/tot:.0%}) · eligible AUM {r['elig']:,.0f} · {int(r['n']):,} accts")
_save(fig, "e13_deliberate_waiver")
except Exception as e: print("E13 failed:", e)
# ---------- 8. CROSS-BOOKING (Q2›BC1) — within MGs spanning HK+SG, is the SG leg waived while HK pays? ----------
# Tests the diversification/cross-subsidy read: same relationship (MG) booked in both centres; compare
# the charged-share of the HK leg vs the SG leg. Sub-hypothesis of within-MG inconsistency.
try:
xb = acct[acct["mg_cross_booking"]]
n = xb["MKG_ID"].nunique()
share = xb.groupby("site")["charged"].mean().reindex(["HK", "SG"]).dropna()
fig, ax = plt.subplots(figsize=(5.5, 3.4))
ax.bar(share.index, share.values * 100, color=["#4C78A8", "#E45756"][:len(share)])
ax.set_ylabel("% charged (any fee)"); ax.set_ylim(0, 100)
ax.set_title(f"E12 · Cross-booking MGs (n={n:,}) — charged-share by leg (HK-pays / SG-waived?)")
for i, v in enumerate(share.values): ax.text(i, v * 100 + 1, f"{v:.0%}", ha="center", fontsize=9)
print(f" E12: {n:,} cross-booking MGs · charged-share " + ", ".join(f"{s}:{v:.0%}" for s, v in share.items()))
_save(fig, "e12_cross_booking")
except Exception as e: print("E12 failed:", e)
# ---------- 10. PB/PB Ex POST-CODE WAIVERS (OBJ-E10 / H-W2) — not allowed → direct recovery ----------
# Policy: PB & PB Ex permit no waivers on the COMPUTED (post-code) fee. So an IN-FILE (non-099) PB/PB Ex
# account charged BELOW its computed fee is a violation. Uplift = foregone (computed − charged), sizable now.
# (CDT099 PB/PB Ex accounts compute zero — the code waives — and are NOT an anomaly on their own: many are
# legit-exempt DPM/MyAdvisory. They sit in the normal CDT099 pool, sized via the CDT099 re-exam, not here.
# $ per the fee currency — confirm USD.)
try:
an = acct[acct["pb_waiver_anomaly"]].copy()
an["foregone"] = (an["custody_fee_computed"].fillna(0) - an["custody_fee_charged"].fillna(0)).clip(lower=0)
order = sorted(an["segment_current"].dropna().unique())
cnt = an.groupby(["segment_current", "site"]).size().unstack("site").reindex(order).fillna(0)
fig, ax = plt.subplots(figsize=(6.5, 3.6))
bottom = np.zeros(len(cnt))
for s, c in [("HK", "#4C78A8"), ("SG", "#E45756")]:
if s in cnt:
ax.bar(range(len(cnt)), cnt[s].values, bottom=bottom, label=s, color=c); bottom += cnt[s].values
ax.set_xticks(range(len(cnt))); ax.set_xticklabels(cnt.index, fontsize=8); ax.set_ylabel("anomaly accounts"); ax.legend(fontsize=8)
ax.set_title("E14 · PB/PB Ex post-code waivers (in-file, charged < computed) — direct recovery")
print(f" E14: {len(an):,} PB/PB Ex in-file waiver anomalies (charged < computed) · recoverable uplift ≈ "
f"{an['foregone'].sum():,.0f} (foregone fee; currency per fee schedule, confirm USD)")
for seg in order:
sub = an[an.segment_current == seg]
print(f" {seg}: {len(sub):,} accts · foregone {sub['foregone'].sum():,.0f}")
_save(fig, "e14_pb_anomalies")
except Exception as e: print("E14 failed:", e)
# ---------- 11. CDT099 POOL BY MANDATE (CDT099 re-exam / DEF) — legit-exempt vs genuine candidate ----------
# The ~76% CDT099 (absent) pool isn't one thing. The mandate (account_type) splits it: Free (DPM) and
# Advisory (MyAdvisory) are legit-exempt (not custody-charged by design); RESTRICTED (Self-Directed) is the
# genuine custody-fee candidate — the real "never-asked" subset. Countable NOW; $ awaits the chargeable-AUM.
try:
c99 = acct[acct["is_cdt099"]].copy()
order = ["Restricted (Self-Directed)", "Advisory (MyAdvisory)", "Free (DPM)", "unknown"]
by = c99.groupby(["mandate", "site"]).size().unstack("site").fillna(0)
by = by.reindex([m for m in order if m in by.index])
fig, ax = plt.subplots(figsize=(7, 3.4))
left = np.zeros(len(by))
for s, col in [("HK", "#4C78A8"), ("SG", "#E45756")]:
if s in by:
ax.barh(range(len(by)), by[s].values, left=left, label=s, color=col); left += by[s].values
ax.set_yticks(range(len(by))); ax.set_yticklabels(by.index, fontsize=8); ax.invert_yaxis()
ax.set_xlabel("CDT099 accounts"); ax.legend(fontsize=8)
ax.set_title("E15 · CDT099 pool by mandate — Restricted (Self-Directed) = the custody-fee candidate")
cand = int(c99["mandate"].eq("Restricted (Self-Directed)").sum())
print(f" E15: CDT099 pool {len(c99):,} · candidate Restricted/Self-Directed {cand:,} ({cand / max(len(c99), 1):.0%}) "
f"· legit-exempt Free/Advisory {int((c99['mgmt_exempt']).sum()):,}. $ sizing awaits chargeable-AUM.")
_save(fig, "e15_cdt099_by_mandate")
except Exception as e: print("E15 failed:", e)
# ---------- 12. POLICY COMPLIANCE & REVENUE IMPACT (no-waiver + min-charge approval grid) ----------
# Classify every in-file waiver (charged < computed) by policy status: PB/PB Ex → NO waivers = VIOLATION;
# S/K/C → allowed *if approved*, binned by the waived amount's required approval tier (RM <500 · TH <1k ·
# MH <5k · MH+SH/COO >5k). Plus the min-charge breaches (under_min_charge). Flags the foregone revenue.
# Can't verify whether a waiver WAS approved (no approver field) — the tiers flag where senior sign-off is due.
try:
BANDS = [(0, 500, "RM (<500)"), (500, 1000, "TH (<1k)"), (1000, 5000, "MH (<5k)"), (5000, np.inf, "MH+SH/COO (>5k)")]
w = acct[acct["in_charging_file"] & (acct["custody_fee_charged"].fillna(0) < acct["custody_fee_computed"].fillna(0))].copy()
w["waived"] = (w["custody_fee_computed"].fillna(0) - w["custody_fee_charged"].fillna(0)).clip(lower=0)
def _status(row):
if row["no_waiver_segment"]:
return "PB/PB Ex — NOT ALLOWED"
for lo, hi, lab in BANDS:
if lo <= row["waived"] < hi:
return f"S/K/C — needs {lab}"
return "S/K/C — other"
w["compliance"] = w.apply(_status, axis=1)
g = w.groupby("compliance")["waived"].sum().sort_values(ascending=False)
fig, ax = plt.subplots(figsize=(8, 4))
ax.barh(range(len(g)), g.values, color=["#E45756" if "NOT ALLOWED" in i else "#4C78A8" for i in g.index])
ax.set_yticks(range(len(g))); ax.set_yticklabels(g.index, fontsize=8); ax.invert_yaxis()
ax.set_xlabel("foregone revenue (computed − charged; mixed ccy, confirm USD)")
ax.set_title("E16 · Policy compliance — waived revenue by status (PB/PB Ex = violation; S/K/C by approval tier)")
viol = w.loc[w["no_waiver_segment"], "waived"].sum()
print(f" E16: PB/PB Ex no-waiver VIOLATIONS {int(w['no_waiver_segment'].sum()):,} accts · revenue impact {viol:,.0f} (clear)")
for lo, hi, lab in BANDS:
b = w[~w["no_waiver_segment"] & (w["waived"] >= lo) & (w["waived"] < hi)]
if len(b):
print(f" S/K/C waivers needing {lab}: {len(b):,} accts · {b['waived'].sum():,.0f}")
um = acct[acct["under_min_charge"]]
um_short = (150.0 - um["custody_fee_charged"].fillna(0)).clip(lower=0).sum()
print(f" E16: min-charge breaches (in-file, computed ≥ 150, charged < 150): {len(um):,} accts · "
f"shortfall-to-floor {um_short:,.0f} (currency per fee schedule, confirm USD)")
_save(fig, "e16_policy_compliance")
except Exception as e: print("E16 failed:", e)
# ---------- 13. PRICING DISCIPLINE — waived $ by APPROVING MARKET (governance read / OBJ-COMPLIANCE) ----------
# The governance pivot of E16: SAME in-file waiver pool (charged < computed — the discretionary give-aways the
# approval grid governs), but attributed to the APPROVING UNIT, proxied by `market` (≈ the account's market
# head's domain; there's no approver field). Each market's bar is STACKED by the approval tier the waived
# amount REQUIRES — so a market whose pool skews to the >5k / NOT-ALLOWED tiers is a pricing-DISCIPLINE flag
# even when each waiver was individually signed off (compliant ≠ disciplined — Marc 2026-06-19). Kept at
# MARKET level by design: a per-team/RM league table is the most sensitive cut (plan.md) and stays verbal.
# ANONYMISED BY DEFAULT (`Market-NN`, rank order); set DEANON=True for the sponsor-only named mapping. Mixed ccy.
try:
BANDS = [(0, 500, "RM (<500)"), (500, 1000, "TH (<1k)"), (1000, 5000, "MH (<5k)"), (5000, np.inf, "MH+SH/COO (>5k)")]
TIERS = ["PB/PB Ex — NOT ALLOWED", "MH+SH/COO (>5k)", "MH (<5k)", "TH (<1k)", "RM (<500)"] # senior → junior
TIER_COLOR = {"PB/PB Ex — NOT ALLOWED": "#7F1313", "MH+SH/COO (>5k)": "#E45756", "MH (<5k)": "#F58518",
"TH (<1k)": "#54A24B", "RM (<500)": "#4C78A8"}
w = acct[acct["in_charging_file"] & (acct["custody_fee_charged"].fillna(0) < acct["custody_fee_computed"].fillna(0))].copy()
w["waived"] = (w["custody_fee_computed"].fillna(0) - w["custody_fee_charged"].fillna(0)).clip(lower=0)
def _tier(row):
if row["no_waiver_segment"]:
return "PB/PB Ex — NOT ALLOWED"
for lo, hi, lab in BANDS:
if lo <= row["waived"] < hi:
return lab
return "RM (<500)"
w["tier"] = w.apply(_tier, axis=1)
w["market_"] = w["market"].fillna("unknown")
rank = w.groupby("market_")["waived"].sum().sort_values(ascending=False)
top = rank.head(12).index # focus the bars on where the money is
piv = (w[w["market_"].isin(top)].groupby(["market_", "tier"])["waived"].sum()
.unstack("tier").reindex(index=top, columns=TIERS).fillna(0))
labels = list(piv.index) if DEANON else [f"Market-{i:02d}" for i in range(1, len(piv) + 1)] # rank-ordered anon
fig, ax = plt.subplots(figsize=(8.5, 4.8))
left = np.zeros(len(piv))
for t in TIERS:
if t in piv:
ax.barh(range(len(piv)), piv[t].values, left=left, label=t, color=TIER_COLOR[t]); left += piv[t].values
ax.set_yticks(range(len(piv))); ax.set_yticklabels(labels, fontsize=8); ax.invert_yaxis()
ax.set_xlabel("waived custody fee (computed − charged; mixed ccy, confirm USD)")
ax.legend(fontsize=7, ncol=2, loc="lower right"); ax.margins(y=0.01)
ax.set_title("E17 · Waived $ by approving market — pricing-discipline read (anonymised; stacked by required tier)")
tot = rank.sum() or 1
top1 = rank.iloc[0] / tot if len(rank) else 0.0
top3 = rank.head(3).sum() / tot if len(rank) else 0.0
senior = w.loc[w["tier"].isin(["MH+SH/COO (>5k)", "PB/PB Ex — NOT ALLOWED"]), "waived"].sum() / tot
ax.annotate(f"top market ≈ {top1:.0%} · top 3 ≈ {top3:.0%} of waived $ · "
f"{senior:.0%} sits in >5k / not-allowed tiers", xy=(0, -0.18), xycoords="axes fraction",
fontsize=8, color="#B22222")
print(f" E17: {rank.shape[0]:,} markets carry a waived pool · total {tot:,.0f} (mixed ccy) · "
f"top market {top1:.0%} · top 3 {top3:.0%} · {senior:.0%} in >5k/not-allowed tiers")
print(" (anonymised Market-NN by rank — set DEANON=True for the sponsor-only named mapping)")
if DEANON:
for i, (m, v) in enumerate(rank.head(12).items(), 1):
print(f" Market-{i:02d} = {m}: waived {v:,.0f} ({v / tot:.0%})")
_save(fig, "e17_waived_by_market")
except Exception as e: print("E17 failed:", e)
print("\nDONE → figs/ (read the headline numbers above for talking points). "
"All cuts are descriptive/unadjusted; the E8/E10 pools are CANDIDATE pools, not 'should-charge'.")
Last updated: June 22, 2026python