"""Generate the RTX 5090 freq-cap (clock-locking) efficiency chart from @apnar's data.

Source data: 2026-05-07 sweep, 1× RTX 5090 air-cooled.
Engine: vLLM `gemma-mtp` compose + Gemma-4-31B-AutoRound + MTP K=3.
Methodology: instead of setting power caps via `nvidia-smi -pl`, lock GPU
SM clock + memory clock pairs via `nvidia-smi -lgc` and `-lmc`. This is a
workaround for the 5090's 400W power-cap floor (firmware refuses caps below
400W on this card). Clock-locking lets you reach operating points the
power-cap-sweep methodology can't access.

Operating points are written as "mem MHz / SM MHz" (e.g. 7001/1635).

Why this matters:
  Power-cap sweet spot: 400W cap → 571 narr / 701 code TPS, 1.43 TPS/W
  Freq-cap sweet spot:  7001/1635 → 428 narr / 542 code TPS, 211W draw, 2.025 TPS/W
  → 1.42× more efficient at 47% less power, for 25% lower TPS

The clock-lock methodology surfaces a Pareto improvement over the power-cap
methodology on this card: 14001/2122 produces MORE TPS (602 vs 571) at LESS
power (314W vs ~400W), strictly dominating the power-cap sweet spot.

Running this script renders a two-panel figure (narrative TPS on top,
TPS/W below, one line per memory-clock tier), writes it to
/tmp/freq_cap_5090_gemma4.png and prints the output path.

Data source: https://github.com/noonghunna/club-3090/discussions/86#discussioncomment-16845745
"""
import matplotlib.pyplot as plt

# Row layout: (mem_MHz, gpu_MHz, narr_TPS, code_TPS, actual_W, eff_TPS_per_W)
# eff_TPS_per_W is narr_TPS / actual_W, rounded to three decimals.
data = [
    # 405 MHz mem (lowest) — bandwidth-starved at every GPU clock
    (405, 180, 27.65, 34.07, 47.69, 0.580),
    (405, 300, 28.69, 36.79, 53.02, 0.541),
    (405, 412, 29.49, 38.06, 55.59, 0.530),
    (405, 532, 29.65, 37.41, 51.71, 0.573),
    (405, 652, 29.25, 35.19, 57.22, 0.511),
    (405, 765, 52.96, 65.98, 63.18, 0.838),  # large jump — kernel threshold?
    (405, 885, 53.20, 68.08, 62.43, 0.852),

    # 810 MHz mem (low) — bandwidth ceiling clear above 667 GPU MHz
    (810, 180, 46.44, 58.78, 51.46, 0.902),
    (810, 667, 54.64, 65.50, 58.96, 0.927),
    (810, 1147, 55.25, 68.12, 68.09, 0.811),
    (810, 1635, 54.80, 67.99, 78.33, 0.700),
    (810, 2122, 56.30, 66.73, 87.59, 0.643),
    (810, 2602, 54.93, 67.84, 106.68, 0.515),
    (810, 3090, 54.93, 67.55, 142.80, 0.385),

    # 7001 MHz mem (mid) — efficiency knee at GPU 1635
    (7001, 180, 67.01, 82.64, 65.58, 1.022),
    (7001, 667, 212.69, 265.54, 117.47, 1.811),
    (7001, 1147, 327.81, 408.22, 162.04, 2.023),
    (7001, 1635, 428.19, 541.85, 211.47, 2.025),  # ⭐ peak efficiency
    (7001, 2122, 494.95, 605.76, 258.53, 1.914),
    (7001, 2602, 511.01, 626.28, 313.44, 1.630),
    (7001, 3090, 518.82, 657.63, 409.95, 1.266),

    # 14001 MHz mem (max) — TPS scales through tested GPU range
    (14001, 180, 66.32, 82.94, 80.31, 0.826),
    (14001, 667, 223.09, 274.49, 132.66, 1.682),
    (14001, 1147, 348.14, 429.73, 186.53, 1.866),
    (14001, 1635, 457.77, 562.38, 242.71, 1.886),
    (14001, 2122, 602.20, 737.58, 313.63, 1.920),  # ⭐ Pareto point vs 400W cap
    (14001, 2602, 716.51, 878.75, 417.07, 1.718),
    (14001, 3090, 805.21, 965.95, 558.95, 1.441),
]

# Highlighted operating points, as (mem_MHz, gpu_MHz) keys into `data`.
# Looking them up (rather than re-typing the TPS / efficiency numbers) keeps
# the star markers glued to the plotted lines if the table is ever revised.
PEAK_EFFICIENCY_KEY = (7001, 1635)
PARETO_KEY = (14001, 2122)

# Efficiency of the 400W power-cap sweet spot (reference line in the bottom
# panel); this number comes from the power-cap sweep quoted in the docstring.
POWER_CAP_EFF = 1.43

CLOCK_AXIS_LABEL = "GPU SM clock (MHz, locked via -lgc)"


def _style_axes(ax, ylabel, y_max):
    """Apply the labels, limits, grid and tick sizing shared by both panels."""
    ax.set_xlabel(CLOCK_AXIS_LABEL, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_xlim(0, 3200)
    ax.set_ylim(0, y_max)
    ax.grid(True, alpha=0.3, zorder=0)
    ax.tick_params(axis="both", labelsize=10)


def _mark_star(ax, x, y, *, size, fill, outline, outline_width):
    """Draw one star marker at (x, y), layered above the tier lines."""
    ax.scatter([x], [y], s=size, color=fill, marker="*",
               edgecolors=outline, linewidths=outline_width, zorder=5)


def _callout(ax, text, *, xy, xytext, box_fill, accent):
    """Attach a bold, boxed annotation at `xytext` with an arrow to `xy`."""
    ax.annotate(
        text,
        xy=xy,
        xytext=xytext,
        fontsize=9.5,
        fontweight="bold",
        bbox=dict(boxstyle="round,pad=0.35", facecolor=box_fill,
                  edgecolor=accent, linewidth=1.2),
        arrowprops=dict(arrowstyle="->", color=accent, lw=1.3),
        zorder=5,
    )


# Group by mem clock once; both panels reuse the same per-tier rows,
# ordered by GPU clock so each line is drawn left to right.
mem_tiers = sorted({row[0] for row in data})
rows_by_tier = {
    tier: sorted((row for row in data if row[0] == tier), key=lambda row: row[1])
    for tier in mem_tiers
}

# Resolve the highlighted points from the table (KeyError if a key is mistyped).
rows_by_clocks = {(row[0], row[1]): row for row in data}
_, peak_gpu, peak_narr, _, _, peak_eff = rows_by_clocks[PEAK_EFFICIENCY_KEY]
_, pareto_gpu, pareto_narr, _, _, pareto_eff = rows_by_clocks[PARETO_KEY]

plt.rcParams.update({
    "font.family": "sans-serif",
    "font.size": 11,
    "axes.titlesize": 15,
    "axes.titleweight": "bold",
    "axes.labelsize": 12,
    "figure.facecolor": "white",
    "axes.facecolor": "white",
})

fig, (ax1, ax2) = plt.subplots(
    2, 1, figsize=(11, 9.5), dpi=150,
    gridspec_kw={"height_ratios": [3, 2], "hspace": 0.35},
)

mem_colors = {
    405: "#9e9e9e",
    810: "#fdae61",
    7001: "#7b3fa0",
    14001: "#1f77b4",
}
mem_labels = {
    405: "405 MHz mem (lowest)",
    810: "810 MHz mem",
    7001: "7001 MHz mem (50% of max)",
    14001: "14001 MHz mem (stock max)",
}

# Top panel: Narrative TPS vs GPU clock, one line per mem tier
for tier in mem_tiers:
    tier_rows = rows_by_tier[tier]
    ax1.plot([row[1] for row in tier_rows], [row[2] for row in tier_rows], "o-",
             color=mem_colors[tier], linewidth=2.0, markersize=6,
             label=mem_labels[tier], zorder=3)

_style_axes(ax1, "Narrative TPS (decode-concurrent N=6)", 850)
ax1.legend(loc="upper left", fontsize=10.5, framealpha=0.95, edgecolor="#ccc",
           title="Memory clock", title_fontsize=10)
ax1.set_title("RTX 5090 + Gemma-4-31B + vLLM-MTP — freq-cap (clock-lock) sweep", pad=12)

# Sweet spot annotation: 7001 / 1635
ax1.axvline(peak_gpu, color="goldenrod", linestyle=":", alpha=0.4, linewidth=1.2)
_mark_star(ax1, peak_gpu, peak_narr, size=180, fill="goldenrod",
           outline="#aa5500", outline_width=1.5)
_callout(
    ax1,
    "★ peak efficiency\n"
    "7001/1635 → 2.025 TPS/W\n"
    "428 narr TPS, 211W draw\n"
    "(1.42× more efficient than\n"
    "the 400W power-cap sweet spot)",
    xy=(peak_gpu, peak_narr),
    xytext=(1700, 35),
    box_fill="#fff3cd",
    accent="goldenrod",
)

# Pareto annotation: 14001 / 2122
_mark_star(ax1, pareto_gpu, pareto_narr, size=180, fill="#1f77b4",
           outline="#0a3d6e", outline_width=1.5)
_callout(
    ax1,
    "★ Pareto point\n"
    "14001/2122 → 1.92 TPS/W\n"
    "602 narr TPS, 314W draw\n"
    "(strictly better than\n"
    "400W cap: +5% TPS, -22% W)",
    xy=(pareto_gpu, pareto_narr),
    xytext=(800, 720),
    box_fill="#dbeafe",
    accent="#1f77b4",
)

# Bottom panel: efficiency (same tiers and colours; the top legend covers both)
for tier in mem_tiers:
    tier_rows = rows_by_tier[tier]
    ax2.plot([row[1] for row in tier_rows], [row[5] for row in tier_rows], "^--",
             color=mem_colors[tier], linewidth=1.6, markersize=5, alpha=0.9, zorder=3)

_style_axes(ax2, "Efficiency: narrative TPS/W", 2.2)
# Reference line: what the power-cap methodology achieves at its 400W floor
ax2.axhline(POWER_CAP_EFF, color="#888", linestyle=":", alpha=0.6, linewidth=1.2)
ax2.text(2700, 1.48, "400W power-cap\nsweet spot: 1.43 TPS/W",
         fontsize=9, color="#555", fontstyle="italic", ha="center")
_mark_star(ax2, peak_gpu, peak_eff, size=140, fill="goldenrod",
           outline="#aa5500", outline_width=1.2)
_mark_star(ax2, pareto_gpu, pareto_eff, size=140, fill="#1f77b4",
           outline="#0a3d6e", outline_width=1.2)
ax2.set_title("Efficiency (TPS/W) — clock-lock breaks past the 400W power-cap floor",
              fontsize=11, pad=8)

# Subtitle / data attribution
fig.text(
    0.5, 0.94,
    "1× RTX 5090 air-cooled, vLLM `gemma-mtp` + Gemma-4-31B-AutoRound + MTP K=3, "
    "decode-concurrent N=6, bench-runs=3 | data: @apnar",
    ha="center", fontsize=10, color="#666",
    style="italic",
)

# Footer
fig.text(
    0.99, 0.005,
    "github.com/noonghunna/club-3090",
    ha="right", fontsize=9, color="#888", style="italic",
)

# Leave headroom above the axes (top of rect at 0.93) for the subtitle at y=0.94
plt.tight_layout(rect=(0, 0.01, 1, 0.93))

out = "/tmp/freq_cap_5090_gemma4.png"
plt.savefig(out, dpi=150, bbox_inches="tight", facecolor="white")
print(f"Saved: {out}")