Skip to content

Commit 497091d

Browse files
committed
V21: COMPLETE PR openai#1855 stack + AWQ-lite + AsymLogit (CRITICAL FIX)
V19c/V20 ran with a FUNDAMENTALLY WRONG base config:
- smear_gate_enabled: False (PR openai#1855 needs True)
- sparse_attn_gate_enabled: False (PR openai#1855 needs True)
- num_phases: 1 (PR openai#1855 needs 3)
- compressor: brotli (PR openai#1855 needs pergroup with lrzip)
- embed_bits: 8 (PR openai#1855 needs 7)
- 11+ other hparams left at defaults that do not match PR openai#1855

Hence V19c/V20 artifacts hit 16.93 MB (over the 16 MB cap, an INVALID submission), and TTT recovery was 1-phase only, which severely handicapped it.

V21 = the exact PR openai#1855 README reproduction command env vars + AWQ-lite (PR openai#1908) + ASYM_LOGIT_RESCALE=1 (V19 innovation; V19c proved a -0.001/-0.002 BPB benefit).
Source: PR openai#1855 README lines 125-145 (codemath3000 official reproduction).

Predicted (seed 42):
- pre-quant: ~1.064 (matches PR openai#1908 1.06384)
- quantized: ~1.072 (matches PR openai#1908 1.07226)
- artifact: ~15.99 MB (lrzip pergroup compression + EMBED_BITS=7)
- post-TTT: ~1.057 (PR openai#1908 1.05957 - 0.002 from AsymLogit)

Win threshold: < 1.06021
Probability: 50-60% real frontier break
Pre-req: apt-get install lrzip on RunPod pod (handled in setup script)
1 parent cab7fe0 commit 497091d

1 file changed

Lines changed: 98 additions & 0 deletions

File tree

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
#!/bin/bash
# V21 = FULL PR #1855 9-hp stack + PR #1908 AWQ-lite + V19 ASYM_LOGIT_RESCALE
# This is the FIRST version with the COMPLETE PR #1855 reproduction env vars.
# V18/V19c/V20 all ran with SmearGate=False, SparseAttnGate=False, num_phases=1 -> WRONG BASE.
# Source: PR #1855 README lines 125-145 (codemath3000's exact reproduction command).
#
# Usage: this_script [SEED]
#   SEED  training seed passed to train_gpt.py (default: 42, the scout seed
#         that the predictions below refer to). Log and model copies are
#         named after the seed, so follow-up seeds do not overwrite each other.
#
# Predicted (seed 42, FORCE_STOP_STEP=4945 for direct PR #1908 comparison):
# pre-quant val_bpb: ~1.064 (matching PR #1908 1.06384)
# quantized val_bpb: ~1.072 (matching PR #1908 1.07226)
# artifact size: ~15.99 MB (lrzip pergroup compression)
# post-TTT val_bpb: ~1.057 (PR #1908 1.05957 - 0.002 from AsymLogit)
# total eval time: ~485s (3-phase TTT slightly slower than 1-phase)
#
# Win threshold: < 1.06021
# Probability of true single-seed win vs frontier: 50-60%
set -euo pipefail

readonly RECORD_DIR=/workspace/parameter-golf/records/track_10min_16mb/2026-04-30_V19_PR1908_AsymLogit_WD2/
readonly BANNER="===================================================="

# Optional first argument selects the seed; with no argument the script
# behaves exactly like the original fixed seed-42 scout.
seed="${1:-42}"
if [[ ! "$seed" =~ ^[0-9]+$ ]]; then
  printf 'error: SEED must be a non-negative integer, got: %s\n' "$seed" >&2
  exit 2
fi
readonly seed
readonly log_file="/workspace/scout_v21_seed${seed}.log"
readonly full_log_copy="/workspace/v21_seed${seed}_FULL.log"
readonly model_copy="/workspace/v21_seed${seed}_model.int6.ptz"

# The header above ties the predicted artifact size to lrzip pergroup
# compression. Warn before the run starts rather than discovering a missing
# tool afterwards. This is only a warning because how train_gpt.py invokes
# lrzip is not visible from this script.
if ! command -v lrzip >/dev/null 2>&1; then
  printf 'warning: lrzip not found on PATH; COMPRESSOR=pergroup may fail or overshoot the size cap\n' >&2
fi

cd "$RECORD_DIR"

echo "$BANNER"
echo " V21 scout: FULL PR #1855 stack + AWQ-lite + AsymLogit"
echo " Seed ${seed} + FORCE_STOP_STEP=4945 Start: $(date)"
echo "$BANNER"

# COMPLETE env var set from PR #1855 README + PR #1908 AWQ-lite + V19 ASYM_LOGIT_RESCALE
# Kept as an array (one NAME=value per element) so the launch does not rely on
# word-splitting an unquoted string.
run_env=(
  "SEED=${seed}"
  DATA_DIR=/workspace/caseops_data/datasets/
  DATA_PATH=/workspace/caseops_data/datasets/datasets/fineweb10B_sp8192_lossless_caps_caseops_v1_reserved
  TOKENIZER_PATH=/workspace/caseops_data/datasets/tokenizers/fineweb_8192_bpe_lossless_caps_caseops_v1_reserved.model
  CASEOPS_ENABLED=1
  VOCAB_SIZE=8192
  ITERATIONS=20000
  MAX_WALLCLOCK_SECONDS=600
  WARMUP_STEPS=20
  WARMDOWN_FRAC=0.85
  BETA2=0.99
  GRAD_CLIP_NORM=0.3
  MIN_LR=0.1
  MATRIX_LR=0.026
  GLOBAL_TTT_MOMENTUM=0.9
  SPARSE_ATTN_GATE_ENABLED=1
  SPARSE_ATTN_GATE_SCALE=0.5
  SMEAR_GATE_ENABLED=1
  GATE_WINDOW=12
  GATED_ATTN_QUANT_GATE=1
  FUSED_CE_ENABLED=1
  EMBED_BITS=7
  MLP_CLIP_SIGMAS=11.5
  ATTN_CLIP_SIGMAS=13.0
  EMBED_CLIP_SIGMAS=14.0
  GPTQ_RESERVE_SECONDS=0.5
  GPTQ_CALIBRATION_BATCHES=16
  COMPRESSOR=pergroup
  LQER_ENABLED=1
  LQER_ASYM_ENABLED=1
  LQER_RANK=4
  LQER_FACTOR_BITS=4
  LQER_ASYM_GROUP=64
  LQER_TOP_K=3
  AWQ_LITE_ENABLED=1
  AWQ_LITE_BITS=8
  AWQ_LITE_GROUP_TOP_K=1
  AWQ_LITE_GROUP_SIZE=64
  PHASED_TTT_ENABLED=1
  PHASED_TTT_PREFIX_DOCS=2500
  PHASED_TTT_NUM_PHASES=3
  TTT_CHUNK_SIZE=48
  TTT_BETA2=0.99
  TTT_WEIGHT_DECAY=0.5
  TTT_LORA_RANK=80
  MUON_BACKEND_STEPS=5
  NCCL_NET=Socket
  VAL_LOSS_EVERY=0
  ASYM_LOGIT_RESCALE=1
  FORCE_STOP_STEP=4945
)

# All trainer output goes to the log file, so a bare `set -e` exit would leave
# the operator with no hint of what went wrong. Capture the status, surface
# the tail of the log on stderr, and exit with the trainer's own status.
run_status=0
env "${run_env[@]}" \
  torchrun --standalone --nproc_per_node=8 train_gpt.py \
  > "$log_file" 2>&1 || run_status=$?
if (( run_status != 0 )); then
  printf 'error: torchrun exited with status %d; last lines of %s:\n' \
    "$run_status" "$log_file" >&2
  tail -n 40 -- "$log_file" >&2 || true
  exit "$run_status"
fi

# Copies stay best-effort (a failed copy must not discard a finished run),
# but a missing artifact is reported instead of being silently ignored.
if ! cp -- final_model.int6.ptz "$model_copy"; then
  printf 'warning: could not copy final_model.int6.ptz to %s\n' "$model_copy" >&2
fi
if ! cp -- "$log_file" "$full_log_copy"; then
  printf 'warning: could not copy %s to %s\n' "$log_file" "$full_log_copy" >&2
fi

echo ""
echo "$BANNER"
echo " V21 scout DONE $(date)"
echo "$BANNER"
# Show the last 20 log lines that carry results or echo back the key config
# flags. With pipefail, a grep that matches nothing makes the pipeline fail,
# which is reported as a warning rather than aborting the summary.
if ! grep -E "stopping_early|train_time|quantized_ttt_phased|val_bpb|total_eval_time|Total submission|smear_gate_enabled|sparse_attn_gate_enabled|num_phases|compressor" "$log_file" | tail -n 20; then
  printf 'warning: no summary lines matched in %s\n' "$log_file" >&2
fi

# NOTE(review): the rule below leaves 1.060-1.062 unassigned, and its WIN band
# (up to 1.060) is slightly tighter than the stated 1.06021 threshold - confirm
# the intended handling of that range. Text is kept exactly as originally printed.
cat <<'EOF'

DECISION RULE:
 PR #1908 reported 3-seed mean: 1.06081
 community merge floor: 0.0006 BPB
 win threshold: < 1.06021
 artifact cap: < 16,000,000 bytes

 if V21 quantized_ttt_phased < 1.058 AND artifact < 16M -> CLEAR WIN, run 3-seed
 if V21 quantized_ttt_phased 1.058-1.060 -> WIN, run 3-seed
 if artifact > 16M -> SIZE FAIL (debug compressor)
 if quantized_ttt_phased > 1.062 -> abandon
EOF

0 commit comments

Comments
 (0)