diff --git a/starter_kit/START_HERE.md b/starter_kit/START_HERE.md new file mode 100644 index 0000000000..6dea0ce4b4 --- /dev/null +++ b/starter_kit/START_HERE.md @@ -0,0 +1,48 @@ +# Parameter Golf Starter Kit + +This folder contains a low-budget workflow for getting from your first run to a valid non-record PR. + +## 1) Fork + set your remote + +From your local repo root: + +```bash +git remote rename origin upstream +git remote add origin https://github.com/YOUR_GITHUB_USERNAME/parameter-golf.git +git fetch upstream +git checkout -b exp/first-runs upstream/main +git push -u origin exp/first-runs +``` + +## 2) On RunPod: first smoke run + +Run the scripts in `starter_kit/scripts` in order: + +1. `01_runpod_bootstrap.sh <your fork URL>` +2. `02_smoke_run.sh` + +## 3) Promote to serious run + +Run `03_full_run.sh` once the smoke-run logs look healthy. + +## 4) Prepare a PR-ready records folder + +Run: + +```bash +python starter_kit/scripts/prepare_submission.py \ + --track non-record \ + --run-name my_first_non_record \ + --author-name "Your Name" \ + --github-id "your_github" \ + --val-bpb 1.1999 +``` + +Then replace the placeholder `train.log` in the generated folder with your real train log and fill in the README details. + +## 5) Submission checklist + +- The PR adds exactly one new folder, under `records/track_non_record_16mb/` or `records/track_10min_16mb/`. +- The folder includes `README.md`, `submission.json`, `train_gpt.py`, and the train log. +- Repro steps are explicit and complete. +- No validation-data leakage or rule violations. 
diff --git a/starter_kit/notes/EXPERIMENT_LOG_TEMPLATE.md b/starter_kit/notes/EXPERIMENT_LOG_TEMPLATE.md new file mode 100644 index 0000000000..d5803ee6eb --- /dev/null +++ b/starter_kit/notes/EXPERIMENT_LOG_TEMPLATE.md @@ -0,0 +1,32 @@ +# Experiment Log Template + +## Run Metadata + +- run_id: +- date: +- gpu: +- cost_estimate_usd: +- dataset_variant: +- train_shards: +- max_wallclock_seconds: + +## Config Delta + +- base_commit: +- branch: +- changed_hparams: +- changed_code_paths: + +## Outcomes + +- val_loss: +- val_bpb: +- final_int8_zlib_roundtrip_bytes: +- step_count: +- runtime_seconds: + +## Decision + +- keep / drop: +- reason: +- next test: diff --git a/starter_kit/scripts/01_runpod_bootstrap.sh b/starter_kit/scripts/01_runpod_bootstrap.sh new file mode 100644 index 0000000000..db2a49b041 --- /dev/null +++ b/starter_kit/scripts/01_runpod_bootstrap.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Clones the fork into /workspace/parameter-golf (skipped when that directory already exists), prints the remotes, then runs the dataset download script with --variant sp1024 --train-shards 1. Usage: +# bash starter_kit/scripts/01_runpod_bootstrap.sh https://github.com/YOUR_GITHUB_USERNAME/parameter-golf.git + +FORK_URL="${1:-}" +if [[ -z "$FORK_URL" ]]; then + echo "Provide your fork URL as first arg." + exit 1 +fi + +cd /workspace +if [[ ! -d parameter-golf ]]; then + git clone "$FORK_URL" parameter-golf +fi + +cd parameter-golf +git remote -v + +echo "Downloading small dataset slice for low-cost iteration..." +python3 data/cached_challenge_fineweb.py --variant sp1024 --train-shards 1 + +echo "Bootstrap complete. 
Run: bash starter_kit/scripts/02_smoke_run.sh" diff --git a/starter_kit/scripts/02_smoke_run.sh b/starter_kit/scripts/02_smoke_run.sh new file mode 100644 index 0000000000..d2a65eb299 --- /dev/null +++ b/starter_kit/scripts/02_smoke_run.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Quick low-cost run (~4 minutes max); stderr is merged into the tee'd log so tracebacks and warnings are captured too +cd /workspace/parameter-golf + +RUN_ID="${RUN_ID:-smoke_sp1024_$(date +%Y%m%d_%H%M%S)}" +export RUN_ID +export DATA_PATH=./data/datasets/fineweb10B_sp1024/ +export TOKENIZER_PATH=./data/tokenizers/fineweb_1024_bpe.model +export VOCAB_SIZE=1024 +export MAX_WALLCLOCK_SECONDS=240 +export VAL_LOSS_EVERY=0 + +mkdir -p logs + +torchrun --standalone --nproc_per_node=1 train_gpt.py 2>&1 | tee "logs/${RUN_ID}.log" + +echo "Smoke run done: logs/${RUN_ID}.log" diff --git a/starter_kit/scripts/03_full_run.sh b/starter_kit/scripts/03_full_run.sh new file mode 100644 index 0000000000..3035395f9b --- /dev/null +++ b/starter_kit/scripts/03_full_run.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Full baseline-style run (~10 minutes); stderr is merged into the tee'd log so tracebacks and warnings are captured too +cd /workspace/parameter-golf + +RUN_ID="${RUN_ID:-full_sp1024_$(date +%Y%m%d_%H%M%S)}" +export RUN_ID +export DATA_PATH=./data/datasets/fineweb10B_sp1024/ +export TOKENIZER_PATH=./data/tokenizers/fineweb_1024_bpe.model +export VOCAB_SIZE=1024 +export MAX_WALLCLOCK_SECONDS=600 +export VAL_LOSS_EVERY=200 + +mkdir -p logs + +torchrun --standalone --nproc_per_node=1 train_gpt.py 2>&1 | tee "logs/${RUN_ID}.log" + +echo "Full run done: logs/${RUN_ID}.log" diff --git a/starter_kit/scripts/prepare_submission.py b/starter_kit/scripts/prepare_submission.py new file mode 100644 index 0000000000..d41943256b --- /dev/null +++ b/starter_kit/scripts/prepare_submission.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +import argparse +import datetime as dt +import json +from pathlib import Path +import shutil + + +def main() -> None: + """Create records/<track dir>/<date>_<run-name>/ containing README.md, submission.json, a copy of the train script as train_gpt.py, and a placeholder train.log.""" + parser = argparse.ArgumentParser(description="Create a PR-ready records folder.") + 
parser.add_argument("--track", choices=["record", "non-record"], required=True) + parser.add_argument("--run-name", required=True) + parser.add_argument("--author-name", required=True) + parser.add_argument("--github-id", required=True) + parser.add_argument("--val-bpb", type=float, required=True) + parser.add_argument("--source-train-script", default="train_gpt.py") + args = parser.parse_args() + + repo_root = Path(__file__).resolve().parents[2] + date = dt.datetime.now().strftime("%Y-%m-%d") + slug = f"{date}_{args.run_name}" + + if args.track == "record": + track_dir = repo_root / "records" / "track_10min_16mb" + else: + track_dir = repo_root / "records" / "track_non_record_16mb" + + # Check and read every input before creating the output folder: mkdir below uses exist_ok=False, so a failure after it would leave a partial folder that blocks a same-day rerun with the same run name. + source_script = repo_root / args.source_train_script + if not source_script.exists(): + raise FileNotFoundError(f"Could not find train script: {source_script}") + + template_dir = repo_root / "starter_kit" / "templates" + readme_tpl = (template_dir / "README_submission_template.md").read_text(encoding="utf-8") + readme = ( + readme_tpl + .replace("{{RUN_NAME}}", args.run_name) + .replace("{{DATE}}", date) + .replace("{{TRACK}}", args.track) + .replace("{{AUTHOR_NAME}}", args.author_name) + .replace("{{GITHUB_ID}}", args.github_id) + .replace("{{VAL_BPB}}", f"{args.val_bpb:.4f}") + ) + + out_dir = track_dir / slug + out_dir.mkdir(parents=True, exist_ok=False) + (out_dir / "README.md").write_text(readme, encoding="utf-8") + + submission = { + "author_name": args.author_name, + "github_id": args.github_id, + "run_name": args.run_name, + "track": args.track, + "val_bpb": round(args.val_bpb, 4), + "date": date, + "notes": "Fill out details and attach train logs." 
+ } + (out_dir / "submission.json").write_text(json.dumps(submission, indent=2) + "\n", encoding="utf-8") + + shutil.copy2(source_script, out_dir / "train_gpt.py") + + (out_dir / "train.log").write_text("# Paste or copy real run logs here\n", encoding="utf-8") + + print(f"Created: {out_dir}") + print("Next: copy your actual log into train.log and complete README details.") + + +if __name__ == "__main__": + main() diff --git a/starter_kit/templates/README_submission_template.md b/starter_kit/templates/README_submission_template.md new file mode 100644 index 0000000000..b06bb2d6d6 --- /dev/null +++ b/starter_kit/templates/README_submission_template.md @@ -0,0 +1,37 @@ +# {{RUN_NAME}} + +- Date: {{DATE}} +- Track: {{TRACK}} +- Author: {{AUTHOR_NAME}} ({{GITHUB_ID}}) +- Reported val_bpb: {{VAL_BPB}} + +## Summary + +Short summary of the idea and why it may help. + +## What Changed + +- List architecture changes. +- List optimization and schedule changes. +- List quantization or eval changes. + +## Repro Command + +```bash +RUN_ID={{RUN_NAME}} \ +DATA_PATH=./data/datasets/fineweb10B_sp1024/ \ +TOKENIZER_PATH=./data/tokenizers/fineweb_1024_bpe.model \ +VOCAB_SIZE=1024 \ +torchrun --standalone --nproc_per_node=1 train_gpt.py +``` + +## Results + +- val_bpb: +- val_loss: +- compressed_bytes: +- wallclock_seconds: + +## Notes + +Any caveats, negative findings, or follow-up experiments. 
diff --git a/starter_kit/templates/submission.json.template b/starter_kit/templates/submission.json.template new file mode 100644 index 0000000000..43550e7f7f --- /dev/null +++ b/starter_kit/templates/submission.json.template @@ -0,0 +1,9 @@ +{ + "author_name": "Your Name", + "github_id": "your_github", + "run_name": "your_run_name", + "track": "non-record", + "val_bpb": 1.2000, + "date": "YYYY-MM-DD", + "notes": "Fill with concise methodology and constraints." +}