lints

qihqi · qihqi · commit 983699840aa2 · 2024-09-11T00:41:21.000Z
diff --git a/.github/workflows/offline_perf.yaml b/.github/workflows/offline_perf.yaml
@@ -51,7 +51,7 @@ jobs:
         set -euo pipefail
         source venv/bin/activate 
         JAX_PLATFORMS=tpu,cpu python -m jetstream_pt.cli benchmark_offline --model_id meta-llama/Meta-Llama-3-8B-Instruct --quantize_weights=0 --override_batch_size=128 --benchmark_save_offline_result_to_file=result.md --internal_use_random_weights=True --hf_token=$HF_TOKEN
-        cat result.md | ./jq-linux-amd64 > output.txt
+        cat result.md | ./jq-linux-amd64 -Rsa . > output.txt
     - name: Update result to PR
       env: 
         URL: ${{ github.event.pull_request.comments_url }}
diff --git a/benchmarks/run_offline.py b/benchmarks/run_offline.py
@@ -93,7 +93,7 @@ def main(argv):
   decode_state = engine.init_decode_state()
   profiler_started = False
   for exp in range(4, 11):
-    batch = 2 ** exp
+    batch = 2**exp
     runtime, decode_state, profiler_started = run_prefill_time(
         engine, params, decode_state, batch, profiler_started
     )
diff --git a/jetstream_pt/cli.py b/jetstream_pt/cli.py
@@ -27,7 +27,11 @@
 flags.DEFINE_integer("max_output_length", 1024, "The batch size")
 flags.DEFINE_integer("port", 9000, "port to listen on")
 flags.DEFINE_integer("threads", 64, "number of worker threads in thread pool")
-flags.DEFINE_string("benchmark_save_offline_result_to_file", "", "if set, then save the result to the given file name")
+flags.DEFINE_string(
+    "benchmark_save_offline_result_to_file",
+    "",
+    "if set, then save the result to the given file name",
+)
 
 
 def shard_weights(env, weights, weight_shardings):
@@ -115,21 +119,24 @@ def _check_model_id():
     list_model()
     sys.exit(1)
 
-def _run_prefill_time(engine, params, decode_state, seqlen, profiler_started):
+
+def _run_prefill_time(
+    pt_engine, params, decode_state, seqlen, profiler_started
+):
   """Run prefill and measure time."""
-  metadata = engine.get_tokenizer()
-  tokenizer = engine.build_tokenizer(metadata)
+  metadata = pt_engine.get_tokenizer()
+  tokenizer = pt_engine.build_tokenizer(metadata)
 
   text = "This is a beautiful day"
   tokens, true_length = tokenizer.encode(
       text, is_bos=True, prefill_lengths=[seqlen]
   )
 
   for _ in range(3):
-    prefill_result, _ = engine.prefill(
+    prefill_result, _ = pt_engine.prefill(
         params=params, padded_tokens=tokens, true_length=true_length
     )
-    decode_state = engine.insert(
+    decode_state = pt_engine.insert(
         prefill_result, decode_state, slot=jnp.int32(1)
     )
 
@@ -140,10 +147,10 @@ def _run_prefill_time(engine, params, decode_state, seqlen, profiler_started):
       jax.profiler.start_trace(FLAGS.profiling_output)
       profiler_started = True
 
-    prefill_result, _ = engine.prefill(
+    prefill_result, _ = pt_engine.prefill(
         params=params, padded_tokens=tokens, true_length=true_length
     )
-    decode_state = engine.insert(
+    decode_state = pt_engine.insert(
         prefill_result, decode_state, slot=jnp.int32(i)
     )
   jax.block_until_ready(decode_state)
@@ -244,25 +251,25 @@ def interactive():
     print("---- All output text.")
     print(tokenizer.decode(sampled_tokens_list))
 
+
 def _save_benchmark_to_file(filename, prefill_times_ms, decode_time_ms):
-  lines = [
-    " # Offline benchmark numbers",
-    " ## Model: " + FLAGS.model_id,
-    " ## Batch size: {}".format(FLAGS.override_batch_size),
-    " ## Quantize: {}".format(FLAGS.quantize_weights),
-    " |       | time (ms) |",
-    " |-------|-----------|",
-  ] + [
-    "| Prefill {} | {} |".format(x, y) for x, y in prefill_times_ms.items()
-  ] + [
-    "| Decode | {} |".format(decode_time_ms)
-  ]
-  with open(filename, 'w') as f:
-    f.write('\n'.join(lines))
+  lines = (
+      [
+          " # Offline benchmark numbers",
+          " ## Model: " + FLAGS.model_id,
+          f" ## Batch size: {FLAGS.override_batch_size}",
+          f" ## Quantize: {FLAGS.quantize_weights}",
+          " |       | time (ms) |",
+          " |-------|-----------|",
+      ]
+      + [f"| Prefill {x} | {y} |" for x, y in prefill_times_ms.items()]
+      + [f"| Decode | {decode_time_ms} |"]
+  )
+  with open(filename, "w", encoding="utf-8") as f:
+    f.write("\n".join(lines))
     f.flush()
 
 
-
 def benchmark_offline():
   """function to run engine offline."""
   _check_model_id()
@@ -280,7 +287,7 @@ def benchmark_offline():
   profiler_started = False
   # 16 .. 1024
   for exp in range(4, 11):
-    batch = 2 ** exp
+    batch = 2**exp
     runtime, decode_state, profiler_started = _run_prefill_time(
         pt_engine, params, decode_state, batch, profiler_started
     )
@@ -333,13 +340,12 @@ def benchmark_offline():
 
   if FLAGS.benchmark_save_offline_result_to_file:
     _save_benchmark_to_file(
-      FLAGS.benchmark_save_offline_result_to_file,
-      prefill_times_ms,
-      decode_time_ms
+        FLAGS.benchmark_save_offline_result_to_file,
+        prefill_times_ms,
+        decode_time_ms,
     )
 
 
-
 def main():
   """Main function."""
 
diff --git a/jetstream_pt/fetch_models.py b/jetstream_pt/fetch_models.py
@@ -23,7 +23,11 @@
     "Directory to store downloaded/converted weights",
 )
 flags.DEFINE_string("hf_token", "", "huggingface token")
-flags.DEFINE_bool("internal_use_random_weights", False, "Use random weights instead of HF weights. Testing only.")
+flags.DEFINE_bool(
+    "internal_use_random_weights",
+    False,
+    "Use random weights instead of HF weights. Testing only.",
+)
 
 flags.DEFINE_integer(
     "override_max_cache_length",
@@ -158,10 +162,11 @@ def _load_weights(directory):
   # Load the state_dict into the model
   return state_dict
 
+
 def _make_random_model_weights(model):
   result = {}
   for key, val in model.state_dict().items():
-    new_weights = torch.rand(val.shape, dtype=val.dtype, device='cpu')
+    new_weights = torch.rand(val.shape, dtype=val.dtype, device="cpu")
     result[key] = new_weights
   return result
 
@@ -172,8 +177,9 @@ def instantiate_model_from_repo_id(
 ):
   """Create model instance by hf model id.+"""
   model_dir = _hf_dir(repo_id)
-  if not FLAGS.internal_use_random_weights and (not os.path.exists(model_dir) or 
-      not os.listdir(model_dir)):
+  if not FLAGS.internal_use_random_weights and (
+      not os.path.exists(model_dir) or not os.listdir(model_dir)
+  ):
     # no weights has been downloaded
     _hf_download(repo_id, model_dir, FLAGS.hf_token)
   model_info = model_id_to_class.get(repo_id)

Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,7 @@ def main(argv):`
`93`	`93`	`   decode_state = engine.init_decode_state()`
`94`	`94`	`   profiler_started = False`
`95`	`95`	`   for exp in range(4, 11):`
`96`		`-    batch = 2 ** exp`
	`96`	`+    batch = 2**exp`
`97`	`97`	`     runtime, decode_state, profiler_started = run_prefill_time(`
`98`	`98`	`         engine, params, decode_state, batch, profiler_started`
`99`	`99`	`     )`