AI-Hypercomputer
diff --git a/‎benchmarks/mixtral_offline.sh
Lines changed: 20 additions & 0 deletions b/‎benchmarks/mixtral_offline.sh
Lines changed: 20 additions & 0 deletions
diff --git a/‎benchmarks/run_offline.py
Lines changed: 12 additions & 2 deletions b/‎benchmarks/run_offline.py
Lines changed: 12 additions & 2 deletions
diff --git a/‎mlperf/README.md
Lines changed: 31 additions & 0 deletions b/‎mlperf/README.md
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,20 @@
+CACHE_LENGTH=1024
+INPUT_SIZE=512
+OUTPUT_SIZE=1024
+BATCH_SIZE=512
+CHECKPOINT_PATH=mlperf/data/mixtral-instruct-quantized/
+
+pushd ..
+python -m benchmarks.run_offline \
+  --model_name=mixtral \
+  --batch_size=$BATCH_SIZE \
+  --max_cache_length=$CACHE_LENGTH \
+  --max_decode_length=$OUTPUT_SIZE \
+  --context_length=$INPUT_SIZE \
+  --checkpoint_path=$CHECKPOINT_PATH/model.safetensors \
+  --tokenizer_path=$CHECKPOINT_PATH/tokenizer.model \
+  --quantize_weights=1 \
+  --quantize_type=int8_per_channel \
+  --quantize_kv_cache=1 \
+  --profiling_output=/mnt/disks/hanq/mixtral-profiles
+popd
@@ -127,10 +127,20 @@ def main(argv):
     jax.profiler.stop_trace()
 
   print("prefill ", prefill_times)
-  print("decode", sum(dec_times) / 10)
+  avg_decode_times = sum(dec_times[2:]) / len(dec_times[2:])
+  print("decode", avg_decode_times)
 
   prefill_times_ms = {k: v * 1000 for k, v in prefill_times.items()}
-  decode_time_ms = sum(dec_times) * 1000 / 10 / FLAGS.batch_size
+  decode_time_ms = sum(dec_times[2:]) * 1000 / 8
+
+  largest_prefill = max(prefill_times.items())
+  print("MAX tokens:", FLAGS.batch_size / avg_decode_times)
+
+  time2 = (FLAGS.batch_size * FLAGS.max_decode_length) / (
+      FLAGS.batch_size * largest_prefill[1]
+      + FLAGS.max_decode_length * avg_decode_times
+  )
+  print("MAX tokens 2:", time2)
 
   sharegpt_path = FLAGS.sharegpt_path
   if sharegpt_path:
 
@@ -0,0 +1,31 @@
+# Run MLPerf tests
+
+NOTE: This has currently only been tested with Mixtral,
+and only with the offline benchmark.
+
+# How to run
+
+### 1. Install
+
+```
+./install.sh
+```
+
+### 2. Start server
+
+```
+./start_server.sh
+```
+
+### 3. Warm up the server
+
+```
+python warmup.py
+```
+
+### 4. Run the benchmark (currently offline mode only)
+
+```
+./benchmark_run.sh
+```
+