We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b1446aa, commit 2347ebb. Copy full SHA for 2347ebb.
jetstream_pt/config.py
@@ -31,6 +31,7 @@
31
flags.DEFINE_string("size", "tiny", "size of model")
32
flags.DEFINE_bool("quantize_kv_cache", False, "kv_cache_quantize")
33
# Integer flag (default 1024); its value is forwarded as `max_cache_length`
# when the engine is created from the config flags.
flags.DEFINE_integer("max_cache_length", 1024, "max length of kv cache")
34
+flags.DEFINE_integer("max_decode_length", 1024, "max length of generated text")
35
flags.DEFINE_string("sharding_config", "", "config file for sharding")
36
flags.DEFINE_bool(
37
"shard_on_batch",
@@ -173,6 +174,7 @@ def create_engine_from_config_flags():
173
174
batch_size=FLAGS.batch_size,
175
quant_config=quant_config,
176
max_cache_length=FLAGS.max_cache_length,
177
+ max_decode_length=FLAGS.max_decode_length,
178
sharding_config=sharding_file_name,
179
shard_on_batch=FLAGS.shard_on_batch,
180
ragged_mha=FLAGS.ragged_mha,
0 commit comments