We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8110cb8, commit db53912 (Copy full SHA for db53912)
1 file changed
records/track_10min_16mb/2026-04-05_11L_LatentMaskTTT_GPTQ_ProductKey_Brotli/train_gpt.py
@@ -16,7 +16,7 @@
16
import torch.nn.functional as F
17
from torch import Tensor, nn
18
from torch.nn.parallel import DistributedDataParallel as DDP
19
-from flash_attn.flash_attn_interface import flash_attn_func as _fa3_func
+from flash_attn_interface import flash_attn_func as _fa3_func
20
def flash_attn_3_func(q, k, v, causal=True):
21
return _fa3_func(q, k, v, causal=causal)
22
class Hyperparameters:
0 commit comments