Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions tests/cov_pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ addopts =
--ignore=tests/layers/test_append_attention.py
--ignore=tests/layers/test_attention.py
--ignore=tests/operators/test_rejection_top_p_sampling.py
- --ignore=tests/operators/test_perchannel_gemm.py
--ignore=tests/operators/test_scaled_gemm_f8_i4_f16.py
--ignore=tests/operators/test_topp_sampling.py
--ignore=tests/operators/test_stop_generation.py
Expand All @@ -17,8 +16,3 @@ addopts =
--ignore=tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py
--ignore=tests/graph_optimization/test_cuda_graph_spec_decode
--ignore=tests/layers/test_quant_layer.py
- --ignore=tests/operators/test_token_penalty.py
- --ignore=tests/operators/test_split_fuse.py
- --ignore=tests/operators/test_flash_mask_attn.py
- --ignore=tests/operators/test_w4afp8_gemm.py
- --ignore=tests/operators/test_tree_mask.py
2 changes: 1 addition & 1 deletion tests/operators/test_tree_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def run_append_c16_attention(self, q_len, kv_len, prefill=False, attn_mask=None)
paddle.device.synchronize()
e_time = time.time()
print(f"mean infer time: {np.mean((e_time - s_time) * 1000 / self.run_time):.2f}")
- return out[0].reshape([token_num, self.num_q_head, self.head_dim])
+ return out.reshape([token_num, self.num_q_head, self.head_dim])

def test_naive_speculative_decoding(self):
prefill_len = 8192
Expand Down
Loading