diff --git a/README.md b/README.md
index 1110b09a..2210aa22 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,6 @@ addmm_bench.run()
 
 We depend on the following projects as a source of customized Triton or CUTLASS kernels:
 
-* (CUDA, HIP) [kernels](https://github.com/triton-lang/kernels)
 * (CUDA, HIP) [generative-recommenders](https://github.com/facebookresearch/generative-recommenders)
 * (CUDA, HIP) [Liger-Kernel](https://github.com/linkedin/Liger-Kernel)
 * (CUDA, HIP) [tilelang](https://github.com/tile-ai/tilelang)
diff --git a/test/test_gpu/skip_tests_h100_pytorch.yaml b/test/test_gpu/skip_tests_h100_pytorch.yaml
index d316eea6..418828df 100644
--- a/test/test_gpu/skip_tests_h100_pytorch.yaml
+++ b/test/test_gpu/skip_tests_h100_pytorch.yaml
@@ -5,8 +5,6 @@
 # op-name: to skip an entire operator
 # op-name:\n\t- impl-name to skip an impl
 flash_attention:
-  # thunderkittens cannot handle the default input shapes
-  - tk
   # tma API changed in upstream
   - triton_tutorial_flash_v2_tma
   # triton_tutorial_*_ws kernels require triton-main
diff --git a/test/test_gpu/skip_tests_h100_triton_main.yaml b/test/test_gpu/skip_tests_h100_triton_main.yaml
index b2513664..73b35ef1 100644
--- a/test/test_gpu/skip_tests_h100_triton_main.yaml
+++ b/test/test_gpu/skip_tests_h100_triton_main.yaml
@@ -5,8 +5,6 @@
 # op-name: to skip an entire operator
 # op-name:\n\t- impl-name to skip an impl
 flash_attention:
-  # thunderkittens cannot handle the default input shapes
-  - tk
   # _ws kernels require Triton with warp specialization
   - triton_tutorial_flash_v2_ws
   - triton_tutorial_flash_v2_tma_ws