diff --git a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp
index c34713ba..c17a7110 100644
--- a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp
+++ b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp
@@ -949,7 +949,7 @@ struct Sm100FmhaBwdKernelTmaWarpSpecialized {
       TensorC const& coord,
       TensorShape const& tensor_shape) {
 
-    // TODO: Performance of FlashMLA on sm90 is dropped with latest cutlass, so here revert the to the old version.
+    // TODO: FlashMLA performance on sm90 drops with the latest cutlass, so we revert to the old version here.
     // Tensor preds = cute::lazy::transform(coord, [&](auto const& c) { return elem_less(c, tensor_shape); });
 
     auto copy_op = make_cotiled_copy(
diff --git a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp
index c25d638d..d45d2af7 100644
--- a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp
+++ b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp
@@ -954,7 +954,7 @@ struct Sm100FmhaBwdMlaKernelTmaWarpSpecialized {
       TensorC const& coord,
       TensorShape const& tensor_shape) {
   
-    // TODO: Performance of FlashMLA on sm90 is dropped with latest cutlass, so here revert the to the old version.
+    // TODO: FlashMLA performance on sm90 drops with the latest cutlass, so we revert to the old version here.
     // Tensor preds = cute::lazy::transform(coord, [&](auto const& c) { return elem_less(c, tensor_shape); });
 
     auto copy_op = make_cotiled_copy(