vad : remove unused variable temp_end

danbev · danbev · commit 3fd945c0cc0e · 2025-04-09T13:47:06.000+02:00
diff --git a/src/whisper.cpp b/src/whisper.cpp
@@ -4438,7 +4438,6 @@ struct whisper_vad_context {
     bool triggered;
     std::vector<float> context_buffer;
     unsigned int current_sample;
-    unsigned int temp_end;
 
     std::vector<whisper_vad_segment> detected_segments;
 
@@ -4478,13 +4477,10 @@ static ggml_backend_buffer_type_t select_weight_buft(const whisper_vad_hparams &
 
 static ggml_tensor * whisper_vad_build_stft_layer(ggml_context* ctx0,
         const whisper_vad_model & model, ggml_tensor * cur) {
-    struct ggml_tensor * padded_frame = ggml_pad(ctx0, cur, 64, 0, 0, 0);
-    struct ggml_tensor * reshaped_frame = ggml_reshape_3d(ctx0, padded_frame, 640, 1, 1);
-
     // We need the stft tensor to be in {258, 1, 256},
     // that is a kernel size of 258, 1 channel, and 256 frequency bins (output)
-    struct ggml_tensor * reshaped_stft = ggml_reshape_3d(ctx0, model.stft_forward_basis, 258, 1, 256);
-    cur = ggml_conv_1d(ctx0, reshaped_stft, reshaped_frame, 1, 1, 1);
+    struct ggml_tensor * stft_reshaped = ggml_reshape_3d(ctx0, model.stft_forward_basis, 258, 1, 256);
+    cur = ggml_conv_1d(ctx0, stft_reshaped, cur, 1, 1, 1);
     ggml_set_name(cur, "stft");
     ggml_set_output(cur);
     return cur;
@@ -4739,7 +4735,6 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
     vctx->triggered = false;
     vctx->context_buffer.resize(vctx->context_samples, 0.0f);
     vctx->current_sample = 0;
-    vctx->temp_end = 0;
 
     auto & model = vctx->model;
     auto & hparams = model.hparams;
@@ -5215,7 +5210,6 @@ struct whisper_vad_segments whisper_vad_detect_speech(
     // Reset state for this detection
     vctx->triggered = false;
     vctx->current_sample = 0;
-    vctx->temp_end = 0;
     std::fill(vctx->context_buffer.begin(), vctx->context_buffer.end(), 0.0f);
     vctx->detected_segments.clear();
 
@@ -5227,12 +5221,11 @@ struct whisper_vad_segments whisper_vad_detect_speech(
     }
 
     std::vector<float> window_with_context(vctx->effective_window_size);
-    WHISPER_LOG_INFO("%s: window_with_context.size() = %zu\n", __func__, window_with_context.size());
     WHISPER_LOG_INFO("%s: window_sample_size: %u\n", __func__, vctx->window_size_samples);
     WHISPER_LOG_INFO("%s: context_sample_size: %u\n", __func__, vctx->context_samples);
+    WHISPER_LOG_INFO("%s: window_with_context: %zu\n", __func__, window_with_context.size());
     WHISPER_LOG_INFO("%s: effective_window_size: %u\n", __func__, vctx->effective_window_size);
 
-    whisper_vad_segment current_segment = {-1.0f, -1.0f};
     struct ggml_tensor * frame = ggml_graph_get_tensor(gf, "frame");
     struct ggml_tensor * c_out = ggml_graph_get_tensor(gf, "c_out");
     struct ggml_tensor * h_out = ggml_graph_get_tensor(gf, "h_out");
@@ -5256,7 +5249,7 @@ struct whisper_vad_segments whisper_vad_detect_speech(
         if (i + vctx->window_size_samples > n_samples) {
             break;
         }
-        // Copy the previous context buffer into the next window to be processed next
+        // Copy the previous context buffer into the next window to be processed.
         // context_buffer contains the 64 samples from the previous window and this is
         // part of the overlapping windows to avoid spectral leakage.
         std::copy(vctx->context_buffer.begin(), vctx->context_buffer.end(), window_with_context.begin());