Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 6 additions & 28 deletions src/whisper.cpp
Original file line number Diff line number Diff line change
@@ -6701,12 +6701,13 @@ static bool whisper_vad(
int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);

- if (i < (int)vad_segments->data.size() - 1) {
- segment_end_samples += overlap_samples;
- }

segment_start_samples = std::min(segment_start_samples, n_samples - 1);
segment_end_samples = std::min(segment_end_samples, n_samples - 1);
+ int original_segment_length = segment_end_samples - segment_start_samples;
+
+ if (i < (int)vad_segments->data.size() - 1) {
+ segment_end_samples = std::min(segment_end_samples + overlap_samples, n_samples - 1);
+ }
int segment_length = segment_end_samples - segment_start_samples;
if (segment_length > 0) {
whisper_state::vad_segment_info segment;
@@ -6715,7 +6716,7 @@ static bool whisper_vad(
segment.orig_end = vad_segments->data[i].end;

segment.vad_start = samples_to_cs(offset);
- segment.vad_end = samples_to_cs(offset + segment_length);
+ segment.vad_end = samples_to_cs(offset + original_segment_length);

// Add segment boundaries to mapping table
vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
@@ -6724,29 +6725,6 @@ static bool whisper_vad(
state->vad_mapping_table.push_back(start_mapping);
state->vad_mapping_table.push_back(end_mapping);

- // Add intermediate points for longer segments to improve interpolation accuracy
- const int64_t min_segment_length = 100; // 1 second
- const int64_t point_interval = 20; // Add a point every 200ms
-
- if (segment.vad_end - segment.vad_start > min_segment_length) {
- int64_t segment_duration = segment.vad_end - segment.vad_start;
- int num_points = (int)(segment_duration / point_interval) - 1;
-
- for (int j = 1; j <= num_points; j++) {
- int64_t vad_time = segment.vad_start + j * point_interval;
-
- if (vad_time >= segment.vad_end) continue;
-
- int64_t vad_elapsed = vad_time - segment.vad_start;
- int64_t vad_total = segment.vad_end - segment.vad_start;
- int64_t orig_total = segment.orig_end - segment.orig_start;
- int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
-
- vad_time_mapping intermediate_mapping = {vad_time, orig_time};
- state->vad_mapping_table.push_back(intermediate_mapping);
- }
- }

WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
ctx->state->vad_segments.push_back(segment);
Expand Down
Loading