Skip to content

Commit 2c78888

Browse files
committed
vad : add logging for weights to compare with org model
1 parent a006275 commit 2c78888

File tree

1 file changed

+37
-9
lines changed

1 file changed

+37
-9
lines changed

src/whisper.cpp

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4484,9 +4484,9 @@ static ggml_backend_buffer_type_t select_weight_buft(const whisper_vad_hparams &
44844484
static ggml_tensor * whisper_vad_build_encoder_layer(ggml_context* ctx0,
44854485
const whisper_vad_model & model, ggml_tensor * cur) {
44864486
WHISPER_LOG_INFO("%s: building encoder layer\n", __func__);
4487-
// Reshape from the STFT output which is [258, 1, 1, 1] where are complex
4488-
// number pairs. I think we can ignore the imaginary part and just use the
4489-
// real part here.
4487+
// Reshape from the STFT output which is [258, 1, 1, 1] where the first
4488+
// dimension holds complex number pairs. I think we can ignore the imaginary
4489+
// part and just use the real part here.
44904490
struct ggml_tensor * real_part = ggml_view_1d(ctx0, cur, 129, 0);
44914491
struct ggml_tensor * reshaped = ggml_reshape_3d(ctx0, real_part, 1, 129, 1);
44924492

@@ -5021,11 +5021,39 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
50215021
return nullptr;
50225022
}
50235023

5024-
struct ggml_tensor * tensor = model.stft_forward_basis;
5025-
std::vector<float> read_b(ggml_nbytes(tensor));
5026-
ggml_backend_tensor_get(tensor, read_b.data(), 0, ggml_nbytes(tensor));
5027-
for (int i = 0; i < 10; i++) {
5028-
WHISPER_LOG_INFO("%s: stft_forward_basis[%d]: %f\n", __func__, i, read_b[i]);
5024+
{
5025+
// Print as F32
5026+
struct ggml_tensor * tensor = model.stft_forward_basis;
5027+
std::vector<float> read_b(ggml_nbytes(tensor));
5028+
ggml_backend_tensor_get(tensor, read_b.data(), 0, ggml_nbytes(tensor));
5029+
for (int i = 0; i < 10; i++) {
5030+
WHISPER_LOG_INFO("%s: stft_forward_basis[%d]: %f\n", __func__, i, read_b[i]);
5031+
}
5032+
}
5033+
5034+
5035+
{
5036+
// Print as F16
5037+
struct ggml_tensor * tensor = model.encoder_0_weight;
5038+
std::vector<uint16_t> raw_data(ggml_nbytes(tensor) / sizeof(uint16_t));
5039+
ggml_backend_tensor_get(tensor, raw_data.data(), 0, ggml_nbytes(tensor));
5040+
5041+
// Convert first 10 values from F16 to F32 for display
5042+
for (int i = 0; i < 10; i++) {
5043+
float converted_value = ggml_fp16_to_fp32(raw_data[i]);
5044+
WHISPER_LOG_INFO("%s: model.encoder.0.reparam_conv: [%d]: %f (raw: 0x%04x)\n",
5045+
__func__, i, converted_value, raw_data[i]);
5046+
}
5047+
}
5048+
5049+
{
5050+
// Print as F32
5051+
struct ggml_tensor * tensor = model.encoder_0_bias;
5052+
std::vector<float> read_b(ggml_nbytes(tensor));
5053+
ggml_backend_tensor_get(tensor, read_b.data(), 0, ggml_nbytes(tensor));
5054+
for (int i = 0; i < 10; i++) {
5055+
WHISPER_LOG_INFO("%s: encoder_0_bias: [%d]: %f\n", __func__, i, read_b[i]);
5056+
}
50295057
}
50305058
}
50315059

@@ -5122,7 +5150,7 @@ struct whisper_vad_segments whisper_vad_detect_speech(
51225150
}
51235151
WHISPER_LOG_INFO("%s: finished processing %d samples\n", __func__, n_samples);
51245152
for (int i = 0; i < probs.size(); i++) {
5125-
WHISPER_LOG_INFO("%s: prob[%d]: %f\n", __func__, i, probs[i]);
5153+
//WHISPER_LOG_INFO("%s: prob[%d]: %f\n", __func__, i, probs[i]);
51265154
}
51275155

51285156
segments.n_segments = n_frames;

0 commit comments

Comments
 (0)