@@ -4484,9 +4484,9 @@ static ggml_backend_buffer_type_t select_weight_buft(const whisper_vad_hparams &
44844484static ggml_tensor * whisper_vad_build_encoder_layer (ggml_context* ctx0,
44854485 const whisper_vad_model & model, ggml_tensor * cur) {
44864486 WHISPER_LOG_INFO (" %s: building encoder layer\n " , __func__);
4487- // Reshape from the STFT output which is [258, 1, 1, 1] where are complex
4488- // number pairs. I think we can ignore the imaginary part and just use the
4489- // real part here.
4487+ // Reshape from the STFT output which is [258, 1, 1, 1], where the first
4488+ // dimension holds complex number pairs. I think we can ignore the imaginary
4489+ // part and just use the real part here.
44904490 struct ggml_tensor * real_part = ggml_view_1d (ctx0, cur, 129 , 0 );
44914491 struct ggml_tensor * reshaped = ggml_reshape_3d (ctx0, real_part, 1 , 129 , 1 );
44924492
@@ -5021,11 +5021,39 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
50215021 return nullptr ;
50225022 }
50235023
5024- struct ggml_tensor * tensor = model.stft_forward_basis ;
5025- std::vector<float > read_b (ggml_nbytes (tensor));
5026- ggml_backend_tensor_get (tensor, read_b.data (), 0 , ggml_nbytes (tensor));
5027- for (int i = 0 ; i < 10 ; i++) {
5028- WHISPER_LOG_INFO (" %s: stft_forward_basis[%d]: %f\n " , __func__, i, read_b[i]);
5024+ {
5025+ // Debug: print the first 10 values of stft_forward_basis, read as F32
5026+ struct ggml_tensor * tensor = model.stft_forward_basis ;
5027+ std::vector<float > read_b (ggml_nbytes (tensor));
5028+ ggml_backend_tensor_get (tensor, read_b.data (), 0 , ggml_nbytes (tensor));
5029+ for (int i = 0 ; i < 10 ; i++) {
5030+ WHISPER_LOG_INFO (" %s: stft_forward_basis[%d]: %f\n " , __func__, i, read_b[i]);
5031+ }
5032+ }
5033+
5034+
5035+ {
5036+ // Debug: print the first 10 values of encoder_0_weight, read as F16
5037+ struct ggml_tensor * tensor = model.encoder_0_weight ;
5038+ std::vector<uint16_t > raw_data (ggml_nbytes (tensor) / sizeof (uint16_t ));
5039+ ggml_backend_tensor_get (tensor, raw_data.data (), 0 , ggml_nbytes (tensor));
5040+
5041+ // Convert first 10 values from F16 to F32 for display
5042+ for (int i = 0 ; i < 10 ; i++) {
5043+ float converted_value = ggml_fp16_to_fp32 (raw_data[i]);
5044+ WHISPER_LOG_INFO (" %s: model.encoder.0.reparam_conv: [%d]: %f (raw: 0x%04x)\n " ,
5045+ __func__, i, converted_value, raw_data[i]);
5046+ }
5047+ }
5048+
5049+ {
5050+ // Debug: print the first 10 values of encoder_0_bias, read as F32
5051+ struct ggml_tensor * tensor = model.encoder_0_bias ;
5052+ std::vector<float > read_b (ggml_nbytes (tensor));
5053+ ggml_backend_tensor_get (tensor, read_b.data (), 0 , ggml_nbytes (tensor));
5054+ for (int i = 0 ; i < 10 ; i++) {
5055+ WHISPER_LOG_INFO (" %s: encoder_0_bias: [%d]: %f\n " , __func__, i, read_b[i]);
5056+ }
50295057 }
50305058 }
50315059
@@ -5122,7 +5150,7 @@ struct whisper_vad_segments whisper_vad_detect_speech(
51225150 }
51235151 WHISPER_LOG_INFO (" %s: finished processing %d samples\n " , __func__, n_samples);
51245152 for (int i = 0 ; i < probs.size (); i++) {
5125- WHISPER_LOG_INFO (" %s: prob[%d]: %f\n " , __func__, i, probs[i]);
5153+ // WHISPER_LOG_INFO("%s: prob[%d]: %f\n", __func__, i, probs[i]);
51265154 }
51275155
51285156 segments.n_segments = n_frames;
0 commit comments