@@ -251,7 +251,7 @@ static std::vector<std::string> get_words(const std::string &txt) {
251251
252252// command-list mode
253253// guide the transcription to match the most likely command from a provided list
254- static int process_command_list (struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
254+ static int process_command_list (struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, std::ofstream &fout ) {
255255 fprintf (stderr, " \n " );
256256 fprintf (stderr, " %s: guided mode\n " , __func__);
257257
@@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
444444
445445 const float prob = probs_id[0 ].first ;
446446 const int index = probs_id[0 ].second ;
447+ const char * best_command = allowed_commands[index].c_str ();
447448
448449 fprintf (stdout, " \n " );
449450 fprintf (stdout, " %s: detected command: %s%s%s | p = %f | t = %d ms\n " , __func__,
450- " \033 [1m" , allowed_commands[index]. c_str () , " \033 [0m" , prob,
451+ " \033 [1m" , best_command , " \033 [0m" , prob,
451452 (int ) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count ());
452453 fprintf (stdout, " \n " );
454+ if (fout.is_open ()) {
455+ fout << best_command << std::endl;
456+ }
453457 }
454458 }
455459
@@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
462466
463467// always-prompt mode
464468// transcribe the voice into text after valid prompt
465- static int always_prompt_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
469+ static int always_prompt_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout ) {
466470 bool is_running = true ;
467471 bool ask_prompt = true ;
468472
@@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
528532
529533 if ((sim > 0 .7f ) && (command.size () > 0 )) {
530534 fprintf (stdout, " %s: Command '%s%s%s', (t = %d ms)\n " , __func__, " \033 [1m" , command.c_str (), " \033 [0m" , (int ) t_ms);
535+ if (fout.is_open ()) {
536+ fout << command << std::endl;
537+ }
531538 }
532539
533540 fprintf (stdout, " \n " );
@@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
542549
543550// general-purpose mode
544551// freely transcribe the voice into text
545- static int process_general_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
552+ static int process_general_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout ) {
546553 bool is_running = true ;
547554 bool have_prompt = false ;
548555 bool ask_prompt = true ;
@@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
662669 } else {
663670 // cut the prompt from the decoded text
664671 const std::string command = ::trim (txt.substr (best_len));
665-
666672 fprintf (stdout, " %s: Command '%s%s%s', (t = %d ms)\n " , __func__, " \033 [1m" , command.c_str (), " \033 [0m" , (int ) t_ms);
673+ if (fout.is_open ()) {
674+ fout << command << std::endl;
675+ }
667676 }
668677
669678 fprintf (stdout, " \n " );
@@ -759,13 +768,22 @@ int main(int argc, char ** argv) {
759768 }
760769 }
761770
771+ std::ofstream fout;
772+ if (params.fname_out .length () > 0 ) {
773+ fout.open (params.fname_out );
774+ if (!fout.is_open ()) {
775+ fprintf (stderr, " %s: failed to open output file '%s'!\n " , __func__, params.fname_out .c_str ());
776+ return 1 ;
777+ }
778+ }
779+
762780 if (ret_val == 0 ) {
763781 if (!params.commands .empty ()) {
764- ret_val = process_command_list (ctx, audio, params);
782+ ret_val = process_command_list (ctx, audio, params, fout );
765783 } else if (!params.prompt .empty () && params.grammar_parsed .rules .empty ()) {
766- ret_val = always_prompt_transcription (ctx, audio, params);
784+ ret_val = always_prompt_transcription (ctx, audio, params, fout );
767785 } else {
768- ret_val = process_general_transcription (ctx, audio, params);
786+ ret_val = process_general_transcription (ctx, audio, params, fout );
769787 }
770788 }
771789
0 commit comments