Skip to content

Commit 1ee365b

Browse files
authored
[Voice Agent] Fix text aggregation, eob handling, logging (#14951)
* fix text aggregation, eob handling, logging Signed-off-by: stevehuang52 <heh@nvidia.com> * improve text segmentation logic Signed-off-by: stevehuang52 <heh@nvidia.com> * improve text segmentation logic Signed-off-by: stevehuang52 <heh@nvidia.com> * revert default cfg Signed-off-by: stevehuang52 <heh@nvidia.com> * revert default config Signed-off-by: stevehuang52 <heh@nvidia.com> * pin stt frame type to interim when turn-taking module is enabled Signed-off-by: stevehuang52 <heh@nvidia.com> * update llm yamls to let server yaml control type and device Signed-off-by: stevehuang52 <heh@nvidia.com> * refactor diar Signed-off-by: stevehuang52 <heh@nvidia.com> * refactor diar Signed-off-by: stevehuang52 <heh@nvidia.com> * refactor ASR and add EOU latency and prob logging Signed-off-by: stevehuang52 <heh@nvidia.com> --------- Signed-off-by: stevehuang52 <heh@nvidia.com>
1 parent 021e187 commit 1ee365b

File tree

25 files changed

+460
-93
lines changed

25 files changed

+460
-93
lines changed

examples/voice_agent/client/package-lock.json

Lines changed: 59 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/voice_agent/client/package.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@
1414
"devDependencies": {
1515
"@types/node": "^22.15.30",
1616
"@types/protobufjs": "^6.0.0",
17+
"@types/react": "^19.2.2",
18+
"@types/react-dom": "^19.2.2",
1719
"@vitejs/plugin-react-swc": "^3.10.1",
1820
"typescript": "^5.8.3",
1921
"vite": "^6.3.5"
2022
},
2123
"dependencies": {
2224
"@pipecat-ai/client-js": "^0.4.0",
2325
"@pipecat-ai/websocket-transport": "^0.4.1",
24-
"protobufjs": "^7.4.0"
26+
"protobufjs": "^7.4.0",
27+
"react": "^19.2.0",
28+
"react-dom": "^19.2.0"
2529
}
2630
}

examples/voice_agent/client/src/app.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ class WebsocketClientApp {
4646
private readonly serverConfigs = {
4747
websocket: {
4848
name: 'WebSocket Server',
49-
baseUrl: 'http://localhost:7860',
49+
baseUrl: `http://${window.location.hostname}:7860`,
5050
port: 8765
5151
},
5252
fastapi: {
5353
name: 'FastAPI Server',
54-
baseUrl: 'http://localhost:8000',
54+
baseUrl: `http://${window.location.hostname}:8000`,
5555
port: 8000
5656
}
5757
};
@@ -257,6 +257,7 @@ class WebsocketClientApp {
257257

258258
this.log('Initializing devices...');
259259
await this.rtviClient.initDevices();
260+
this.log('Devices initialized successfully');
260261

261262
this.log('Connecting to bot...');
262263
await this.rtviClient.connect();

examples/voice_agent/client/tsconfig.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
1212

1313
/* Language and Environment */
14-
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
15-
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16-
// "jsx": "preserve", /* Specify what JSX code is generated. */
14+
"target": "ES2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
15+
"lib": ["ES2020", "DOM", "DOM.Iterable"], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16+
"jsx": "react-jsx", /* Specify what JSX code is generated. */
1717
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
1818
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
1919
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
@@ -25,9 +25,9 @@
2525
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
2626

2727
/* Modules */
28-
"module": "commonjs", /* Specify what module code is generated. */
28+
"module": "ESNext", /* Specify what module code is generated. */
2929
// "rootDir": "./", /* Specify the root folder within your source files. */
30-
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
30+
"moduleResolution": "bundler", /* Specify how TypeScript looks up a file from a given module specifier. */
3131
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
3232
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
3333
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
@@ -41,7 +41,7 @@
4141
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
4242
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
4343
// "noUncheckedSideEffectImports": true, /* Check side effect imports. */
44-
// "resolveJsonModule": true, /* Enable importing .json files. */
44+
"resolveJsonModule": true, /* Enable importing .json files. */
4545
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
4646
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
4747

@@ -74,10 +74,10 @@
7474
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
7575

7676
/* Interop Constraints */
77-
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
77+
"isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
7878
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
7979
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
80-
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
80+
"allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
8181
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
8282
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
8383
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */

examples/voice_agent/server/backchannel_phrases.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
- "great"
1212
- "great thanks"
1313
- "ha ha"
14-
- "hi"
1514
- "hmm"
1615
- "humm"
1716
- "huh"

examples/voice_agent/server/bot_websocket_server.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@
2828
from pipecat.pipeline.runner import PipelineRunner
2929
from pipecat.pipeline.task import PipelineParams, PipelineTask
3030
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
31-
from pipecat.processors.frameworks.rtvi import RTVIAction, RTVIConfig, RTVIObserver, RTVIProcessor
31+
from pipecat.processors.frameworks.rtvi import RTVIAction, RTVIConfig, RTVIProcessor
3232
from pipecat.serializers.protobuf import ProtobufFrameSerializer
3333

34-
from nemo.agents.voice_agent.pipecat.services.nemo.diar import NeMoDiarInputParams, NemoDiarService
34+
from nemo.agents.voice_agent.pipecat.processors.frameworks.rtvi import RTVIObserver
35+
from nemo.agents.voice_agent.pipecat.services.nemo.diar import NemoDiarService
3536
from nemo.agents.voice_agent.pipecat.services.nemo.llm import get_llm_service_from_config
36-
from nemo.agents.voice_agent.pipecat.services.nemo.stt import NeMoSTTInputParams, NemoSTTService
37+
from nemo.agents.voice_agent.pipecat.services.nemo.stt import NemoSTTService
3738
from nemo.agents.voice_agent.pipecat.services.nemo.tts import KokoroTTSService, NeMoFastPitchHiFiGANTTSService
3839
from nemo.agents.voice_agent.pipecat.services.nemo.turn_taking import NeMoTurnTakingService
3940
from nemo.agents.voice_agent.pipecat.transports.network.websocket_server import (
@@ -243,7 +244,9 @@ async def reset_context_handler(rtvi_processor: RTVIProcessor, service: str, arg
243244
assistant_context_aggregator.reset()
244245
user_context_aggregator.set_messages(copy.deepcopy(original_messages))
245246
assistant_context_aggregator.set_messages(copy.deepcopy(original_messages))
246-
247+
text_aggregator.reset()
248+
if diar is not None:
249+
diar.reset()
247250
logger.info("Conversation context reset successfully")
248251
return True
249252
except Exception as e:
@@ -276,6 +279,7 @@ async def reset_context_handler(rtvi_processor: RTVIProcessor, service: str, arg
276279

277280
pipeline = Pipeline(pipeline)
278281

282+
rtvi_text_aggregator = SimpleSegmentedTextAggregator("\n?!.", min_sentence_length=5)
279283
task = PipelineTask(
280284
pipeline,
281285
params=PipelineParams(
@@ -286,7 +290,7 @@ async def reset_context_handler(rtvi_processor: RTVIProcessor, service: str, arg
286290
report_only_initial_ttfb=True,
287291
idle_timeout=None, # Disable idle timeout
288292
),
289-
observers=[RTVIObserver(rtvi)],
293+
observers=[RTVIObserver(rtvi, text_aggregator=rtvi_text_aggregator)],
290294
idle_timeout_secs=None,
291295
cancel_on_idle_timeout=False,
292296
)

examples/voice_agent/server/example_prompts/fast-bite.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Fast Bites Lunch Menu
22

3-
Burgers and Sandwiches
3+
Burgers and Sandwiches:
44
1. Classic Cheeseburger – $5.99
55
Juicy beef patty, cheddar cheese, pickles, ketchup & mustard on a toasted bun.
66
- Make it a double cheeseburger by adding another patty - $1.50
@@ -14,18 +14,18 @@ Combo Deals (includes small fries and fountain soda)
1414
5. Chicken Sandwich Combo – $9.49
1515
6. Veggie Wrap Combo – $8.49
1616

17-
Sides
17+
Sides:
1818
7. French Fries
1919
- Small - $2.49
2020
- Medium - $3.49
2121
- Large - $4.49
2222
8. Chicken Nuggets
23-
- 4 pcs - $3.29
24-
- 8 pcs - $5.99
25-
- 12 pcs - $8.99
26-
9. Side Salad - $2.99
23+
- 4 pieces - $3.29
24+
- 8 pieces - $5.99
25+
- 12 pieces - $8.99
26+
9. Side Salad - $2.99
2727

28-
Drinks
28+
Drinks:
2929
10. Fountain Soda (16 oz, choices: Coke, Diet Coke, Sprite, Fanta) – $1.99
3030
11. Iced Tea or Lemonade – $2.29
3131
12. Bottled Water – $1.49

examples/voice_agent/server/server_configs/default.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ vad:
1616
stt:
1717
type: nemo # choices in ['nemo'] currently only NeMo is supported
1818
model: "stt_en_fastconformer_hybrid_large_streaming_80ms"
19+
# model: "nvidia/parakeet_realtime_eou_120m-v1"
1920
model_config: "./server_configs/stt_configs/nemo_cache_aware_streaming.yaml"
2021
device: "cuda"
2122

@@ -41,12 +42,12 @@ llm:
4142
# model_config: "./server_configs/llm_configs/qwen2.5-7B.yaml"
4243
# model: "Qwen/Qwen3-8B"
4344
# model_config: "./server_configs/llm_configs/qwen3-8B.yaml"
45+
# model: meta-llama/Llama-3.1-8B-Instruct
4446
device: "cuda"
4547
enable_reasoning: false # it's best to turn-off reasoning for lowest latency
4648
# `system_prompt` is used as the system prompt to the LLM, please refer to the relevant LLM webpage for special functions like enabling/disabling thinking
4749
# system_prompt: /path/to/prompt.txt # or use path to a txt file that contains a long prompt, for example in `../example_prompts/fast_bite.txt`
4850
system_prompt: "You are a helpful AI agent named Lisa. Start by greeting the user warmly and introducing yourself within one sentence. Your answer should be concise and to the point. You might also see speaker tags (<speaker_0>, <speaker_1>, etc.) in the user context. You should respond to the user based on the speaker tag and the context of that speaker. Do not include the speaker tags in your response, use them only to identify the speaker. Do not include any emoji in response."
49-
5051
tts:
5152
type: kokoro # choices in ['nemo', 'kokoro']
5253
model: "hexgrad/Kokoro-82M"

examples/voice_agent/server/server_configs/llm_configs/hf_llm_generic.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# This is an example config for setting up a generic HuggingFace LLM for a NeMo Voice Agent server.
22
# Please refer to https://github.com/NVIDIA-NeMo/NeMo/tree/main/examples/voice_agent/README.md for more details
33

4-
type: auto # choices in ['auto', 'hf', 'vllm']
4+
# type: auto # choices in ['auto', 'hf', 'vllm']
5+
# device: "cuda"
56
dtype: bfloat16 # torch.dtype for LLM
6-
device: "cuda"
77
system_role: "system" # role for system prompt, set it to `user` for models that do not support system prompt
88
system_prompt_suffix: "/no_think" # a string that would be appended to the system prompt, used to enable/disable thinking
99

examples/voice_agent/server/server_configs/llm_configs/llama3.1-8B-instruct.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# This is an example config for setting up the Llama-3.1-8B-Instruct model for a NeMo Voice Agent server.
22
# Please refer to https://github.com/NVIDIA-NeMo/NeMo/tree/main/examples/voice_agent/README.md for more details
33

4-
type: auto # choices in ['auto', 'hf', 'vllm']
4+
# type: auto # choices in ['auto', 'hf', 'vllm']
5+
# model: meta-llama/Llama-3.1-8B-Instruct
6+
# device: "cuda"
57
dtype: bfloat16 # torch.dtype for LLM
6-
model: meta-llama/Llama-3.1-8B-Instruct
7-
device: "cuda"
88
system_role: "system" # role for system prompt, set it to `user` for models that do not support system prompt
99
system_prompt_suffix: null # a string that would be appended to the system prompt, used to enable/disable thinking
1010

0 commit comments

Comments
 (0)