resemble-ai · barbicane · Sep 13, 2025 · Sep 13, 2025 · Sep 14, 2025 · Sep 14, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.11-slim-bullseye
+WORKDIR /app
+
+# System dependencies required for Python packages and Chatterbox (recommended extras skipped to keep the image slim)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+# Let the host reach the Gradio apps: launch() falls back to GRADIO_SERVER_NAME and
+# GRADIO_SERVER_PORT when no explicit values are passed, so the sources need no patching
+ENV GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+# Install numpy explicitly to avoid dependency issues with pkuseg; gradio is pinned to the version pyproject.toml requires
+RUN pip install --no-cache-dir "numpy>=1.24.0,<1.26.0" "torchaudio==2.6.0" "librosa==0.11.0" "gradio[mcp]==5.44.1" && \
+    pip install --no-cache-dir -e .
+
+EXPOSE 7860
+
+ENV PYTHONUNBUFFERED=1
+
+CMD ["bash"]
diff --git a/README.md b/README.md
@@ -39,7 +39,20 @@ Arabic (ar) • Danish (da) • German (de) • Greek (el) • English (en) •
   - Try lower `cfg_weight` values (e.g. `~0.3`) and increase `exaggeration` to around `0.7` or higher.
   - Higher `exaggeration` tends to speed up speech; reducing `cfg_weight` helps compensate with slower, more deliberate pacing.
 
-
+# Testing using Docker
+First, build the Docker image with the commands below:
+```
+cd chatterbox
+docker build -t chatterbox-tts .
+```
+Then run it with:
+```
+docker run -it -p 7860:7860 chatterbox-tts:latest
+```
+Once inside the container, start an app with the usual command, for example:
+```
+python gradio_tts_app.py
+``` 
 # Installation
 ```shell
 pip install chatterbox-tts

diff --git a/multilingual_app.py b/multilingual_app.py
@@ -314,4 +314,4 @@ def on_language_change(lang, current_ref, current_text):
         outputs=[audio_output],
     )
 
-demo.launch(mcp_server=True)
+demo.launch(mcp_server=True, share=True)
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
     "resemble-perth==1.0.1",
     "conformer==0.3.2",
     "safetensors==0.5.3",
-    "pkuseg ==0.0.25",
+    "pkuseg==0.0.25",
     "pykakasi==2.3.0",
     "gradio==5.44.1",
 

diff --git a/src/chatterbox/mtl_tts.py b/src/chatterbox/mtl_tts.py
@@ -176,7 +176,7 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
 
         s3gen = S3Gen()
         s3gen.load_state_dict(
-            torch.load(ckpt_dir / "s3gen.pt", weights_only=True)
+            torch.load(ckpt_dir / "s3gen.pt", weights_only=True, map_location=torch.device('cpu') if device=="cpu" else None)
         )
         s3gen.to(device).eval()