diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..0b0e43f3
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,34 @@
+FROM python:3.11-slim-bullseye
+WORKDIR /app
+
+# Install system dependencies required for Python packages and Chatterbox.
+# --no-install-recommends keeps apt from pulling in optional extras.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        git \
+        libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Gradio falls back to these variables when launch() is given no server_name /
+# server_port, so the apps are reachable through the published port without
+# rewriting their source at build time.
+ENV GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860 \
+    PYTHONUNBUFFERED=1
+
+COPY . .
+
+# Install numpy explicitly to avoid dependency issues with pkuseg.
+# gradio[mcp] is pinned to the gradio version required by pyproject.toml so the
+# editable install below does not have to replace it.
+RUN pip install --no-cache-dir \
+        "numpy>=1.24.0,<1.26.0" \
+        "torchaudio==2.6.0" \
+        "librosa==0.11.0" \
+        "gradio[mcp]==5.44.1" \
+    && pip install --no-cache-dir -e .
+
+EXPOSE 7860
+
+# Default to an interactive shell; launch an app from it, e.g. python gradio_tts_app.py
+CMD ["bash"]
diff --git a/README.md b/README.md
index d6651ef7..b3d3523b 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,20 @@ Arabic (ar) • Danish (da) • German (de) • Greek (el) • English (en) •
   - Try lower `cfg_weight` values (e.g. `~0.3`) and increase `exaggeration` to around `0.7` or higher.
   - Higher `exaggeration` tends to speed up speech; reducing `cfg_weight` helps compensate with slower, more deliberate pacing.
 
-
+# Testing using Docker
+First, build the Docker image with the commands below:
+```
+cd chatterbox
+docker build -t chatterbox-tts .
+```
+Then run it with:
+```
+docker run -it -p 7860:7860 chatterbox-tts:latest
+```
+Once inside the container, start an app with the usual command, for example:
+```
+python gradio_tts_app.py
+```
 # Installation
 ```shell
 pip install chatterbox-tts
diff --git a/multilingual_app.py b/multilingual_app.py
index 51e9c693..a1e58cea 100644
--- a/multilingual_app.py
+++ b/multilingual_app.py
@@ -314,4 +314,4 @@ def on_language_change(lang, current_ref, current_text):
         outputs=[audio_output],
     )
 
-demo.launch(mcp_server=True)
+demo.launch(mcp_server=True, share=True)
diff --git a/pyproject.toml b/pyproject.toml
index e190b3af..4b249e54 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
     "resemble-perth==1.0.1",
     "conformer==0.3.2",
     "safetensors==0.5.3",
-    "pkuseg ==0.0.25",
+    "pkuseg==0.0.25",
     "pykakasi==2.3.0",
     "gradio==5.44.1",
 
diff --git a/src/chatterbox/mtl_tts.py b/src/chatterbox/mtl_tts.py
index fccfbf3d..47ce4e5f 100644
--- a/src/chatterbox/mtl_tts.py
+++ b/src/chatterbox/mtl_tts.py
@@ -176,7 +176,9 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
 
         s3gen = S3Gen()
         s3gen.load_state_dict(
-            torch.load(ckpt_dir / "s3gen.pt", weights_only=True)
+            # Always deserialize onto CPU, whatever device the checkpoint was
+            # saved from; s3gen.to(device) below moves the module afterwards.
+            torch.load(ckpt_dir / "s3gen.pt", weights_only=True, map_location=torch.device("cpu"))
         )
         s3gen.to(device).eval()
 