-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
121 lines (111 loc) · 3.26 KB
/
Copy pathdocker-compose.yml
File metadata and controls
121 lines (111 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Compose file format version declared for this stack.
version: "3.9"
# Service definitions: api, frontend, qdrant, ollama and init-ollama.
services:
# ----------------------------------------------------
# 1. Backend API (FastAPI)
# ----------------------------------------------------
api:
  container_name: symptomsense-api
  # Image is built from the repository root using docker/Dockerfile.
  build:
    dockerfile: docker/Dockerfile
    context: .
  restart: unless-stopped
  # Keep an interactive terminal attached to the container.
  stdin_open: true
  tty: true
  shm_size: "2gb"
  # Cap the container at 8G of memory.
  deploy:
    resources:
      limits:
        memory: 8G
  # Start the vector store and the LLM service before the API.
  depends_on:
    - qdrant
    - ollama
  # Publish the API on host port 8000.
  ports:
    - "8000:8000"
  volumes:
    # Host directories bind-mounted under /app.
    - ./backend:/app/backend
    - ./scripts:/app/scripts
    - ./data:/app/data
    - ./models:/app/models
    # Named volumes (declared at the bottom of the file) for download caches.
    - hf_cache:/root/.cache/huggingface
    - torch_cache:/root/.cache/torch
  # All values are quoted so they reach the container as plain strings.
  environment:
    # LLM endpoint and model names served by the ollama service.
    OLLAMA_BASE_URL: "http://ollama:11434/v1"
    OLLAMA_MODEL: "deepseek-r1:latest"
    OLLAMA_VISION_MODEL: "llama3.2-vision"
    # Embedding and voice settings.
    USE_LOCAL_EMBEDDINGS: "true"
    LOCAL_EMBED_MODEL: "BAAI/bge-m3"
    KOKORO_VOICE: "af_heart"
    # Vector database reached over the compose network.
    QDRANT_URL: "http://qdrant:6333"
    QDRANT_MODE: "remote"
# ----------------------------------------------------
# 2. Frontend (Streamlit)
# ----------------------------------------------------
frontend:
  container_name: symptomsense-frontend
  # Image is built from the repository root using docker/Dockerfile.streamlit.
  build:
    dockerfile: docker/Dockerfile.streamlit
    context: .
  restart: unless-stopped
  # The UI is started after the api service.
  depends_on:
    - api
  # Publish the UI on host port 8501.
  ports:
    - "8501:8501"
  # Host ./frontend directory is bind-mounted into the container.
  volumes:
    - ./frontend:/app/frontend
  # Address of the api service on the compose network.
  environment:
    BACKEND_URL: "http://api:8000"
# ----------------------------------------------------
# 3. Vector Database (Qdrant)
# ----------------------------------------------------
qdrant:
  container_name: symptomsense-qdrant
  image: qdrant/qdrant:latest
  restart: unless-stopped
  # Publish Qdrant on host port 6333 (the port used by QDRANT_URL in api).
  ports:
    - "6333:6333"
  # Storage directory is bind-mounted from the host's ./data/qdrant_db.
  volumes:
    - ./data/qdrant_db:/qdrant/storage
# ----------------------------------------------------
# 6. Local LLM Service (Ollama) - Self-contained
# ----------------------------------------------------
ollama:
  container_name: symptomsense-ollama
  # Image is built from ./docker using Dockerfile.ollama.
  build:
    dockerfile: Dockerfile.ollama
    context: ./docker
  restart: unless-stopped
  tty: true
  # Publish Ollama on host port 11434.
  ports:
    - "11434:11434"
  # /root/.ollama is bind-mounted from the host's ./data/ollama.
  volumes:
    - ./data/ollama:/root/.ollama
  # GPU support if available (optional, requires nvidia-container-toolkit)
  # deploy:
  #   resources:
  #     reservations:
  #       devices:
  #         - driver: nvidia
  #           count: 1
  #           capabilities: [gpu]
# ----------------------------------------------------
# 7. Init Service (Automated Model Pull)
# ----------------------------------------------------
init-ollama:
  image: curlimages/curl:latest
  container_name: symptomsense-init-ollama
  # One-shot helper: polls the Ollama API until it answers, then requests both
  # model pulls in parallel and blocks until the two requests have finished.
  #
  # The curl calls are backgrounded directly in the main shell (not inside
  # "( ... & )" subshells) so that `wait` actually has child jobs to wait for;
  # otherwise the shell would exit immediately and the container would stop
  # while the pull requests were still in flight.
  # Steps are separated with `;` so that `&` backgrounds only the curl command
  # and not the whole preceding chain.
  # NOTE: `wait` without operands does not report the exit status of the
  # pulls, so the final message is printed even if a pull failed.
  command: >
    /bin/sh -c "
    echo 'Waiting for Ollama Service...';
    until curl -s http://ollama:11434/api/tags > /dev/null; do sleep 2; done;
    echo 'Ollama is ready. Pulling models in parallel...';
    curl -X POST http://ollama:11434/api/pull -d '{\"name\": \"deepseek-r1:latest\"}' &
    curl -X POST http://ollama:11434/api/pull -d '{\"name\": \"llama3.2-vision\"}' &
    wait;
    echo 'Models pulled successfully!'
    "
  # Started after the ollama container; readiness is handled by the poll above.
  depends_on:
    - ollama
# Named volumes with default settings, mounted by the api service.
volumes:
  # Mounted at /root/.cache/huggingface.
  hf_cache: {}
  # Mounted at /root/.cache/torch.
  torch_cache: {}