Skip to content

Commit 0f6fd21

Browse files
authored
Merge branch 'main' into ollama-preload
2 parents e795c24 + eff6eb0 commit 0f6fd21

3 files changed

Lines changed: 44 additions & 14 deletions

File tree

examples/rag_example.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,27 +6,37 @@
66
load_dotenv()
77
Settings.setup_logging()
88

9-
persist_directory = './defaultDb'
9+
persist_directory = "./defaultDb"
1010
model_embeddings = Settings.DEFAULT_EMBEDDINGS_MODEL
11-
model_name = 'llama3.1:8b'
11+
model_name = "llama3.1:8b"
1212
system_prompt_directory = Settings.DEFAULT_SYSTEM_PROMPT
1313
collection_name = str(uuid.uuid4())
1414

15-
rag = Builder() \
16-
.with_embeddings(Settings.HUGGINGFACE, model_name=model_embeddings) \
17-
.with_vector_store(Settings.CHROMA, persist_directory=persist_directory, collection_name=collection_name) \
18-
.with_llm(Settings.OLLAMA, model_name=model_name, system_prompt=system_prompt_directory) \
15+
rag = (
16+
Builder()
17+
.with_embeddings(Settings.HUGGINGFACE, model_name=model_embeddings)
18+
.with_vector_store(
19+
Settings.CHROMA,
20+
persist_directory=persist_directory,
21+
collection_name=collection_name,
22+
)
23+
.with_llm(
24+
Settings.OLLAMA,
25+
model_name=model_name,
26+
system_prompt=system_prompt_directory,
27+
options={"num_ctx": 8192},
28+
)
1929
.build_rag(k=5)
30+
)
2031

2132
rag.vector_store.ingest(
22-
data_path='./src/raglight',
23-
# ignore_folders=ignore_folders
24-
)
33+
data_path="./src/raglight",
34+
# ignore_folders=ignore_folders
35+
)
2536
while True:
2637
query = input(">>> ")
27-
if query == "quit" or query == "bye" :
28-
print('🤖 : See you soon 👋')
38+
if query == "quit" or query == "bye":
39+
print("🤖 : See you soon 👋")
2940
break
3041
response = rag.generate(query)
3142
print(response)
32-

src/raglight/llm/ollama_model.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77
from json import dumps
88
import logging
99

10+
# https://docs.ollama.com/context-length
11+
OLLAMA_DEFAULT_CONTEXT_SIZE = 4096
12+
OLLAMA_OPTION_CONTEXT_SIZE = "num_ctx"
13+
OLLAMA_WARNING_CONTEXT_SIZE = 0.80
14+
1015

1116
class OllamaModel(LLM):
1217
"""
@@ -47,10 +52,15 @@ def __init__(
4752
self.headers = headers
4853
self.preload_model = preload_model
4954
self.options = options
55+
self.max_context_size = (
56+
self.options.get(OLLAMA_OPTION_CONTEXT_SIZE, OLLAMA_DEFAULT_CONTEXT_SIZE)
57+
if self.options
58+
else OLLAMA_DEFAULT_CONTEXT_SIZE
59+
)
5060
super().__init__(model_name, system_prompt, system_prompt_file, self.api_base)
5161
logging.info(f"Using Ollama with {model_name} model 🤖")
5262
self.role: str = role
53-
63+
5464
@override
5565
def load(self) -> Client:
5666
"""
@@ -100,6 +110,14 @@ def generate(self, input: Dict[str, Any]) -> str:
100110
messages=messages,
101111
options=self.options,
102112
)
113+
114+
token_usage = response.eval_count + response.prompt_eval_count
115+
if token_usage / self.max_context_size > OLLAMA_WARNING_CONTEXT_SIZE:
116+
logging.warning(
117+
f"Over {OLLAMA_WARNING_CONTEXT_SIZE * 100}% of context window reached, consider increasing it or reducing prompt size."
118+
+ f" Current usage : {token_usage} out of {self.max_context_size} Tokens"
119+
)
120+
103121
return response.message.content
104122

105123
@override

tests/tests_llm/test_ollama_model.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@ def setUp(self):
2121
role="assistant",
2222
content="Machine learning (ML) is a subset of artificial intelligence",
2323
)
24-
chat_response: ChatResponse = ChatResponse(message=message)
24+
chat_response: ChatResponse = ChatResponse(
25+
message=message, prompt_eval_count=200, eval_count=50
26+
)
2527
mock_ollama_client.chat = MagicMock(return_value=chat_response)
2628
self.model.model = mock_ollama_client
2729

0 commit comments

Comments (0)