Skip to content

Commit f13847c

Browse files
authored
server: fix regression on streamed non-chat completion w/ stops (#13785)
* more forgiving message diffs: partial stop words aren't erased, full stops are
* Add (slow) server test for completion + stream + stop
1 parent 79c137f commit f13847c

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

common/chat.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ static std::string string_diff(const std::string & last, const std::string & cur
3131
return current;
3232
}
3333
if (!string_starts_with(current, last)) {
34+
if (string_starts_with(last, current)) {
35+
// This happens if the last generation ended on a partial stop word (not erased),
36+
// and the current ended on a stop word (erased).
37+
return "";
38+
}
3439
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
3540
}
3641
return current.substr(last.size());

tools/server/tests/unit/test_completion.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,30 @@ def test_completion_stream_with_openai_library():
121121
assert match_regex("(going|bed)+", output_text)
122122

123123

124+
# Test case from https://github.com/ggml-org/llama.cpp/issues/13780
@pytest.mark.slow
def test_completion_stream_with_openai_library_stops():
    """Stream a non-chat completion with stop strings through the OpenAI client.

    Starts the server with a Phi-3.5-mini-instruct GGUF model, requests a
    streamed completion that is bounded by two stop strings, concatenates the
    text of every chunk that has no finish reason yet, and checks that the
    accumulated text matches the expected opening of the reply.
    """
    global server
    server.model_hf_repo = "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M"
    server.model_hf_file = None
    server.start()

    openai_client = OpenAI(
        api_key="dummy",
        base_url=f"http://{server.server_host}:{server.server_port}/v1",
    )
    stream = openai_client.completions.create(
        model="davinci-002",
        prompt="System: You are helpfull assistant.\nAssistant:\nHey! How could I help?\nUser:\nTell me a joke.\nAssistant:\n",
        stop=["User:\n", "Assistant:\n"],
        max_tokens=200,
        stream=True,
    )

    pieces = []
    for chunk in stream:
        first_choice = chunk.choices[0]
        # Chunks that carry a finish reason contribute no text to the output.
        if first_choice.finish_reason is not None:
            continue
        assert first_choice.text is not None
        pieces.append(first_choice.text)
    output_text = ''.join(pieces)

    assert match_regex("Sure, here's one for[\\s\\S]*", output_text), f'Unexpected output: {output_text}'
146+
147+
124148
@pytest.mark.parametrize("n_slots", [1, 2])
125149
def test_consistent_result_same_seed(n_slots: int):
126150
global server

0 commit comments

Comments
 (0)