Skip to content

Commit 948ff13

Browse files
authored
server : fix handling of characters that span multiple tokens when streaming (#4446)
1 parent 4d98d9a commit 948ff13

File tree

1 file changed

+19
-20
lines changed

1 file changed

+19
-20
lines changed

examples/server/server.cpp

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,6 @@ struct llama_client_slot
376376

377377
int32_t num_prompt_tokens = 0;
378378
int32_t num_prompt_tokens_processed = 0;
379-
int32_t multibyte_pending = 0;
380379

381380
json prompt;
382381
std::string generated_text;
@@ -425,7 +424,6 @@ struct llama_client_slot
425424
stopped_word = false;
426425
stopped_limit = false;
427426
stopping_word = "";
428-
multibyte_pending = 0;
429427
n_past = 0;
430428
sent_count = 0;
431429
sent_token_probs_index = 0;
@@ -992,35 +990,36 @@ struct llama_server_context
992990
slot.generated_text += token_str;
993991
slot.has_next_token = true;
994992

995-
if (slot.multibyte_pending > 0)
993+
// check if there is an incomplete UTF-8 character at the end
994+
bool incomplete = false;
995+
for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i)
996996
{
997-
slot.multibyte_pending -= token_str.size();
998-
}
999-
else if (token_str.size() == 1)
1000-
{
1001-
const char c = token_str[0];
1002-
// 2-byte characters: 110xxxxx 10xxxxxx
997+
unsigned char c = slot.generated_text[slot.generated_text.size() - i];
998+
if ((c & 0xC0) == 0x80)
999+
{
1000+
// continuation byte: 10xxxxxx
1001+
continue;
1002+
}
10031003
if ((c & 0xE0) == 0xC0)
10041004
{
1005-
slot.multibyte_pending = 1;
1006-
// 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
1005+
// 2-byte character: 110xxxxx ...
1006+
incomplete = i < 2;
10071007
}
10081008
else if ((c & 0xF0) == 0xE0)
10091009
{
1010-
slot.multibyte_pending = 2;
1011-
// 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1010+
// 3-byte character: 1110xxxx ...
1011+
incomplete = i < 3;
10121012
}
10131013
else if ((c & 0xF8) == 0xF0)
10141014
{
1015-
slot.multibyte_pending = 3;
1016-
}
1017-
else
1018-
{
1019-
slot.multibyte_pending = 0;
1015+
// 4-byte character: 11110xxx ...
1016+
incomplete = i < 4;
10201017
}
1018+
// else 1-byte character or invalid byte
1019+
break;
10211020
}
10221021

1023-
if (slot.multibyte_pending == 0)
1022+
if (!incomplete)
10241023
{
10251024
size_t pos = std::min(slot.sent_count, slot.generated_text.size());
10261025
const std::string str_test = slot.generated_text.substr(pos);
@@ -1055,7 +1054,7 @@ struct llama_server_context
10551054
}
10561055
}
10571056

1058-
if (slot.multibyte_pending > 0 && !slot.has_next_token)
1057+
if (incomplete)
10591058
{
10601059
slot.has_next_token = true;
10611060
}

0 commit comments

Comments
 (0)