Skip to content

Commit 15a11d1

Browse files
authored
pcre2test: avoid printing invalid utf trail in partial match (#237)
When match_invalid_utf is enabled, invalid UTF-8 data can't match, but it was mistakenly being printed as part of a partial match even though the ovector correctly didn't include it, as shown by:

    PCRE2 version 10.34 2019-11-21
      re> /(?<=..)X/match_invalid_utf,allvector
    data> XX\x80\=ph,ovector=1
    Partial match: \x{80}
    ** ovector[1] is not equal to the subject length: 2 != 3
     0: 2 2

Fix the logic to print the empty match that was returned instead, and address a buffer overread when trying to decode UTF-8 that was missing code units.

Fixes: #235
1 parent 9bad465 commit 15a11d1

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ testtemp2
6666
testtemp2grep
6767
testtry
6868
testtrygrep
69+
testSinput
70+
testbtables
71+
testsaved1
72+
testsaved2
6973

7074
m4/libtool.m4
7175
m4/ltoptions.m4

src/pcre2test.c

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2913,14 +2913,15 @@ limit.
29132913
29142914
Argument:
29152915
utf8bytes a pointer to the byte vector
2916+
end a pointer to the end of the byte vector
29162917
vptr a pointer to an int to receive the value
29172918
29182919
Returns: > 0 => the number of bytes consumed
29192920
-6 to 0 => malformed UTF-8 character at offset = (-return)
29202921
*/
29212922

29222923
static int
2923-
utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2924+
utf82ord(PCRE2_SPTR8 utf8bytes, PCRE2_SPTR8 end, uint32_t *vptr)
29242925
{
29252926
uint32_t c = *utf8bytes++;
29262927
uint32_t d = c;
@@ -2942,6 +2943,8 @@ d = (c & utf8_table3[i]) << s;
29422943

29432944
for (j = 0; j < i; j++)
29442945
{
2946+
if (utf8bytes >= end) return 0;
2947+
29452948
c = *utf8bytes++;
29462949
if ((c & 0xc0) != 0x80) return -(j+1);
29472950
s -= 6;
@@ -3052,14 +3055,16 @@ counts chars without printing (because pchar() does that). */
30523055

30533056
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
30543057
{
3058+
PCRE2_SPTR8 end;
30553059
uint32_t c = 0;
30563060
int yield = 0;
30573061
if (length < 0) length = *p++;
3062+
end = p + length;
30583063
while (length-- > 0)
30593064
{
30603065
if (utf)
30613066
{
3062-
int rc = utf82ord(p, &c);
3067+
int rc = utf82ord(p, end, &c);
30633068
if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
30643069
{
30653070
length -= rc - 1;
@@ -3238,7 +3243,8 @@ if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
32383243
else while (len > 0)
32393244
{
32403245
uint32_t c;
3241-
int chlen = utf82ord(p, &c);
3246+
const uint8_t *end = p + len;
3247+
int chlen = utf82ord(p, end, &c);
32423248
if (chlen <= 0) return -1;
32433249
if (!utf && c > 0xffff) return -3;
32443250
if (c > 0x10ffff) return -2;
@@ -3329,13 +3335,14 @@ else while (len > 0)
33293335
int chlen;
33303336
uint32_t c;
33313337
uint32_t topbit = 0;
3338+
const uint8_t *end = p + len;
33323339
if (!utf && *p == 0xff && len > 1)
33333340
{
33343341
topbit = 0x80000000u;
33353342
p++;
33363343
len--;
33373344
}
3338-
chlen = utf82ord(p, &c);
3345+
chlen = utf82ord(p, end, &c);
33393346
if (chlen <= 0) return -1;
33403347
if (utf && c > 0x10ffff) return -2;
33413348
p += chlen;
@@ -6852,7 +6859,9 @@ if (utf)
68526859
uint8_t *q;
68536860
uint32_t cc;
68546861
int n = 1;
6855-
for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6862+
uint8_t *q_end = p + len;
6863+
6864+
for (q = p; n > 0 && *q; q += n) n = utf82ord(q, q_end, &cc);
68566865
if (n <= 0)
68576866
{
68586867
fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
@@ -8081,7 +8090,7 @@ for (gmatched = 0;; gmatched++)
80818090
rubriclength += 15;
80828091

80838092
PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
8084-
PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
8093+
PCHARSV(pp, ovector[0], ovector[1] - ovector[0], utf, outfile);
80858094

80868095
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
80878096
fprintf(outfile, " (JIT)");

0 commit comments

Comments
 (0)