diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 8eb0b68dc..2f5c70528 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -3815,9 +3815,10 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If this option is set, however, \fBpcre2_substitute()\fP continues to go through the motions of matching and substituting (without, of course, writing anything) -in order to compute the size of buffer that is needed. This value is passed -back via the \fIoutlengthptr\fP variable, with the result of the function still -being PCRE2_ERROR_NOMEMORY. +in order to compute the size of buffer that is needed, which will include the +extra space for the terminating NUL. This value is passed back via the +\fIoutlengthptr\fP variable, with the result of the function still being +PCRE2_ERROR_NOMEMORY. .P Passing a buffer size of zero is a permitted way of finding out how much memory is needed for given substitution. However, this does mean that the entire @@ -3938,7 +3939,7 @@ that can be applied to group captures. For example, if group 1 has captured .P If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater -than 127. However, only basic case folding, as determined by the Unicode file +than 127. However, only simple case folding, as determined by the Unicode file \fBCaseFolding.txt\fP is supported. PCRE2 does not support language-specific special casing rules such as using different lower case Greek sigmas in the middle and ends of words (as defined in the Unicode file diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 73845f65b..db385c865 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1225,7 +1225,7 @@ command are of two types. The following modifiers set options for \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. See .\" HREF -\fBpcreapi\fP +\fBpcre2api\fP .\" for a description of their effects. .sp diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index caf34a66f..c439b495b 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -826,12 +826,12 @@ for (;;) return; case META_CAPTURE: - fprintf(stderr, "META_CAPTURE %d", meta_arg); + fprintf(stderr, "META_CAPTURE %u", meta_arg); break; case META_RECURSE: GETOFFSET(offset, pptr); - fprintf(stderr, "META_RECURSE %d %zd", meta_arg, offset); + fprintf(stderr, "META_RECURSE %u %zu", meta_arg, offset); break; case META_BACKREF: @@ -839,7 +839,7 @@ for (;;) offset = cb->small_ref_offset[meta_arg]; else GETOFFSET(offset, pptr); - fprintf(stderr, "META_BACKREF %d %zd", meta_arg, offset); + fprintf(stderr, "META_BACKREF %u %zu", meta_arg, offset); break; case META_ESCAPE: @@ -847,7 +847,7 @@ for (;;) { uint32_t ptype = *pptr >> 16; uint32_t pvalue = *pptr++ & 0xffff; - fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? CHAR_P:CHAR_p, + fprintf(stderr, "META \\%c %u %u", (meta_arg == ESC_P)? CHAR_P:CHAR_p, ptype, pvalue); } else @@ -914,7 +914,7 @@ for (;;) case META_LOOKAHEAD_NA: fprintf(stderr, "META (*napla:"); break; case META_SCRIPT_RUN: fprintf(stderr, "META (*sr:"); break; case META_KET: fprintf(stderr, "META )"); break; - case META_ALT: fprintf(stderr, "META | %d", meta_arg); break; + case META_ALT: fprintf(stderr, "META | %u", meta_arg); break; case META_CLASS: fprintf(stderr, "META ["); break; case META_CLASS_NOT: fprintf(stderr, "META [^"); break; @@ -925,8 +925,8 @@ for (;;) case META_RANGE_LITERAL: fprintf(stderr, "META - (literal)"); break; case META_RANGE_ESCAPED: fprintf(stderr, "META - (escaped)"); break; - case META_POSIX: fprintf(stderr, "META_POSIX %d", *pptr++); break; - case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break; + case META_POSIX: fprintf(stderr, "META_POSIX %u", *pptr++); break; + case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %u", *pptr++); break; case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break; case META_FAIL: fprintf(stderr, "META (*FAIL)"); break; @@ -941,22 +941,22 @@ for (;;) break; case META_LOOKBEHIND: - fprintf(stderr, "META (?<= %d %d", meta_arg, *pptr); + fprintf(stderr, "META (?<= %u %u", meta_arg, *pptr); pptr += 2; break; case META_LOOKBEHIND_NA: - fprintf(stderr, "META (*naplb: %d %d", meta_arg, *pptr); + fprintf(stderr, "META (*naplb: %u %u", meta_arg, *pptr); pptr += 2; break; case META_LOOKBEHINDNOT: - fprintf(stderr, "META (?="); - fprintf(stderr, "%d.", *pptr++); - fprintf(stderr, "%d)", *pptr++); + fprintf(stderr, "%u.", *pptr++); + fprintf(stderr, "%u)", *pptr++); break; case META_COND_NAME: - fprintf(stderr, "META (?() length=%d offset=", *pptr++); + fprintf(stderr, "META (?() length=%u offset=", *pptr++); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); break; case META_COND_RNAME: - fprintf(stderr, "META (?(R&name) length=%d offset=", *pptr++); + fprintf(stderr, "META (?(R&name) length=%u offset=", *pptr++); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); break; /* This is kept as a name, because it might be. */ case META_COND_RNUMBER: - fprintf(stderr, "META (?(Rnumber) length=%d offset=", *pptr++); + fprintf(stderr, "META (?(Rnumber) length=%u offset=", *pptr++); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); break; case META_SCS_NAME: - fprintf(stderr, "META (*scan_substring:() length=%d offset=", *pptr++); + fprintf(stderr, "META (*scan_substring:() length=%u offset=", *pptr++); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); break; case META_SCS_NUMBER: - fprintf(stderr, "META_SCS_NUMBER %d offset=", pptr[SIZEOFFSET]); + fprintf(stderr, "META_SCS_NUMBER %u offset=", pptr[SIZEOFFSET]); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); pptr++; break; case META_SCS_NEXT_NAME: - fprintf(stderr, "META_SCS_NEXT_NAME length=%d offset=", *pptr++); + fprintf(stderr, "META_SCS_NEXT_NAME length=%u offset=", *pptr++); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); break; case META_SCS_NEXT_NUMBER: - fprintf(stderr, "META_SCS_NEXT_NUMBER %d offset=", pptr[SIZEOFFSET]); + fprintf(stderr, "META_SCS_NEXT_NUMBER %u offset=", pptr[SIZEOFFSET]); GETOFFSET(offset, pptr); - fprintf(stderr, "%zd", offset); + fprintf(stderr, "%zu", offset); pptr++; break; @@ -1099,7 +1099,6 @@ associated JIT data. */ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION pcre2_code_copy(const pcre2_code *code) { -PCRE2_SIZE *ref_count; pcre2_code *newcode; if (code == NULL) return NULL; @@ -1113,7 +1112,7 @@ in the decoded tables. */ if ((code->flags & PCRE2_DEREF_TABLES) != 0) { - ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH); + PCRE2_SIZE *ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH); (*ref_count)++; } @@ -2597,9 +2596,7 @@ won't be recognized. */ } while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) - { ptr++; - } } /* Check name length */ @@ -5479,7 +5476,7 @@ static BOOL find_dupname_details(PCRE2_SPTR name, uint32_t length, int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb) { -uint32_t i, groupnumber; +uint32_t i; int count; PCRE2_UCHAR *slot = cb->name_table; @@ -5511,8 +5508,9 @@ count = 0; for (;;) { + uint32_t groupnumber = GET2(slot,0); + count++; - groupnumber = GET2(slot,0); cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1; if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; if (++i >= cb->names_found) break; @@ -6800,7 +6798,7 @@ for (;; pptr++) #ifdef MAYBE_UTF_MULTI if (utf && NOT_FIRSTCU(code[-1])) { - PCRE2_UCHAR *lastchar = code - 1; + PCRE2_SPTR lastchar = code - 1; BACKCHAR(lastchar); mclength = (uint32_t)(code - lastchar); /* Length of UTF character */ memcpy(mcbuffer, lastchar, CU2BYTES(mclength)); /* Save the char */ @@ -7940,7 +7938,6 @@ PCRE2_UCHAR *last_branch = code; PCRE2_UCHAR *start_bracket = code; BOOL lookbehind; open_capitem capitem; -int capnumber = 0; int okreturn = 1; uint32_t *pptr = *pptrptr; uint32_t firstcu, reqcu; @@ -7998,7 +7995,7 @@ OP_SCBRAPOS, happens later, after the group has been compiled. */ if (*code == OP_CBRA) { - capnumber = GET2(code, 1 + LINK_SIZE); + int capnumber = GET2(code, 1 + LINK_SIZE); capitem.number = capnumber; capitem.next = open_caps; capitem.assert_depth = cb->assert_depth; @@ -10212,11 +10209,9 @@ show_parsed(&cb); #ifdef DEBUG_SHOW_CAPTURES { named_group *ng = cb.named_groups; - fprintf(stderr, "+++Captures: %d\n", cb.bracount); + fprintf(stderr, "+++Captures: %u\n", cb.bracount); for (i = 0; i < cb.names_found; i++, ng++) - { - fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name); - } + fprintf(stderr, "+++%3u %.*s\n", ng->number, ng->length, ng->name); } #endif @@ -10415,11 +10410,11 @@ if (errorcode == 0 && cb.had_recurse) rcode != NULL; rcode = find_recurse(rcode + 1 + LINK_SIZE, utf)) { - int p, groupnumber; + int groupnumber = (int)GET(rcode, 1); - groupnumber = (int)GET(rcode, 1); if (groupnumber == 0) rgroup = codestart; else { + int p; PCRE2_SPTR search_from = codestart; rgroup = NULL; for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7) diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c index fa9fe4450..9125d40f8 100644 --- a/src/pcre2_substitute.c +++ b/src/pcre2_substitute.c @@ -289,13 +289,14 @@ Returns: >= 0 number of substitutions made overflow, either give an error immediately, or keep on, accumulating the length. */ -#define CHECKMEMCPY(from,length) \ +#define CHECKMEMCPY(from,length) do \ { \ if (!overflowed && lengthleft < length) \ { \ if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ overflowed = TRUE; \ extra_needed = length - lengthleft; \ + lengthleft = 0; \ } \ else if (overflowed) \ { \ @@ -307,7 +308,7 @@ length. */ buff_offset += length; \ lengthleft -= length; \ } \ - } + } while (0) /* Here's the function */ @@ -439,8 +440,7 @@ if (subject == NULL) /* Find length of zero-terminated subject */ -if (length == PCRE2_ZERO_TERMINATED) - length = subject? PRIV(strlen)(subject) : 0; +if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); /* Check UTF replacement string if necessary. */ @@ -597,9 +597,7 @@ do ptr = replacement; if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0) - { CHECKMEMCPY(ptr, rlength); - } /* Within a non-literal replacement, which must be scanned character by character, local literal mode can be set by \Q, but only in extended mode @@ -942,7 +940,7 @@ do GETCHARINCTEST(ch, subptr); if (forcecase != 0) { - if (mcontext->substitute_case_callout) + if (mcontext != NULL && mcontext->substitute_case_callout != NULL) { ch = mcontext->substitute_case_callout( ch, @@ -1104,7 +1102,7 @@ do LITERAL: if (forcecase != 0) { - if (mcontext->substitute_case_callout) + if (mcontext != NULL && mcontext->substitute_case_callout != NULL) { ch = mcontext->substitute_case_callout( ch, @@ -1194,8 +1192,17 @@ if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength); } -temp[0] = 0; -CHECKMEMCPY(temp, 1); +/* Zero terminate the output string, truncating if needed. */ + +if (lengthleft-- > 0) ++buff_offset; +else + { + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; + overflowed = TRUE; + extra_needed++; + lengthleft = 0; + } +if (!overflowed || lengthleft == 0) buffer[buff_offset] = 0; else extra_needed++; /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, and matching has carried on after a full buffer, in order to compute the length @@ -1222,6 +1229,7 @@ if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data); return rc; NOROOM: +buffer[buff_offset] = 0; rc = PCRE2_ERROR_NOMEMORY; goto EXIT; diff --git a/src/pcre2test.c b/src/pcre2test.c index 7f01cf928..a241aba43 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -7500,6 +7500,13 @@ if (pat_patctl.replacement[0] != 0) return PR_OK; } + if (pat_patctl.replacement_case[0] != 0 && + (dat_datctl.control & CTL_NULLCONTEXT) != 0) + { + fprintf(outfile, "** Replacement case callouts are not supported with null_context.\n"); + return PR_OK; + } + if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) fprintf(outfile, "** Ignored with replacement text: allcaptures\n"); } diff --git a/testdata/testinput2 b/testdata/testinput2 index 1fbb778e0..45c8c909f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4220,6 +4220,37 @@ apple lemon blackberry /abc/ + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + +/abc/ + 123abc123\=replace=XY + 123abc123\=replace=[9]XY + 123abc123\=replace=[9]XY,substitute_literal +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length + 123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[6]XY,substitute_overflow_length + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[5]XY,substitute_overflow_length + 123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[4]XY,substitute_overflow_length + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[3]XY,substitute_overflow_length + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[2]XY,substitute_overflow_length + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal + +/abc/substitute_literal + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error 123abc123\=replace=[9]XYZ 123abc123\=substitute_overflow_length,replace=[9]XYZ 123abc123\=substitute_overflow_length,replace=[6]XYZ @@ -4714,7 +4745,7 @@ B)x/alt_verbnames,mark /abcd/null_context abcd\=null_context -\= Expect error - not allowed together +\= Expect not allowed together abcd\=null_context,find_limits abcd\=allusedtext,startchar @@ -6061,6 +6092,8 @@ a)"xI /abc/replace=\U$0,substitute_extended,substitute_case_callout=a.U.b.V. XabcY +\= Expect not supported + XabcY\=null_context /a/substitute_extended XaY\=replace=\U$0,substitute_case_callout=a.U. diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 99714596e..6e671d1ad 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -13884,6 +13884,61 @@ Failed: error -48: no more memory: 23 code units are needed 1: fruit\x00juice lemon blackberry /abc/ + 123abc123\=replace=XYZ + 1: 123XYZ123 + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + +/abc/ + 123abc123\=replace=XY + 1: 123XY123 + 123abc123\=replace=[9]XY + 1: 123XY123 + 123abc123\=replace=[9]XY,substitute_literal + 1: 123XY123 +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[2]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + +/abc/substitute_literal + 123abc123\=replace=XYZ + 1: 123XYZ123 + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error 123abc123\=replace=[9]XYZ Failed: error -48: no more memory 123abc123\=substitute_overflow_length,replace=[9]XYZ @@ -15131,7 +15186,7 @@ No match /abcd/null_context abcd\=null_context 0: abcd -\= Expect error - not allowed together +\= Expect not allowed together abcd\=null_context,find_limits ** Not allowed together: find_limits null_context abcd\=allusedtext,startchar @@ -18065,6 +18120,9 @@ Failed: error -55 at offset 3 in replacement: requested value is not set /abc/replace=\U$0,substitute_extended,substitute_case_callout=a.U.b.V. XabcY 1: XUVcY +\= Expect not supported + XabcY\=null_context +** Replacement case callouts are not supported with null_context. /a/substitute_extended XaY\=replace=\U$0,substitute_case_callout=a.U.