Skip to content

Commit fe71448

Browse files
committed
add pcre2_get_match_data_heapframes_size()
Since PCRE2 10.41, the match_data contains a pointer to a vector of frames allocated in the heap that are used by pcre2_match() when doing non-JIT matches. There is, though, no outside visibility of its size, and therefore the memory it uses is locked away until the match_data itself is freed. Add an API that allows getting that value, so an application can decide whether it is worth keeping the match_data and reusing it for future matches or not. While at it, update the documentation of other related functions for clarity.
1 parent 0746b3d commit fe71448

8 files changed

+121
-4
lines changed

Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dist_html_DATA = \
4646
doc/html/pcre2_general_context_free.html \
4747
doc/html/pcre2_get_error_message.html \
4848
doc/html/pcre2_get_mark.html \
49+
doc/html/pcre2_get_match_data_heapframes_size.html \
4950
doc/html/pcre2_get_match_data_size.html \
5051
doc/html/pcre2_get_ovector_count.html \
5152
doc/html/pcre2_get_ovector_pointer.html \
@@ -142,6 +143,7 @@ dist_man_MANS = \
142143
doc/pcre2_general_context_free.3 \
143144
doc/pcre2_get_error_message.3 \
144145
doc/pcre2_get_mark.3 \
146+
doc/pcre2_get_match_data_heapframes_size.3 \
145147
doc/pcre2_get_match_data_size.3 \
146148
doc/pcre2_get_ovector_count.3 \
147149
doc/pcre2_get_ovector_pointer.3 \
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<html>
2+
<head>
3+
<title>pcre2_get_match_data_heapframes_size specification</title>
4+
</head>
5+
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
6+
<h1>pcre2_get_match_data_heapframes_size man page</h1>
7+
<p>
8+
Return to the <a href="index.html">PCRE2 index page</a>.
9+
</p>
10+
<p>
11+
This page is part of the PCRE2 HTML documentation. It was generated
12+
automatically from the original man page. If there is any nonsense in it,
13+
please consult the man page, in case the conversion went wrong.
14+
<br>
15+
<br><b>
16+
SYNOPSIS
17+
</b><br>
18+
<P>
19+
<b>#include &#60;pcre2.h&#62;</b>
20+
</P>
21+
<P>
22+
<b>PCRE2_SIZE pcre2_get_match_data_heapframes_size(pcre2_match_data *<i>match_data</i>);</b>
23+
</P>
24+
<br><b>
25+
DESCRIPTION
26+
</b><br>
27+
<P>
28+
This function returns the size, in bytes, of the heapframes data block that is owned by its
29+
argument.
30+
</P>
31+
<P>
32+
There is a complete description of the PCRE2 native API in the
33+
<a href="pcre2api.html"><b>pcre2api</b></a>
34+
page and a description of the POSIX API in the
35+
<a href="pcre2posix.html"><b>pcre2posix</b></a>
36+
page.
37+
<p>
38+
Return to the <a href="index.html">PCRE2 index page</a>.
39+
</p>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43"
2+
.SH NAME
3+
PCRE2 - Perl-compatible regular expressions (revised API)
4+
.SH SYNOPSIS
5+
.rs
6+
.sp
7+
.B #include <pcre2.h>
8+
.PP
9+
.nf
10+
.B PCRE2_SIZE pcre2_get_match_data_heapframes_size(pcre2_match_data *\fImatch_data\fP);
11+
.fi
12+
.
13+
.SH DESCRIPTION
14+
.rs
15+
.sp
16+
This function returns the size, in bytes, of the heapframes data block that is owned
17+
by its argument.
18+
.P
19+
There is a complete description of the PCRE2 native API in the
20+
.\" HREF
21+
\fBpcre2api\fP
22+
.\"
23+
page and a description of the POSIX API in the
24+
.\" HREF
25+
\fBpcre2posix\fP
26+
.\"
27+
page.

doc/pcre2_match_data_free.3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ using the memory freeing function from the general context or compiled pattern
1919
with which it was created, or \fBfree()\fP if that was not set.
2020
.P
2121
If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
22-
match data block, the copy of the subject that was remembered with the block is
22+
match data block, the copy of the subject that was referenced within the block is
2323
also freed.
2424
.P
2525
There is a complete description of the PCRE2 native API in the

doc/pcre2test.1

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ heavily used in the test files.
642642
fullbincode show binary code with lengths
643643
/I info show info about compiled pattern
644644
hex unquoted characters are hexadecimal
645+
heapframes_size show match data heapframes size
645646
jit[=<number>] use JIT
646647
jitfast use JIT fast path
647648
jitverify verify JIT use
@@ -728,6 +729,10 @@ The \fBframesize\fP modifier shows the size, in bytes, of the storage frames
728729
used by \fBpcre2_match()\fP for handling backtracking. The size depends on the
729730
number of capturing parentheses in the pattern.
730731
.P
732+
The \fBheapframes_size\fP modifier shows the size, in bytes, of the allocated
733+
heapframes used by \fBpcre2_match()\fP and associated with the match_data.
734+
The vector is reused by all matching patterns.
735+
.P
731736
The \fBcallout_info\fP modifier requests information about all the callouts in
732737
the pattern. A list of them is output at the end of any other information that
733738
is requested. For each callout, either its number or string is given, followed

src/pcre2.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,8 @@ PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
687687
pcre2_get_mark(pcre2_match_data *); \
688688
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
689689
pcre2_get_match_data_size(pcre2_match_data *); \
690+
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
691+
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
690692
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
691693
pcre2_get_ovector_count(pcre2_match_data *); \
692694
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
@@ -851,6 +853,7 @@ pcre2_compile are called by application code. */
851853
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
852854
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
853855
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
856+
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
854857
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
855858
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
856859
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)

src/pcre2_match_data.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,4 +170,16 @@ return offsetof(pcre2_match_data, ovector) +
170170
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
171171
}
172172

173+
174+
175+
/*************************************************
176+
* Get heapframes size *
177+
*************************************************/
178+
179+
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
180+
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
181+
{
182+
return match_data->heapframes_size;
183+
}
184+
173185
/* End of pcre2_match_data.c */

src/pcre2test.c

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,7 @@ so many of them that they are split into two fields. */
524524
#define CTL2_NULL_REPLACEMENT 0x00002000u
525525
#define CTL2_FRAMESIZE 0x00004000u
526526

527+
#define CTL2_HEAPFRAMES_SIZE 0x20000000u /* Informational */
527528
#define CTL2_NL_SET 0x40000000u /* Informational */
528529
#define CTL2_BSR_SET 0x80000000u /* Informational */
529530

@@ -682,6 +683,7 @@ static modstruct modlist[] = {
682683
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
683684
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
684685
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
686+
{ "heapframes_size", MOD_PD, MOD_CTL, CTL2_HEAPFRAMES_SIZE, PO(control2) },
685687
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
686688
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
687689
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
@@ -786,8 +788,8 @@ static modstruct modlist[] = {
786788
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY| \
787789
CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
788790

789-
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET|CTL2_FRAMESIZE| \
790-
CTL2_NL_SET)
791+
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET| \
792+
CTL2_HEAPFRAMES_SIZE|CTL2_FRAMESIZE|CTL2_NL_SET)
791793

792794
/* Controls that apply only at compile time with 'push'. */
793795

@@ -4130,7 +4132,7 @@ Returns: nothing
41304132
static void
41314133
show_controls(uint32_t controls, uint32_t controls2, const char *before)
41324134
{
4133-
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4135+
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
41344136
before,
41354137
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
41364138
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@@ -4153,6 +4155,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
41534155
((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
41544156
((controls & CTL_GETALL) != 0)? " getall" : "",
41554157
((controls & CTL_GLOBAL) != 0)? " global" : "",
4158+
((controls2 & CTL2_HEAPFRAMES_SIZE) != 0)? " heapframes_size" : "",
41564159
((controls & CTL_HEXPAT) != 0)? " hex" : "",
41574160
((controls & CTL_INFO) != 0)? " info" : "",
41584161
((controls & CTL_JITFAST) != 0)? " jitfast" : "",
@@ -4357,6 +4360,31 @@ fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size);
43574360

43584361

43594362

4363+
/*************************************************
4364+
* Show heapframes size info for a match_data *
4365+
*************************************************/
4366+
4367+
static void
4368+
show_heapframes_size(void)
4369+
{
4370+
size_t heapframes_size = 0;
4371+
#ifdef SUPPORT_PCRE2_8
4372+
if (code_unit_size == 1)
4373+
heapframes_size = pcre2_get_match_data_heapframes_size_8(match_data8);
4374+
#endif
4375+
#ifdef SUPPORT_PCRE2_16
4376+
if (code_unit_size == 2)
4377+
heapframes_size = pcre2_get_match_data_heapframes_size_16(match_data16);
4378+
#endif
4379+
#ifdef SUPPORT_PCRE2_32
4380+
if (code_unit_size == 4)
4381+
heapframes_size = pcre2_get_match_data_heapframes_size_32(match_data32);
4382+
#endif
4383+
fprintf(outfile, "Heapframes size for pcre2_match(): %" SIZ_FORM "\n", heapframes_size);
4384+
}
4385+
4386+
4387+
43604388
/*************************************************
43614389
* Get and output an error message *
43624390
*************************************************/
@@ -5971,6 +5999,7 @@ if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
59715999

59726000
if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
59736001
if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
6002+
if ((pat_patctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0) show_heapframes_size();
59746003
if ((pat_patctl.control & CTL_ANYINFO) != 0)
59756004
{
59766005
int rc = show_pattern_info();

0 commit comments

Comments
 (0)