Skip to content

Commit c192b8c

Browse files
zherczeg and Zoltan Herczeg authored
Various code improvements for eclasses (PCRE2Project#546)
Co-authored-by: Zoltan Herczeg <[email protected]>
1 parent 16d7edb commit c192b8c

File tree

4 files changed

+206
-240
lines changed

4 files changed

+206
-240
lines changed

src/pcre2_compile.c

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2859,6 +2859,7 @@ uint32_t delimiter;
28592859
uint32_t namelen;
28602860
uint32_t class_range_state;
28612861
uint32_t class_op_state;
2862+
uint32_t *class_start;
28622863
uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */
28632864
uint32_t *verbstartptr = NULL;
28642865
uint32_t *previous_callout = NULL;
@@ -2869,9 +2870,8 @@ uint32_t *prev_parsed_item = NULL;
28692870
uint32_t meta_quantifier = 0;
28702871
uint32_t add_after_mark = 0;
28712872
uint16_t nest_depth = 0;
2872-
uint16_t class_depth = 0;
2873-
uint16_t class_maxdepth = 0;
2874-
uint8_t class_op_used[ECLASS_NEST_LIMIT];
2873+
int16_t class_depth_m1 = -1; /* The m1 means minus 1. */
2874+
int16_t class_maxdepth_m1 = -1;
28752875
int after_manual_callout = 0;
28762876
int expect_cond_assert = 0;
28772877
int errorcode = 0;
@@ -3650,10 +3650,11 @@ while (ptr < ptrend)
36503650

36513651
/* c is still set to '[' so the loop will handle the start of the class. */
36523652

3653-
class_depth = 0;
3654-
class_maxdepth = 0;
3653+
class_depth_m1 = -1;
3654+
class_maxdepth_m1 = -1;
36553655
class_range_state = RANGE_NO;
36563656
class_op_state = CLASS_OP_NONE;
3657+
class_start = NULL;
36573658

36583659
for (;;)
36593660
{
@@ -3684,7 +3685,7 @@ while (ptr < ptrend)
36843685
[.ch.] and [=ch=] ("collating elements") and fault them, as Perl
36853686
5.6 and 5.8 do. */
36863687

3687-
if (class_depth > 0 &&
3688+
if (class_depth_m1 >= 0 &&
36883689
c == CHAR_LEFT_SQUARE_BRACKET &&
36893690
ptrend - ptr >= 3 &&
36903691
(*ptr == CHAR_COLON || *ptr == CHAR_DOT ||
@@ -3792,7 +3793,7 @@ while (ptr < ptrend)
37923793
/* Check for the start of the outermost class, or the start of a nested class. */
37933794

37943795
else if (c == CHAR_LEFT_SQUARE_BRACKET &&
3795-
(class_depth == 0 || (options & PCRE2_ALT_EXTENDED_CLASS) != 0))
3796+
(class_depth_m1 < 0 || (options & PCRE2_ALT_EXTENDED_CLASS) != 0))
37963797
{
37973798
/* Tidy up the other class before starting the nested class. */
37983799
/* -[ beginning a nested class is a literal '-' */
@@ -3801,7 +3802,7 @@ while (ptr < ptrend)
38013802
parsed_pattern[-1] = CHAR_MINUS;
38023803

38033804
/* Validate nesting depth */
3804-
if (class_depth >= ECLASS_NEST_LIMIT)
3805+
if (class_depth_m1 >= ECLASS_NEST_LIMIT - 1)
38053806
{
38063807
errorcode = ERR107;
38073808
goto FAILED; /* Classes too deeply nested */
@@ -3845,11 +3846,19 @@ while (ptr < ptrend)
38453846
if (c == CHAR_RIGHT_SQUARE_BRACKET &&
38463847
(cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)
38473848
{
3849+
if (class_start != NULL)
3850+
{
3851+
PCRE2_ASSERT(class_depth_m1 >= 0);
3852+
/* Flag the class starting at class_start as an extended class. */
3853+
*class_start |= CLASS_IS_ECLASS;
3854+
class_start = NULL;
3855+
}
3856+
38483857
*parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY;
38493858

38503859
/* Leave nesting depth unchanged; but check for zero depth to handle the
38513860
very first (top-level) class being empty. */
3852-
if (class_depth == 0) break;
3861+
if (class_depth_m1 < 0) break;
38533862

38543863
class_range_state = RANGE_NO; /* for processing the containing class */
38553864
class_op_state = CLASS_OP_OPERAND;
@@ -3858,12 +3867,23 @@ while (ptr < ptrend)
38583867

38593868
/* Enter a non-empty class. */
38603869

3870+
if (class_start != NULL)
3871+
{
3872+
PCRE2_ASSERT(class_depth_m1 >= 0);
3873+
/* Flag the class starting at class_start as an extended class. */
3874+
*class_start |= CLASS_IS_ECLASS;
3875+
class_start = NULL;
3876+
}
3877+
3878+
class_start = parsed_pattern;
38613879
*parsed_pattern++ = negate_class? META_CLASS_NOT : META_CLASS;
38623880
class_range_state = RANGE_NO;
38633881
class_op_state = CLASS_OP_NONE;
3864-
++class_depth;
3865-
class_maxdepth = class_depth > class_maxdepth ?class_depth :class_maxdepth;
3866-
class_op_used[class_depth-1] = 0; /* reset; no op seen yet at new depth */
3882+
++class_depth_m1;
3883+
if (class_maxdepth_m1 < class_depth_m1)
3884+
class_maxdepth_m1 = class_depth_m1;
3885+
/* Reset; no op seen yet at new depth. */
3886+
cb->class_op_used[class_depth_m1] = 0;
38673887

38683888
/* Implement the special start-of-class literal meaning of ']'. */
38693889
if (c == CHAR_RIGHT_SQUARE_BRACKET)
@@ -3894,10 +3914,13 @@ while (ptr < ptrend)
38943914

38953915
*parsed_pattern++ = META_CLASS_END;
38963916

3897-
if (--class_depth == 0) break;
3917+
if (--class_depth_m1 < 0) break;
38983918

38993919
class_range_state = RANGE_NO; /* for processing the containing class */
39003920
class_op_state = CLASS_OP_OPERAND;
3921+
/* The extended class flag has already
3922+
been set for the parent class. */
3923+
class_start = NULL;
39013924
}
39023925

39033926
/* Handle a set operator */
@@ -3924,13 +3947,21 @@ while (ptr < ptrend)
39243947
}
39253948

39263949
/* Check for mixed precedence. Forbid [A--B&&C]. */
3927-
if (class_op_used[class_depth-1] != 0 &&
3928-
class_op_used[class_depth-1] != (uint8_t)c)
3950+
if (cb->class_op_used[class_depth_m1] != 0 &&
3951+
cb->class_op_used[class_depth_m1] != (uint8_t)c)
39293952
{
39303953
errorcode = ERR111;
39313954
goto FAILED;
39323955
}
39333956

3957+
if (class_start != NULL)
3958+
{
3959+
PCRE2_ASSERT(class_depth_m1 >= 0);
3960+
/* Flag the class starting at class_start as an extended class. */
3961+
*class_start |= CLASS_IS_ECLASS;
3962+
class_start = NULL;
3963+
}
3964+
39343965
/* Dangling '-' before an operator is a literal */
39353966
if (class_range_state == RANGE_STARTED)
39363967
parsed_pattern[-1] = CHAR_MINUS;
@@ -3940,7 +3971,7 @@ while (ptr < ptrend)
39403971
META_ECLASS_AND;
39413972
class_range_state = RANGE_NO;
39423973
class_op_state = CLASS_OP_OPERATOR;
3943-
class_op_used[class_depth-1] = (uint8_t)c;
3974+
cb->class_op_used[class_depth_m1] = (uint8_t)c;
39443975
}
39453976

39463977
/* Handle potential start of range */
@@ -4133,7 +4164,7 @@ while (ptr < ptrend)
41334164
if (ptr >= ptrend)
41344165
{
41354166
if ((options & PCRE2_ALT_EXTENDED_CLASS) != 0 &&
4136-
class_depth == 1 && class_maxdepth == 2)
4167+
class_depth_m1 == 0 && class_maxdepth_m1 == 1)
41374168
errorcode = ERR112; /* Missing terminating ']', but we saw '[ [ ]...' */
41384169
else
41394170
errorcode = ERR6; /* Missing terminating ']' */
@@ -5614,7 +5645,6 @@ for (;; pptr++)
56145645
uint32_t groupnumber;
56155646
uint32_t verbarglen, verbculen;
56165647
uint32_t subreqcuflags, subfirstcuflags;
5617-
uint32_t *end_ptr;
56185648
open_capitem *oc;
56195649
PCRE2_UCHAR mcbuffer[8];
56205650

@@ -5781,7 +5811,7 @@ for (;; pptr++)
57815811

57825812
/* Check for complex extended classes and handle them separately. */
57835813

5784-
if (!PRIV(check_class_not_nested)(pptr + 1, &end_ptr))
5814+
if ((*pptr & CLASS_IS_ECLASS) != 0)
57855815
{
57865816
previous = code;
57875817
*code++ = OP_ECLASS;
@@ -5920,13 +5950,11 @@ for (;; pptr++)
59205950

59215951
/* Now emit the OP_CLASS/OP_NCLASS/OP_XCLASS/OP_ALLANY opcode. */
59225952

5923-
if (!PRIV(compile_class_not_nested)(options, xoptions, pptr + 1, end_ptr,
5924-
&code, meta == META_CLASS_NOT,
5925-
errorcodeptr, cb, lengthptr))
5926-
return 0;
5927-
5928-
PCRE2_ASSERT(*end_ptr == META_CLASS_END);
5929-
pptr = end_ptr;
5953+
pptr = PRIV(compile_class_not_nested)(options, xoptions, pptr + 1,
5954+
&code, meta == META_CLASS_NOT,
5955+
errorcodeptr, cb, lengthptr);
5956+
if (pptr == NULL) return 0;
5957+
PCRE2_ASSERT(*pptr == META_CLASS_END);
59305958

59315959
/* If this class is the first thing in the branch, there can be no first
59325960
char setting, whatever the repeat count. Any reqcu setting must remain

src/pcre2_compile.h

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ therefore no need for it to have a length entry, so use a high value. */
182182
#define META_DATA(x) (x & 0x0000ffffu)
183183
#define META_DIFF(x,y) ((x-y)>>16)
184184

185+
/* Extended class management flags. */
186+
187+
#define CLASS_IS_ECLASS 0x1
188+
185189
/* Macro for the highest character value. */
186190

187191
#if PCRE2_CODE_UNIT_WIDTH == 8
@@ -213,7 +217,6 @@ therefore no need for it to have a length entry, so use a high value. */
213217
/* Macros for the definitions below, to prevent name collisions. */
214218

215219
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
216-
#define _pcre2_optimize_class PCRE2_SUFFIX(_pcre2_optimize_class_)
217220
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
218221
#define _pcre2_check_class_not_nested PCRE2_SUFFIX(_pcre2_check_class_not_nested_)
219222
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
@@ -232,12 +235,6 @@ posix_class_maps, and posix_substitutes. They must be kept in sync. */
232235
extern const int PRIV(posix_class_maps)[];
233236

234237

235-
/* Merge intersecting ranges of classes. */
236-
237-
class_ranges *PRIV(optimize_class)(uint32_t *start_ptr,
238-
const uint32_t *end_ptr, uint32_t options, uint32_t xoptions,
239-
compile_block *cb);
240-
241238
/* Set bits in classbits according to the property type */
242239

243240
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
@@ -261,10 +258,9 @@ BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
261258
/* Compile the META codes starting at start_ptr, writing a single
262259
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
263260

264-
BOOL PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
265-
uint32_t *start_ptr, const uint32_t *end_ptr, PCRE2_UCHAR **pcode,
266-
BOOL negate_class, int *errorcodeptr, compile_block *cb,
267-
PCRE2_SIZE *lengthptr);
261+
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
262+
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class,
263+
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
268264

269265
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
270266

0 commit comments

Comments
 (0)