|
88 | 88 | # NOTE: not an error under PCRE/PRE:
|
89 | 89 | (r'\u', '', SYNTAX_ERROR), # A Perl escape
|
90 | 90 | # (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
|
91 |
| - (r'\xff', '\377', SUCCEED, 'found', chr(255)), |
92 | 91 | # new \x semantics
|
93 | 92 | (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
|
94 | 93 | (r'\x00f', '\017', FAIL, 'found', chr(15)),
|
|
273 | 272 | # Test octal escapes/memory references
|
274 | 273 |
|
275 | 274 | ('\\1', 'a', SYNTAX_ERROR),
|
276 |
| - ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'), |
277 |
| - ('\\141', 'a', SUCCEED, 'found', 'a'), |
278 |
| - ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'), |
279 | 275 |
|
280 | 276 | # All tests from Perl
|
281 | 277 |
|
282 |
| - ('abc', 'abc', SUCCEED, 'found', 'abc'), |
283 |
| - ('abc', 'xbc', FAIL), |
284 |
| - ('abc', 'axc', FAIL), |
285 |
| - ('abc', 'abx', FAIL), |
286 |
| - ('abc', 'xabcy', SUCCEED, 'found', 'abc'), |
287 |
| - ('abc', 'ababc', SUCCEED, 'found', 'abc'), |
288 |
| - ('ab*c', 'abc', SUCCEED, 'found', 'abc'), |
289 |
| - ('ab*bc', 'abc', SUCCEED, 'found', 'abc'), |
290 |
| - ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'), |
291 |
| - ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'), |
292 | 278 | ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
|
293 |
| - ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'), |
294 |
| - ('ab+bc', 'abc', FAIL), |
295 |
| - ('ab+bc', 'abq', FAIL), |
296 | 279 | ('ab{1,}bc', 'abq', FAIL),
|
297 |
| - ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'), |
298 | 280 | ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
|
299 | 281 | ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
|
300 | 282 | ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
|
301 | 283 | ('ab{4,5}bc', 'abbbbc', FAIL),
|
302 |
| - ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'), |
303 |
| - ('ab?bc', 'abc', SUCCEED, 'found', 'abc'), |
304 | 284 | ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
|
305 |
| - ('ab?bc', 'abbbbc', FAIL), |
306 |
| - ('ab?c', 'abc', SUCCEED, 'found', 'abc'), |
307 | 285 | ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
|
308 |
| - ('^abc$', 'abc', SUCCEED, 'found', 'abc'), |
309 |
| - ('^abc$', 'abcc', FAIL), |
310 |
| - ('^abc', 'abcc', SUCCEED, 'found', 'abc'), |
311 |
| - ('^abc$', 'aabc', FAIL), |
312 |
| - ('abc$', 'aabc', SUCCEED, 'found', 'abc'), |
313 | 286 | ('^', 'abc', SUCCEED, 'found', ''),
|
314 | 287 | ('$', 'abc', SUCCEED, 'found', ''),
|
315 |
| - ('a.c', 'abc', SUCCEED, 'found', 'abc'), |
316 |
| - ('a.c', 'axc', SUCCEED, 'found', 'axc'), |
317 |
| - ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'), |
318 |
| - ('a.*c', 'axyzd', FAIL), |
319 |
| - ('a[bc]d', 'abc', FAIL), |
320 |
| - ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'), |
321 |
| - ('a[b-d]e', 'abd', FAIL), |
322 |
| - ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'), |
323 |
| - ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'), |
324 |
| - ('a[-b]', 'a-', SUCCEED, 'found', 'a-'), |
325 | 288 | ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
|
326 | 289 | ('a[b-a]', '-', SYNTAX_ERROR),
|
327 |
| - ('a[]b', '-', SYNTAX_ERROR), |
328 |
| - ('a[', '-', SYNTAX_ERROR), |
329 |
| - ('a]', 'a]', SUCCEED, 'found', 'a]'), |
330 |
| - ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'), |
331 |
| - ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'), |
332 |
| - ('a[^bc]d', 'abd', FAIL), |
333 |
| - ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'), |
334 |
| - ('a[^-b]c', 'a-c', FAIL), |
335 |
| - ('a[^]b]c', 'a]c', FAIL), |
336 |
| - ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'), |
337 |
| - ('ab|cd', 'abc', SUCCEED, 'found', 'ab'), |
338 |
| - ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'), |
339 |
| - ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'), |
340 | 290 | ('*a', '-', SYNTAX_ERROR),
|
341 | 291 | ('(*)b', '-', SYNTAX_ERROR),
|
342 |
| - ('$b', 'b', FAIL), |
343 |
| - ('a\\', '-', SYNTAX_ERROR), |
344 |
| - ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'), |
345 |
| - ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'), |
346 |
| - ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'), |
347 |
| - ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'), |
348 |
| - ('abc)', '-', SYNTAX_ERROR), |
349 |
| - ('(abc', '-', SYNTAX_ERROR), |
350 |
| - ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'), |
351 |
| - ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'), |
352 |
| - ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'), |
353 | 292 | ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
|
354 | 293 | ('a**', '-', SYNTAX_ERROR),
|
355 | 294 | ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
|
356 |
| - ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'), |
357 | 295 | ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
|
358 |
| - ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'), |
359 | 296 | ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
|
360 |
| - ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'), |
361 | 297 | ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
|
362 |
| - (')(', '-', SYNTAX_ERROR), |
363 |
| - ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'), |
364 |
| - ('abc', '', FAIL), |
365 |
| - ('a*', '', SUCCEED, 'found', ''), |
366 | 298 | ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
|
367 | 299 | ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
|
368 |
| - ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'), |
369 |
| - ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'), |
370 |
| - ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'), |
371 |
| - ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'), |
372 |
| - ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'), |
373 |
| - ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'), |
374 |
| - ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'), |
375 | 300 | ('^(ab|cd)e', 'abcde', FAIL),
|
376 |
| - ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'), |
377 |
| - ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'), |
378 |
| - ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'), |
379 |
| - ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'), |
380 |
| - ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'), |
381 |
| - ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'), |
382 |
| - ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'), |
383 |
| - ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'), |
384 |
| - ('a[bcd]+dcdcde', 'adcdcde', FAIL), |
385 |
| - ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'), |
386 |
| - ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'), |
387 |
| - ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'), |
388 |
| - ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'), |
389 |
| - ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'), |
390 |
| - ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'), |
391 |
| - ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL), |
392 |
| - ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL), |
393 |
| - ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'), |
394 | 301 | ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
|
395 | 302 | ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
|
396 | 303 | # Python does not have the same rules for \\41 so this is a syntax error
|
397 | 304 | # ('((((((((((a))))))))))\\41', 'aa', FAIL),
|
398 | 305 | # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
|
399 | 306 | ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
400 | 307 | ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
401 |
| - ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'), |
402 |
| - ('multiple words of text', 'uh-uh', FAIL), |
403 |
| - ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'), |
404 |
| - ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'), |
405 |
| - ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'), |
406 |
| - ('[k]', 'ab', FAIL), |
407 |
| - ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'), |
408 |
| - ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'), |
409 |
| - ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'), |
410 | 308 | ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
|
411 | 309 | ('(?i)abc', 'XBC', FAIL),
|
412 | 310 | ('(?i)abc', 'AXC', FAIL),
|
|
546 | 444 | ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
|
547 | 445 | ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
|
548 | 446 | ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
|
549 |
| - ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'), |
550 | 447 |
|
551 | 448 | # lookbehind: split by : but not if it is escaped by -.
|
552 | 449 | ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
|
|
586 | 483 | xyzabc
|
587 | 484 | 123""", SUCCEED, 'found', 'abc'),
|
588 | 485 |
|
589 |
| - # using the s embedded pattern modifier |
590 | 486 |
|
591 |
| - ('a.b', 'a\nb', FAIL), |
592 |
| - ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), |
593 | 487 |
|
594 | 488 | # test \w, etc. both inside and outside character classes
|
595 | 489 |
|
|
0 commit comments