libregex: fix our mapping for \w
Our implementation of \w contained a small oversight: it mapped \w to exactly [[:alnum:]], but according to the GNU documentation, \w is actually [[:alnum:]] plus underscore. The fix is rather trivial: add '_' to the character set explicitly, and amend our test set to be sure that _ is actually included. PR: 287396
This commit is contained in:
@@ -1183,6 +1183,7 @@ p_b_pseudoclass(struct parse *p, char c) {
|
|||||||
/* PASSTHROUGH */
|
/* PASSTHROUGH */
|
||||||
case 'w':
|
case 'w':
|
||||||
p_b_cclass_named(p, cs, "alnum");
|
p_b_cclass_named(p, cs, "alnum");
|
||||||
|
CHadd(p, cs, '_');
|
||||||
break;
|
break;
|
||||||
case 'S':
|
case 'S':
|
||||||
cs->invert = 1;
|
cs->invert = 1;
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ a\|b\|c b abc a
|
|||||||
(ab)\1 - abab abab
|
(ab)\1 - abab abab
|
||||||
\1(ab) C ESUBREG
|
\1(ab) C ESUBREG
|
||||||
(a)(b)(c)(d)(e)(f)(g)(h)(i)\9 - abcdefghii abcdefghii
|
(a)(b)(c)(d)(e)(f)(g)(h)(i)\9 - abcdefghii abcdefghii
|
||||||
# \w, \W, \s, \S (alnum, ^alnum, space, ^space)
|
# \w, \W, \s, \S (_alnum, ^_alnum, space, ^space)
|
||||||
\w+ - -%@a0X- a0X
|
\w+ - -%@a_0X- a_0X
|
||||||
\w\+ b -%@a0X- a0X
|
\w\+ b -%@a_0X- a_0X
|
||||||
\s+ - aSNTb SNT
|
\s+ - aSNTb SNT
|
||||||
\s\+ b aSNTb SNT
|
\s\+ b aSNTb SNT
|
||||||
# Word boundaries (\b, \B, \<, \>, \`, \')
|
# Word boundaries (\b, \B, \<, \>, \`, \')
|
||||||
|
|||||||
Reference in New Issue
Block a user