Skip to content

Commit 6ac09a2

Browse files
committed
Add ability to parse removeparam= as queryprune=
Related issue: - uBlockOrigin/uBlock-issues#1356 Related commit: - bde3164 It is not possible to achieve perfect compatibility at this point, but reasonable compatibility should be achieved for a majority of instances of `removeparam=`. Notable differences: -------------------- uBO always matches in a case-insensitive manner, so there is no need to ask for case-insensitivity, and no need to use uppercase characters in `queryprune=` values. uBO does not escape special regex characters, since `queryprune=` values are always assumed to be literal regular expressions (leaving out the documented special characters). This means `removeparam=` values containing characters which are special regex characters won't be properly translated and are unlikely to work properly in uBO. For example, the `queryprune` value of a filter such as `$removeparam=__xts__[0]` internally becomes the literal regex `/__xts__[0]/`, in which `[0]` is parsed as a character class matching the single character `0`, and consequently the filter would not match a query parameter such as `...?__xts__[0]=...`. Notes: ------ Additionally, for performance reasons, when uBO encounters a pattern-less `queryprune=` (or `removeparam=`) filter, it will try to extract a valid pattern from the `queryprune=` value. For instance, the following filter: `$queryprune=utm_campaign` will be translated internally into: `utm_campaign$queryprune=utm_campaign`. The logger will reflect this internal translation.
1 parent 80413df commit 6ac09a2

File tree

2 files changed

+74
-20
lines changed

2 files changed

+74
-20
lines changed

src/js/static-filtering-parser.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2092,6 +2092,7 @@ const netOptionTokenDescriptors = new Map([
20922092
[ 'popunder', OPTTokenPopunder | OPTNonNetworkType | OPTNonCspableType | OPTNonRedirectableType ],
20932093
[ 'popup', OPTTokenPopup | OPTNonNetworkType | OPTCanNegate | OPTNonCspableType | OPTNonRedirectableType ],
20942094
[ 'queryprune', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
2095+
[ 'removeparam', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
20952096
[ 'redirect', OPTTokenRedirect | OPTMustAssign | OPTAllowMayAssign | OPTModifierType ],
20962097
[ 'redirect-rule', OPTTokenRedirectRule | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType ],
20972098
[ 'script', OPTTokenScript | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ],
@@ -2147,6 +2148,7 @@ Parser.netOptionTokenIds = new Map([
21472148
[ 'popunder', OPTTokenPopunder ],
21482149
[ 'popup', OPTTokenPopup ],
21492150
[ 'queryprune', OPTTokenQueryprune ],
2151+
[ 'removeparam', OPTTokenQueryprune ],
21502152
[ 'redirect', OPTTokenRedirect ],
21512153
[ 'redirect-rule', OPTTokenRedirectRule ],
21522154
[ 'script', OPTTokenScript ],

src/js/static-net-filtering.js

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2628,7 +2628,7 @@ const FilterParser = class {
26282628
this.noTokenHash = urlTokenizer.noTokenHash;
26292629
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
26302630
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
2631-
this.reRegexToken = /[%0-9A-Za-z]+/g;
2631+
this.reToken = /[%0-9A-Za-z]+/g;
26322632
this.reRegexTokenAbort = /[\(\)\[\]]/;
26332633
this.reRegexBadPrefix = /(^|[^\\]\.|\\[%SDWsdw]|[^\\][()*+?[\\\]{}])$/;
26342634
this.reRegexBadSuffix = /^([^\\]\.|\\[%SDWsdw]|[()*+?[\]{}]|$)/;
@@ -3110,34 +3110,48 @@ const FilterParser = class {
31103110
// i.e. very common with a high probability of ending up as a miss,
31113111
// are not good. Avoid if possible. This has a significant positive
31123112
// impact on performance.
3113+
//
3114+
// For pattern-less queryprune filters, try to derive a pattern from
3115+
// the queryprune value.
31133116

31143117
makeToken() {
3115-
if ( this.pattern === '*' ) { return; }
3118+
if ( this.pattern === '*' ) {
3119+
if (
3120+
this.modifyType !== this.parser.OPTTokenQueryprune ||
3121+
this.makePatternFromQuerypruneValue() === false
3122+
) {
3123+
return;
3124+
}
3125+
}
31163126
if ( this.isRegex ) {
31173127
return this.extractTokenFromRegex();
31183128
}
3119-
const match = this.extractTokenFromPattern();
3120-
if ( match === null ) { return; }
3121-
this.token = match.token;
3122-
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
3123-
this.tokenBeg = match.pos;
3129+
this.extractTokenFromPattern();
31243130
}
31253131

31263132
// Note: a one-char token is better than a documented bad token.
31273133
extractTokenFromPattern() {
3134+
this.reToken.lastIndex = 0;
3135+
const pattern = this.pattern;
31283136
let bestMatch = null;
31293137
let bestBadness = 0x7FFFFFFF;
3130-
for ( const match of this.parser.patternTokens() ) {
3131-
const badness = match.token.length > 1
3132-
? this.badTokens.get(match.token) || 0
3138+
for (;;) {
3139+
const match = this.reToken.exec(pattern);
3140+
if ( match === null ) { break; }
3141+
const badness = match[0].length > 1
3142+
? this.badTokens.get(match[0]) || 0
31333143
: 1;
3134-
if ( badness === 0 ) { return match; }
31353144
if ( badness < bestBadness ) {
31363145
bestMatch = match;
3146+
if ( badness === 0 ) { break; }
31373147
bestBadness = badness;
31383148
}
31393149
}
3140-
return bestMatch;
3150+
if ( bestMatch !== null ) {
3151+
this.token = bestMatch[0];
3152+
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
3153+
this.tokenBeg = bestMatch.index;
3154+
}
31413155
}
31423156

31433157
// https://github.com/gorhill/uBlock/issues/2781
@@ -3147,15 +3161,16 @@ const FilterParser = class {
31473161
// Mind `\b` directives: `/\bads\b/` should result in token being `ads`,
31483162
// not `bads`.
31493163
extractTokenFromRegex() {
3150-
this.reRegexToken.lastIndex = 0;
3151-
const s = this.pattern;
3164+
this.reToken.lastIndex = 0;
3165+
const pattern = this.pattern;
3166+
let bestToken;
31523167
let bestBadness = 0x7FFFFFFF;
31533168
for (;;) {
3154-
const matches = this.reRegexToken.exec(s);
3169+
const matches = this.reToken.exec(pattern);
31553170
if ( matches === null ) { break; }
31563171
let token = matches[0];
3157-
let prefix = s.slice(0, matches.index);
3158-
let suffix = s.slice(this.reRegexToken.lastIndex);
3172+
let prefix = pattern.slice(0, matches.index);
3173+
let suffix = pattern.slice(this.reToken.lastIndex);
31593174
if (
31603175
this.reRegexTokenAbort.test(prefix) &&
31613176
this.reRegexTokenAbort.test(suffix)
@@ -3181,13 +3196,47 @@ const FilterParser = class {
31813196
? this.badTokens.get(token) || 0
31823197
: 1;
31833198
if ( badness < bestBadness ) {
3184-
this.token = token.toLowerCase();
3185-
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
3186-
this.tokenBeg = matches.index;
3199+
bestToken = token;
31873200
if ( badness === 0 ) { break; }
31883201
bestBadness = badness;
31893202
}
31903203
}
3204+
if ( bestToken !== undefined ) {
3205+
this.token = bestToken.toLowerCase();
3206+
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
3207+
}
3208+
}
3209+
3210+
makePatternFromQuerypruneValue() {
3211+
let pattern = this.modifyValue;
3212+
if ( pattern === '*' || pattern.charCodeAt(0) === 0x21 /* '!' */ ) {
3213+
return false;
3214+
}
3215+
if ( /^\w+$/.test(pattern) ) {
3216+
this.pattern = `${pattern}=`;
3217+
return true;
3218+
}
3219+
const reRegex = /^\/(.+)\/i?$/;
3220+
if ( reRegex.test(pattern) ) {
3221+
pattern = reRegex.exec(pattern)[1];
3222+
} else {
3223+
let prefix = '', suffix = '';
3224+
if ( pattern.startsWith('|') ) {
3225+
pattern = pattern.slice(1);
3226+
prefix = '\\b';
3227+
}
3228+
if ( pattern.endsWith('|') ) {
3229+
pattern = pattern.slice(0, -1);
3230+
suffix = '\\b';
3231+
}
3232+
if ( pattern.indexOf('|') !== -1 ) {
3233+
pattern = `(?:${pattern})`;
3234+
}
3235+
pattern = prefix + pattern + suffix;
3236+
}
3237+
this.pattern = pattern;
3238+
this.isRegex = true;
3239+
return true;
31913240
}
31923241

31933242
hasNoOptionUnits() {
@@ -4288,6 +4337,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) {
42884337

42894338
FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
42904339
const cache = {};
4340+
const reRegex = /^\/(.+)\/i?$/;
42914341
let retext = modifier.value;
42924342
if ( retext === '*' ) {
42934343
cache.all = true;
@@ -4296,6 +4346,8 @@ FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
42964346
if ( cache.not ) { retext = retext.slice(1); }
42974347
if ( /^\w+$/.test(retext) ) {
42984348
retext = `^${retext}=`;
4349+
} else if ( reRegex.test(retext) ) {
4350+
retext = reRegex.exec(retext)[1];
42994351
} else {
43004352
if ( retext.startsWith('|') ) { retext = `^${retext.slice(1)}`; }
43014353
if ( retext.endsWith('|') ) { retext = `${retext.slice(0,-1)}$`; }

0 commit comments

Comments
 (0)