diff --git a/benchmark/http/proxy-should-use-proxy.js b/benchmark/http/proxy-should-use-proxy.js
new file mode 100644
index 00000000000000..ad85133c326bbe
--- /dev/null
+++ b/benchmark/http/proxy-should-use-proxy.js
@@ -0,0 +1,64 @@
+'use strict';
+
+const common = require('../common');
+const assert = require('assert');
+
+// Benchmark configuration
+const bench = common.createBenchmark(main, {
+  hostname: [
+    '127.0.0.1',
+    'localhost',
+    'localhost:123',
+    'www.example.com',
+    'example.com',
+    'myexample.com',
+  ],
+  no_proxy: [
+    '',
+    '*',
+    '126.255.255.1-127.0.0.255',
+    'localhost',
+    'localhost:123',
+    '127.0.0.1',
+    'example.com',
+    '.example.com',
+    '*.example.com',
+  ],
+  n: [1e6],
+}, {
+  flags: ['--expose-internals'],
+});
+
+function main({ hostname, no_proxy, n }) {
+  const { parseProxyConfigFromEnv } = require('internal/http');
+
+  const protocol = 'https:';
+  const env = {
+    no_proxy,
+    https_proxy: 'https://www.example.proxy',
+  };
+  const proxyConfig = parseProxyConfigFromEnv(env, protocol);
+
+  // Warm up.
+  const length = 1024;
+  const array = [];
+  for (let i = 0; i < length; ++i) {
+    array.push(proxyConfig.shouldUseProxy(hostname));
+  }
+
+  // Benchmark.
+  bench.start();
+
+  for (let i = 0; i < n; ++i) {
+    const index = i % length;
+    array[index] = proxyConfig.shouldUseProxy(hostname);
+  }
+
+  bench.end(n);
+
+  // Verify the entries to prevent dead code elimination from making
+  // the benchmark invalid.
+  for (let i = 0; i < length; ++i) {
+    assert.strictEqual(typeof array[i], 'boolean');
+  }
+}
diff --git a/lib/internal/http.js b/lib/internal/http.js
index 4f250a2e70a20f..948c90a3735be0 100644
--- a/lib/internal/http.js
+++ b/lib/internal/http.js
@@ -58,12 +58,21 @@ function traceEnd(...args) {
 }
 
 function ipToInt(ip) {
-  const octets = ip.split('.');
   let result = 0;
-  for (let i = 0; i < octets.length; i++) {
-    result = (result << 8) + NumberParseInt(octets[i]);
+  let multiplier = 1;
+  let octetShift = 0;
+  let code = 0;
+
+  for (let i = ip.length - 1; i >= 0; --i) { // Right to left: lowest octet and lowest digit first.
+    code = ip.charCodeAt(i);
+    if (code !== 46) { // 46 is '.', the octet separator; any other char is treated as a digit.
+      result += ((code - 48) * multiplier) << octetShift; // 48 is '0'. May go negative; see `>>> 0`.
+      multiplier *= 10;
+    } else {
+      octetShift += 8;
+      multiplier = 1;
+    }
   }
-  // Force unsigned 32-bit result
   return result >>> 0;
 }
 
@@ -95,6 +104,19 @@ function ipToInt(ip) {
 // When the proxy protocol is HTTPS, the modified request needs to be sent after
 // TLS handshake with the proxy server. Same goes to the HTTPS request tunnel establishment.
 
+/**
+ * @callback ProxyBypassMatchFn
+ * @param {string} host - Host to match against the bypass list.
+ * @param {string} [hostWithPort] - Host with port to match against the bypass list.
+ * @returns {boolean} - True if the host should be bypassed, false otherwise.
+ */
+
+/**
+ * @typedef {object} ProxyConnectionOptions
+ * @property {string} host - Hostname of the proxy server.
+ * @property {number} port - Port of the proxy server.
+ */
+
 /**
  * Represents the proxy configuration for an agent. The built-in http and https agent
  * implementation have one of this when they are configured to use a proxy.
@@ -105,9 +127,28 @@ function ipToInt(ip) {
  * @property {string} protocol - Protocol of the proxy server, e.g. 'http:' or 'https:'.
  * @property {string|undefined} auth - proxy-authorization header value, if username or password is provided.
  * @property {Array} bypassList - List of hosts to bypass the proxy.
- * @property {object} proxyConnectionOptions - Options for connecting to the proxy server.
+ * @property {ProxyConnectionOptions} proxyConnectionOptions - Options for connecting to the proxy server.
  */
 class ProxyConfig {
+  /** @type {string[]} Trimmed, lower-cased entries parsed from the no-proxy list. */
+  #bypassList = [];
+  /** @type {ProxyBypassMatchFn[]} One matcher per bypass entry, built once in the constructor. */
+  #bypassMatchFns = [];
+
+  /** @type {ProxyConnectionOptions} */
+  get proxyConnectionOptions() {
+    return {
+      host: this.hostname,
+      port: this.port,
+    };
+  }
+
+  /**
+   * @param {string} proxyUrl - The URL of the proxy server, e.g. 'http://localhost:8080'.
+   * @param {boolean} [keepAlive] - Whether to keep the connection alive.
+   *   This is not used in the current implementation but can be used in the future.
+   * @param {string} [noProxyList] - Comma-separated list of hosts to bypass the proxy.
+   */
   constructor(proxyUrl, keepAlive, noProxyList) {
     const { host, hostname, port, protocol, username, password } = new URL(proxyUrl);
     this.href = proxyUrl; // Full URL of the proxy server.
@@ -121,59 +162,93 @@ class ProxyConfig {
       const auth = `${decodeURIComponent(username)}:${decodeURIComponent(password)}`;
       this.auth = `Basic ${Buffer.from(auth).toString('base64')}`;
     }
+
     if (noProxyList) {
-      this.bypassList = noProxyList.split(',').map((entry) => entry.trim().toLowerCase());
-    } else {
-      this.bypassList = []; // No bypass list provided.
+      this.#bypassList = noProxyList
+        .split(',')
+        .map((entry) => entry.trim().toLowerCase());
     }
-    this.proxyConnectionOptions = {
-      host: this.hostname,
-      port: this.port,
-    };
-  }
 
-  // See: https://about.gitlab.com/blog/we-need-to-talk-no-proxy
-  // TODO(joyeecheung): share code with undici.
-  shouldUseProxy(hostname, port) {
-    const bypassList = this.bypassList;
-    if (this.bypassList.length === 0) {
-      return true; // No bypass list, always use the proxy.
+    if (this.#bypassList.length === 0) {
+      this.shouldUseProxy = () => true; // No bypass list, always use the proxy.
+    } else if (this.#bypassList.includes('*')) {
+      this.shouldUseProxy = () => false; // '*' in the bypass list means to bypass all hosts.
+    } else {
+      this.#buildBypassMatchFns();
+      // Use the bypass match functions to determine if the proxy should be used.
+      this.shouldUseProxy = this.#match;
     }
+  }
 
-    const host = hostname.toLowerCase();
-    const hostWithPort = port ? `${host}:${port}` : host;
-
-    for (let i = 0; i < bypassList.length; i++) {
-      const entry = bypassList[i];
-
-      if (entry === '*') return false; // * bypasses all hosts.
-      if (entry === host || entry === hostWithPort) return false; // Matching host and host:port
-
-      // Follow curl's behavior: strip leading dot before matching suffixes.
-      if (entry.startsWith('.')) {
-        const suffix = entry.substring(1);
-        if (host.endsWith(suffix)) return false;
+  #buildBypassMatchFns(bypassList = this.#bypassList) {
+    this.#bypassMatchFns = [];
+
+    for (const entry of bypassList) {
+      if (
+        // Handle wildcard entries like *.example.com
+        entry.startsWith('*.') ||
+        // Follow curl's behavior: strip leading dot before matching suffixes.
+        entry.startsWith('.')
+      ) {
+        const suffix = entry.split('');
+        suffix.shift(); // Remove the leading dot or asterisk.
+        const suffixLength = suffix.length;
+        if (suffixLength === 0) {
+          // If the suffix is empty, it means to match all hosts.
+          this.#bypassMatchFns.push(() => true);
+          continue;
+        }
+        this.#bypassMatchFns.push((host) => {
+          const offset = host.length - suffixLength;
+          if (offset < 0) return false; // Host is shorter than the suffix.
+          for (let i = 0; i < suffixLength; i++) {
+            if (host[offset + i] !== suffix[i]) {
+              return false;
+            }
+          }
+          return true;
+        });
+        continue;
       }
 
-      // Handle wildcards like *.example.com
-      if (entry.startsWith('*.') && host.endsWith(entry.substring(1))) return false;
-
       // Handle IP ranges (simple format like 192.168.1.0-192.168.1.255)
       // TODO(joyeecheung): support IPv6.
-      if (entry.includes('-') && isIPv4(host)) {
-        let { 0: startIP, 1: endIP } = entry.split('-');
-        startIP = startIP.trim();
-        endIP = endIP.trim();
-        if (startIP && endIP && isIPv4(startIP) && isIPv4(endIP)) {
-          const hostInt = ipToInt(host);
-          const startInt = ipToInt(startIP);
-          const endInt = ipToInt(endIP);
-          if (hostInt >= startInt && hostInt <= endInt) return false;
-        }
+      const { 0: startIP, 1: endIP } = entry.split('-').map((ip) => ip.trim());
+      if (entry.includes('-') && startIP && endIP && isIPv4(startIP) && isIPv4(endIP)) {
+        const startInt = ipToInt(startIP);
+        const endInt = ipToInt(endIP);
+        this.#bypassMatchFns.push((host) => {
+          if (isIPv4(host)) {
+            const hostInt = ipToInt(host);
+            return hostInt >= startInt && hostInt <= endInt;
+          }
+          return false;
+        });
+        continue;
       }
 
-      // It might be useful to support CIDR notation, but it's not so widely supported
-      // in other tools as a de-facto standard to follow, so we don't implement it for now.
+      // Handle simple host or IP entries
+      this.#bypassMatchFns.push((host, hostWithPort) => {
+        return (host === entry || hostWithPort === entry);
+      });
+    }
+  }
+
+  get bypassList() {
+    // Return a copy of the bypass list to prevent external modification.
+    return [...this.#bypassList];
+  }
+
+  // See: https://about.gitlab.com/blog/we-need-to-talk-no-proxy
+  // TODO(joyeecheung): share code with undici.
+  #match(hostname, port) {
+    const host = hostname.toLowerCase();
+    const hostWithPort = port ? `${host}:${port}` : host;
+
+    for (let i = 0; i < this.#bypassMatchFns.length; i++) {
+      if (this.#bypassMatchFns[i](host, hostWithPort)) {
+        return false; // If any bypass function matches, do not use the proxy.
+      }
     }
 
     return true; // If no matches found, use the proxy.