@@ -34,6 +34,7 @@ max_allowed_length - if set, isURL will not allow URLs longer than the specified
3434
3535*/
3636
37+
3738const default_url_options = {
3839 protocols : [ 'http' , 'https' , 'ftp' ] ,
3940 require_tld : true ,
@@ -70,10 +71,7 @@ export default function isURL(url, options) {
7071 return false ;
7172 }
7273
73- if (
74- ! options . allow_query_components &&
75- ( includes ( url , '?' ) || includes ( url , '&' ) )
76- ) {
74+ if ( ! options . allow_query_components && ( includes ( url , '?' ) || includes ( url , '&' ) ) ) {
7775 return false ;
7876 }
7977
@@ -87,30 +85,91 @@ export default function isURL(url, options) {
8785
8886 // Replaced the 'split("://")' logic with a regex to match the protocol.
8987 // This correctly identifies schemes like `javascript:` which don't use `//`.
88+ // However, we need to be careful not to confuse authentication credentials (user:password@host)
89+ // with protocols. A colon before an @ symbol might be part of auth, not a protocol separator.
9090 const protocol_match = url . match ( / ^ ( [ a - z ] [ a - z 0 - 9 + \- . ] * ) : / i) ;
91- const hadExplicitProtocol = ! ! protocol_match ;
91+ let had_explicit_protocol = false ;
92+
93+ const cleanUpProtocol = ( potential_protocol ) => {
94+ had_explicit_protocol = true ;
95+ protocol = potential_protocol . toLowerCase ( ) ;
96+
97+ if ( options . require_valid_protocol && options . protocols . indexOf ( protocol ) === - 1 ) {
98+ // The identified protocol is not in the allowed list.
99+ return false ;
100+ }
101+
102+ // Remove the protocol from the URL string.
103+ return url . substring ( protocol_match [ 0 ] . length ) ;
104+ } ;
92105
93106 if ( protocol_match ) {
94- protocol = protocol_match [ 1 ] . toLowerCase ( ) ;
95- if (
96- options . require_valid_protocol &&
97- options . protocols . indexOf ( protocol ) === - 1
98- ) {
99- return false ; // The identified protocol is not in the allowed list.
107+ const potential_protocol = protocol_match [ 1 ] ;
108+ const after_colon = url . substring ( protocol_match [ 0 ] . length ) ;
109+
110+ // Check if what follows looks like authentication credentials (user:password@host)
111+ // rather than a protocol. This happens when:
112+ // 1. There's no `//` after the colon (protocols like `http://` have this)
113+ // 2. There's an `@` symbol before any `/`
114+ // 3. The part before `@` contains only valid auth characters (alphanumeric, -, _, ., %, :)
115+ const starts_with_slashes = after_colon . slice ( 0 , 2 ) === '//' ;
116+
117+ if ( ! starts_with_slashes ) {
118+ const first_slash_position = after_colon . indexOf ( '/' ) ;
119+ const before_slash = first_slash_position === - 1
120+ ? after_colon
121+ : after_colon . substring ( 0 , first_slash_position ) ;
122+ const at_position = before_slash . indexOf ( '@' ) ;
123+
124+ if ( at_position !== - 1 ) {
125+ const before_at = before_slash . substring ( 0 , at_position ) ;
126+ const valid_auth_regex = / ^ [ a - z A - Z 0 - 9 \- _ . % : ] * $ / ;
127+ const is_valid_auth = valid_auth_regex . test ( before_at ) ;
128+
129+ if ( is_valid_auth ) {
130+ // This looks like authentication (e.g., user:password@host), not a protocol
131+ if ( options . require_protocol ) {
132+ return false ;
133+ }
134+
135+ // Don't consume the colon; let the auth parsing handle it later
136+ } else {
137+ // This looks like a malicious protocol (e.g., javascript:alert();@host)
138+ url = cleanUpProtocol ( potential_protocol ) ;
139+
140+ if ( url === false ) {
141+ return false ;
142+ }
143+ }
144+ } else {
145+ // No @ symbol, this is definitely a protocol
146+ url = cleanUpProtocol ( potential_protocol ) ;
147+
148+ if ( url === false ) {
149+ return false ;
150+ }
151+ }
152+ } else {
153+ // Starts with '//', this is definitely a protocol like http://
154+ url = cleanUpProtocol ( potential_protocol ) ;
155+
156+ if ( url === false ) {
157+ return false ;
158+ }
100159 }
101- url = url . substring ( protocol_match [ 0 ] . length ) ; // Remove the protocol from the URL string.
102160 } else if ( options . require_protocol ) {
103- return false ; // A protocol was required but not found.
161+ return false ;
104162 }
105163
106164 // Handle leading '//' only as protocol-relative when there was NO explicit protocol.
107165 // If there was an explicit protocol, '//' is the normal separator
108166 // and should be stripped unconditionally.
109167 if ( url . slice ( 0 , 2 ) === '//' ) {
110- if ( ! hadExplicitProtocol && ! options . allow_protocol_relative_urls ) {
168+ if ( ! had_explicit_protocol && ! options . allow_protocol_relative_urls ) {
111169 return false ;
112170 }
113- url = url . slice ( 2 ) ; // Remove the '//' from the URL string.
171+
172+ url = url . slice ( 2 ) ;
114173 }
115174
116175 if ( url === '' ) {
0 commit comments