@@ -13,6 +13,7 @@ import (
1313 "net"
1414 "net/http"
1515 "net/http/httputil"
16+ "net/url"
1617 "os"
1718 "path"
1819 "path/filepath"
@@ -2024,7 +2025,7 @@ retry:
20242025 var faviconData []byte
20252026 if scanopts .Favicon {
20262027 var err error
2027- faviconMMH3 , faviconMD5 , faviconPath , faviconData , faviconURL , err = r .HandleFaviconHash (hp , req , resp .Data , true )
2028+ faviconMMH3 , faviconMD5 , faviconPath , faviconData , faviconURL , err = r .HandleFaviconHash (hp , req , resp .Data , finalURL , true )
20282029 if err == nil {
20292030 builder .WriteString (" [" )
20302031 if ! scanopts .OutputWithNoColor {
@@ -2333,85 +2334,119 @@ func calculatePerceptionHash(screenshotBytes []byte) (uint64, error) {
23332334 return pHash .GetHash (), nil
23342335}
23352336
2336- func (r * Runner ) HandleFaviconHash (hp * httpx.HTTPX , req * retryablehttp.Request , currentResp []byte , defaultProbe bool ) (string , string , string , []byte , string , error ) {
2337+ func (r * Runner ) HandleFaviconHash (hp * httpx.HTTPX , req * retryablehttp.Request , currentResp []byte , finalURL string , defaultProbe bool ) (string , string , string , []byte , string , error ) {
23372338 // Check if current URI is ending with .ico => use current body without additional requests
23382339 if path .Ext (req .URL .Path ) == ".ico" {
2339- MMH3Hash , MD5Hash , err := r .calculateFaviconHashWithRaw (currentResp )
2340- return MMH3Hash , MD5Hash , req .URL .Path , currentResp , "" , err
2340+ mmh3 , md5h , err := r .calculateFaviconHashWithRaw (currentResp )
2341+ return mmh3 , md5h , req .URL .Path , currentResp , req . URL . String () , err
23412342 }
23422343
2343- // search in the response of the requested path for element and rel shortcut/mask/apple-touch icon
2344- // link with .ico extension (which will be prioritized if available)
2345- // if not, any of link from other icons can be requested
2346- potentialURLs , err := extractPotentialFavIconsURLs (currentResp )
2344+ // Parse HTML: collect <link rel="...icon..."> hrefs + optional <base href>
2345+ hrefs , baseHref , err := extractPotentialFavIconsURLs (currentResp )
23472346 if err != nil {
23482347 return "" , "" , "" , nil , "" , err
23492348 }
23502349
2351- clone := req .Clone (context .Background ())
2350+ // If none found and probing allowed, add default /favicon.ico
2351+ if len (hrefs ) == 0 && defaultProbe {
2352+ hrefs = append (hrefs , "/favicon.ico" )
2353+ }
23522354
2353- var faviconMMH3 , faviconMD5 , faviconPath , faviconURL string
2354- var faviconData , faviconDecodedData []byte
2355- errCount := 0
2356- if len (potentialURLs ) == 0 && defaultProbe {
2357- potentialURLs = append (potentialURLs , "/favicon.ico" )
2358- }
2359- // We only want upto two favicon requests, if the
2360- // first one fails, we will try the second one
2361- for _ , potentialURL := range potentialURLs {
2362- if errCount == 2 {
2363- break
2355+ // Determine base URL: prefer finalURL (redirect target) then apply <base href>
2356+ baseNet , _ := url .Parse (req .URL .String ())
2357+ if finalURL != "" {
2358+ if u , err := url .Parse (finalURL ); err == nil {
2359+ baseNet = u
23642360 }
2365- URL , err := urlutil .ParseURL (potentialURL , r .options .Unsafe )
2366-
2367- isFavUrl , isBase64FavIcon := err == nil , false
2368- if ! isFavUrl {
2369- isBase64FavIcon = stringz .IsBase64Icon (potentialURL )
2361+ }
2362+ if baseHref != "" {
2363+ if bu , err := url .Parse (baseHref ); err == nil {
2364+ baseNet = baseNet .ResolveReference (bu )
23702365 }
2366+ }
23712367
2372- if ! isFavUrl && ! isBase64FavIcon {
2368+ // Clone original request (reuse headers/cookies)
2369+ clone := req .Clone (context .Background ())
2370+
2371+ var (
2372+ faviconMMH3 string
2373+ faviconMD5 string
2374+ faviconPath string
2375+ faviconURL string
2376+ faviconData []byte
2377+ tries int // network fetch attempts
2378+ )
2379+
2380+ // Iterate candidates (.ico first ordering handled in extractPotentialFavIconsURLs)
2381+ for _ , raw := range hrefs {
2382+ if tries == 2 {
2383+ break
2384+ }
2385+ raw = strings .TrimSpace (raw )
2386+ if raw == "" {
23732387 continue
23742388 }
23752389
2376- if isFavUrl {
2377- if URL .IsAbs () {
2378- clone .SetURL (URL )
2379- clone .Host = URL .Host
2380- potentialURL = ""
2381- } else {
2382- potentialURL = URL .String ()
2383- }
2384-
2385- if potentialURL != "" {
2386- err = clone .UpdateRelPath (potentialURL , false )
2387- if err != nil {
2388- continue
2389- }
2390+ // data: URL (base64) favicon
2391+ if stringz .IsBase64Icon (raw ) {
2392+ data , err := stringz .DecodeBase64Icon (raw )
2393+ if err != nil {
2394+ continue
23902395 }
2391- resp , err := hp . Do ( clone , httpx. UnsafeOptions {} )
2396+ mmh3 , md5h , err := r . calculateFaviconHashWithRaw ( data )
23922397 if err != nil {
2393- errCount ++
23942398 continue
23952399 }
2396- faviconDecodedData = resp . Data
2400+ return mmh3 , md5h , "data:" , data , "" , nil
23972401 }
2398- // if the favicon is base64 encoded, decode before hashing
2399- if isBase64FavIcon {
2400- if faviconDecodedData , err = stringz .DecodeBase64Icon (potentialURL ); err != nil {
2402+
2403+ // Resolve relative/absolute href
2404+ parsedHref , err := url .Parse (raw )
2405+ if err != nil {
2406+ continue
2407+ }
2408+ resolvedNet := baseNet .ResolveReference (parsedHref )
2409+ resolvedURL , err := urlutil .ParseURL (resolvedNet .String (), r .options .Unsafe )
2410+ if err != nil {
2411+ continue
2412+ }
2413+
2414+ clone .SetURL (resolvedURL )
2415+ respFav , err := hp .Do (clone , httpx.UnsafeOptions {})
2416+ if err != nil || len (respFav .Data ) == 0 {
2417+ tries ++
2418+ // Root fallback: directory-relative failed and raw had no leading slash
2419+ if ! strings .HasPrefix (raw , "/" ) {
2420+ rootResolvedNet := baseNet .ResolveReference (& url.URL {Path : "/" + raw })
2421+ rootResolvedURL , err2 := urlutil .ParseURL (rootResolvedNet .String (), r .options .Unsafe )
2422+ if err2 != nil {
2423+ continue
2424+ }
2425+ clone .SetURL (rootResolvedURL )
2426+ if respFav2 , err3 := hp .Do (clone , httpx.UnsafeOptions {}); err3 == nil && len (respFav2 .Data ) > 0 {
2427+ respFav = respFav2
2428+ } else {
2429+ continue
2430+ }
2431+ } else {
24012432 continue
24022433 }
24032434 }
2404- MMH3Hash , MD5Hash , err := r .calculateFaviconHashWithRaw (faviconDecodedData )
2435+
2436+ // Hash favicon bytes
2437+ mmh3 , md5h , err := r .calculateFaviconHashWithRaw (respFav .Data )
24052438 if err != nil {
24062439 continue
24072440 }
2441+ faviconMMH3 = mmh3
2442+ faviconMD5 = md5h
2443+ faviconPath = raw
24082444 faviconURL = clone .URL .String ()
2409- faviconPath = potentialURL
2410- faviconMMH3 = MMH3Hash
2411- faviconMD5 = MD5Hash
2412- faviconData = faviconDecodedData
2445+ faviconData = respFav .Data
2446+ gologger .Debug ().Msgf ("favicon resolved url=%s raw_href=%s size=%d bytes" , faviconURL , faviconPath , len (faviconData ))
24132447 break
24142448 }
2449+
24152450 return faviconMMH3 , faviconMD5 , faviconPath , faviconData , faviconURL , nil
24162451}
24172452
@@ -2423,25 +2458,43 @@ func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, string, error
24232458 return fmt .Sprintf ("%d" , hashNum ), md5Hash , nil
24242459}
24252460
2426- func extractPotentialFavIconsURLs (resp []byte ) ([]string , error ) {
2427- var potentialURLs []string
2428- document , err := goquery .NewDocumentFromReader (bytes .NewReader (resp ))
2461+ func extractPotentialFavIconsURLs (resp []byte ) (candidates []string , baseHref string , err error ) {
2462+ doc , err := goquery .NewDocumentFromReader (bytes .NewReader (resp ))
24292463 if err != nil {
2430- return nil , err
2464+ return nil , "" , err
2465+ }
2466+
2467+ if b := doc .Find ("base[href]" ).First (); b .Length () == 1 {
2468+ if v , ok := b .Attr ("href" ); ok {
2469+ baseHref = strings .TrimSpace (v )
2470+ }
24312471 }
2432- document .Find ("link" ).Each (func (i int , item * goquery.Selection ) {
2433- href , okHref := item .Attr ("href" )
2434- rel , okRel := item .Attr ("rel" )
2435- isValidRel := okRel && stringsutil .EqualFoldAny (rel , "icon" , "shortcut icon" , "mask-icon" , "apple-touch-icon" )
2436- if okHref && isValidRel {
2437- potentialURLs = append (potentialURLs , href )
2472+
2473+ doc .Find ("link[rel]" ).Each (func (_ int , s * goquery.Selection ) {
2474+ rel := strings .ToLower (strings .TrimSpace (s .AttrOr ("rel" , "" )))
2475+ href := strings .TrimSpace (s .AttrOr ("href" , "" ))
2476+ if href == "" {
2477+ return
2478+ }
2479+ for _ , tok := range strings .Fields (rel ) {
2480+ switch tok {
2481+ case "icon" , "shortcut" , "shortcut-icon" , "apple-touch-icon" , "mask-icon" , "alternate" :
2482+ candidates = append (candidates , href )
2483+ return
2484+ }
24382485 }
24392486 })
2440- // Sort and prefer icon with .ico extension
2441- sort .Slice (potentialURLs , func (i , j int ) bool {
2442- return ! strings .HasSuffix (potentialURLs [i ], ".ico" )
2487+
2488+ sort .SliceStable (candidates , func (i , j int ) bool {
2489+ ai := strings .HasSuffix (strings .ToLower (candidates [i ]), ".ico" )
2490+ aj := strings .HasSuffix (strings .ToLower (candidates [j ]), ".ico" )
2491+ if ai == aj {
2492+ return candidates [i ] < candidates [j ]
2493+ }
2494+ return ai && ! aj
24432495 })
2444- return potentialURLs , nil
2496+
2497+ return candidates , baseHref , nil
24452498}
24462499
24472500// SaveResumeConfig to file
0 commit comments