Skip to content

Commit 04fd376

Browse files
authored
Merge branch 'dev' into patch-1
2 parents d6e95e1 + faac44c commit 04fd376

File tree

2 files changed

+119
-66
lines changed

2 files changed

+119
-66
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Base
2-
FROM golang:1.21.4-alpine AS builder
2+
FROM golang:1.24.5-alpine AS builder
33

44
RUN apk add --no-cache git build-base gcc musl-dev
55
WORKDIR /app

runner/runner.go

Lines changed: 118 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"net"
1414
"net/http"
1515
"net/http/httputil"
16+
"net/url"
1617
"os"
1718
"path"
1819
"path/filepath"
@@ -2024,7 +2025,7 @@ retry:
20242025
var faviconData []byte
20252026
if scanopts.Favicon {
20262027
var err error
2027-
faviconMMH3, faviconMD5, faviconPath, faviconData, faviconURL, err = r.HandleFaviconHash(hp, req, resp.Data, true)
2028+
faviconMMH3, faviconMD5, faviconPath, faviconData, faviconURL, err = r.HandleFaviconHash(hp, req, resp.Data, finalURL, true)
20282029
if err == nil {
20292030
builder.WriteString(" [")
20302031
if !scanopts.OutputWithNoColor {
@@ -2333,85 +2334,119 @@ func calculatePerceptionHash(screenshotBytes []byte) (uint64, error) {
23332334
return pHash.GetHash(), nil
23342335
}
23352336

2336-
func (r *Runner) HandleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp []byte, defaultProbe bool) (string, string, string, []byte, string, error) {
2337+
func (r *Runner) HandleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp []byte, finalURL string, defaultProbe bool) (string, string, string, []byte, string, error) {
23372338
// Check if current URI is ending with .ico => use current body without additional requests
23382339
if path.Ext(req.URL.Path) == ".ico" {
2339-
MMH3Hash, MD5Hash, err := r.calculateFaviconHashWithRaw(currentResp)
2340-
return MMH3Hash, MD5Hash, req.URL.Path, currentResp, "", err
2340+
mmh3, md5h, err := r.calculateFaviconHashWithRaw(currentResp)
2341+
return mmh3, md5h, req.URL.Path, currentResp, req.URL.String(), err
23412342
}
23422343

2343-
// search in the response of the requested path for element and rel shortcut/mask/apple-touch icon
2344-
// link with .ico extension (which will be prioritized if available)
2345-
// if not, any of link from other icons can be requested
2346-
potentialURLs, err := extractPotentialFavIconsURLs(currentResp)
2344+
// Parse HTML: collect <link rel="...icon..."> hrefs + optional <base href>
2345+
hrefs, baseHref, err := extractPotentialFavIconsURLs(currentResp)
23472346
if err != nil {
23482347
return "", "", "", nil, "", err
23492348
}
23502349

2351-
clone := req.Clone(context.Background())
2350+
// If none found and probing allowed, add default /favicon.ico
2351+
if len(hrefs) == 0 && defaultProbe {
2352+
hrefs = append(hrefs, "/favicon.ico")
2353+
}
23522354

2353-
var faviconMMH3, faviconMD5, faviconPath, faviconURL string
2354-
var faviconData, faviconDecodedData []byte
2355-
errCount := 0
2356-
if len(potentialURLs) == 0 && defaultProbe {
2357-
potentialURLs = append(potentialURLs, "/favicon.ico")
2358-
}
2359-
// We only want upto two favicon requests, if the
2360-
// first one fails, we will try the second one
2361-
for _, potentialURL := range potentialURLs {
2362-
if errCount == 2 {
2363-
break
2355+
// Determine base URL: prefer finalURL (redirect target) then apply <base href>
2356+
baseNet, _ := url.Parse(req.URL.String())
2357+
if finalURL != "" {
2358+
if u, err := url.Parse(finalURL); err == nil {
2359+
baseNet = u
23642360
}
2365-
URL, err := urlutil.ParseURL(potentialURL, r.options.Unsafe)
2366-
2367-
isFavUrl, isBase64FavIcon := err == nil, false
2368-
if !isFavUrl {
2369-
isBase64FavIcon = stringz.IsBase64Icon(potentialURL)
2361+
}
2362+
if baseHref != "" {
2363+
if bu, err := url.Parse(baseHref); err == nil {
2364+
baseNet = baseNet.ResolveReference(bu)
23702365
}
2366+
}
23712367

2372-
if !isFavUrl && !isBase64FavIcon {
2368+
// Clone original request (reuse headers/cookies)
2369+
clone := req.Clone(context.Background())
2370+
2371+
var (
2372+
faviconMMH3 string
2373+
faviconMD5 string
2374+
faviconPath string
2375+
faviconURL string
2376+
faviconData []byte
2377+
tries int // network fetch attempts
2378+
)
2379+
2380+
// Iterate candidates (.ico first ordering handled in extractPotentialFavIconsURLs)
2381+
for _, raw := range hrefs {
2382+
if tries == 2 {
2383+
break
2384+
}
2385+
raw = strings.TrimSpace(raw)
2386+
if raw == "" {
23732387
continue
23742388
}
23752389

2376-
if isFavUrl {
2377-
if URL.IsAbs() {
2378-
clone.SetURL(URL)
2379-
clone.Host = URL.Host
2380-
potentialURL = ""
2381-
} else {
2382-
potentialURL = URL.String()
2383-
}
2384-
2385-
if potentialURL != "" {
2386-
err = clone.UpdateRelPath(potentialURL, false)
2387-
if err != nil {
2388-
continue
2389-
}
2390+
// data: URL (base64) favicon
2391+
if stringz.IsBase64Icon(raw) {
2392+
data, err := stringz.DecodeBase64Icon(raw)
2393+
if err != nil {
2394+
continue
23902395
}
2391-
resp, err := hp.Do(clone, httpx.UnsafeOptions{})
2396+
mmh3, md5h, err := r.calculateFaviconHashWithRaw(data)
23922397
if err != nil {
2393-
errCount++
23942398
continue
23952399
}
2396-
faviconDecodedData = resp.Data
2400+
return mmh3, md5h, "data:", data, "", nil
23972401
}
2398-
// if the favicon is base64 encoded, decode before hashing
2399-
if isBase64FavIcon {
2400-
if faviconDecodedData, err = stringz.DecodeBase64Icon(potentialURL); err != nil {
2402+
2403+
// Resolve relative/absolute href
2404+
parsedHref, err := url.Parse(raw)
2405+
if err != nil {
2406+
continue
2407+
}
2408+
resolvedNet := baseNet.ResolveReference(parsedHref)
2409+
resolvedURL, err := urlutil.ParseURL(resolvedNet.String(), r.options.Unsafe)
2410+
if err != nil {
2411+
continue
2412+
}
2413+
2414+
clone.SetURL(resolvedURL)
2415+
respFav, err := hp.Do(clone, httpx.UnsafeOptions{})
2416+
if err != nil || len(respFav.Data) == 0 {
2417+
tries++
2418+
// Root fallback: directory-relative failed and raw had no leading slash
2419+
if !strings.HasPrefix(raw, "/") {
2420+
rootResolvedNet := baseNet.ResolveReference(&url.URL{Path: "/" + raw})
2421+
rootResolvedURL, err2 := urlutil.ParseURL(rootResolvedNet.String(), r.options.Unsafe)
2422+
if err2 != nil {
2423+
continue
2424+
}
2425+
clone.SetURL(rootResolvedURL)
2426+
if respFav2, err3 := hp.Do(clone, httpx.UnsafeOptions{}); err3 == nil && len(respFav2.Data) > 0 {
2427+
respFav = respFav2
2428+
} else {
2429+
continue
2430+
}
2431+
} else {
24012432
continue
24022433
}
24032434
}
2404-
MMH3Hash, MD5Hash, err := r.calculateFaviconHashWithRaw(faviconDecodedData)
2435+
2436+
// Hash favicon bytes
2437+
mmh3, md5h, err := r.calculateFaviconHashWithRaw(respFav.Data)
24052438
if err != nil {
24062439
continue
24072440
}
2441+
faviconMMH3 = mmh3
2442+
faviconMD5 = md5h
2443+
faviconPath = raw
24082444
faviconURL = clone.URL.String()
2409-
faviconPath = potentialURL
2410-
faviconMMH3 = MMH3Hash
2411-
faviconMD5 = MD5Hash
2412-
faviconData = faviconDecodedData
2445+
faviconData = respFav.Data
2446+
gologger.Debug().Msgf("favicon resolved url=%s raw_href=%s size=%d bytes", faviconURL, faviconPath, len(faviconData))
24132447
break
24142448
}
2449+
24152450
return faviconMMH3, faviconMD5, faviconPath, faviconData, faviconURL, nil
24162451
}
24172452

@@ -2423,25 +2458,43 @@ func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, string, error
24232458
return fmt.Sprintf("%d", hashNum), md5Hash, nil
24242459
}
24252460

2426-
func extractPotentialFavIconsURLs(resp []byte) ([]string, error) {
2427-
var potentialURLs []string
2428-
document, err := goquery.NewDocumentFromReader(bytes.NewReader(resp))
2461+
func extractPotentialFavIconsURLs(resp []byte) (candidates []string, baseHref string, err error) {
2462+
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp))
24292463
if err != nil {
2430-
return nil, err
2464+
return nil, "", err
2465+
}
2466+
2467+
if b := doc.Find("base[href]").First(); b.Length() == 1 {
2468+
if v, ok := b.Attr("href"); ok {
2469+
baseHref = strings.TrimSpace(v)
2470+
}
24312471
}
2432-
document.Find("link").Each(func(i int, item *goquery.Selection) {
2433-
href, okHref := item.Attr("href")
2434-
rel, okRel := item.Attr("rel")
2435-
isValidRel := okRel && stringsutil.EqualFoldAny(rel, "icon", "shortcut icon", "mask-icon", "apple-touch-icon")
2436-
if okHref && isValidRel {
2437-
potentialURLs = append(potentialURLs, href)
2472+
2473+
doc.Find("link[rel]").Each(func(_ int, s *goquery.Selection) {
2474+
rel := strings.ToLower(strings.TrimSpace(s.AttrOr("rel", "")))
2475+
href := strings.TrimSpace(s.AttrOr("href", ""))
2476+
if href == "" {
2477+
return
2478+
}
2479+
for _, tok := range strings.Fields(rel) {
2480+
switch tok {
2481+
case "icon", "shortcut", "shortcut-icon", "apple-touch-icon", "mask-icon", "alternate":
2482+
candidates = append(candidates, href)
2483+
return
2484+
}
24382485
}
24392486
})
2440-
// Sort and prefer icon with .ico extension
2441-
sort.Slice(potentialURLs, func(i, j int) bool {
2442-
return !strings.HasSuffix(potentialURLs[i], ".ico")
2487+
2488+
sort.SliceStable(candidates, func(i, j int) bool {
2489+
ai := strings.HasSuffix(strings.ToLower(candidates[i]), ".ico")
2490+
aj := strings.HasSuffix(strings.ToLower(candidates[j]), ".ico")
2491+
if ai == aj {
2492+
return candidates[i] < candidates[j]
2493+
}
2494+
return ai && !aj
24432495
})
2444-
return potentialURLs, nil
2496+
2497+
return candidates, baseHref, nil
24452498
}
24462499

24472500
// SaveResumeConfig to file

0 commit comments

Comments
 (0)