Skip to content

Commit 9e039d5

Browse files
committed
Async: Use Mutex to prevent data race amongst various goroutines
1 parent ed2ef64 commit 9e039d5

File tree

4 files changed

+64
-32
lines changed

4 files changed

+64
-32
lines changed

engine/besthdmovies.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,13 @@ func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex i
8989
return movie, nil
9090
}
9191

92-
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
92+
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
9393
submissionDetails := make(map[string]string)
9494
// Update movie download link if div.post-single-content on page
9595
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
96-
movie := getMovieFromMovies(e.Request, movies)
96+
movie := getMovieFromMovies(e.Request, scrapedMovies)
97+
scrapedMovies.Lock()
98+
defer scrapedMovies.Unlock()
9799
ptags := e.ChildTexts("p")
98100
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
99101
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
@@ -120,7 +122,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
120122
})
121123

122124
downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
123-
movie := getMovieFromMovies(e.Request, movies)
125+
movie := getMovieFromMovies(e.Request, scrapedMovies)
124126
links := e.ChildAttrs("a", "href")
125127
for _, link := range links {
126128
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
@@ -130,6 +132,8 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
130132
}
131133
downloadlink, err := url.Parse(link)
132134
if err == nil {
135+
scrapedMovies.Lock()
136+
defer scrapedMovies.Unlock()
133137
movie.DownloadLink = downloadlink
134138
downloadCollector.Visit(downloadlink.String())
135139
} else {
@@ -140,11 +144,13 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
140144
})
141145

142146
downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
143-
movie := getMovieFromMovies(e.Request, movies)
147+
movie := getMovieFromMovies(e.Request, scrapedMovies)
144148
zeesubmission := make(map[string]string)
145149
if e.ChildAttr("a.link_button", "href") != "" {
146150
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
147151
if err == nil {
152+
scrapedMovies.Lock()
153+
defer scrapedMovies.Unlock()
148154
movie.DownloadLink = downloadlink
149155
}
150156
} else {
@@ -165,7 +171,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
165171

166172
downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
167173
var err error
168-
movie := getMovieFromMovies(e.Request, movies)
174+
movie := getMovieFromMovies(e.Request, scrapedMovies)
169175
downloadlink := movie.DownloadLink
170176
inputNames := e.ChildAttrs("input", "name")
171177
inputValues := e.ChildAttrs("input", "value")
@@ -188,7 +194,9 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
188194

189195
downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
190196
downloadlink := e.ChildAttr("source", "src")
191-
movie := getMovieFromMovies(e.Request, movies)
197+
movie := getMovieFromMovies(e.Request, scrapedMovies)
198+
scrapedMovies.Lock()
199+
defer scrapedMovies.Unlock()
192200
movie.DownloadLink, _ = url.Parse(downloadlink)
193201
})
194202
}

engine/engines.go

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net/url"
88
"strconv"
99
"strings"
10+
"sync"
1011

1112
"github.com/gocolly/colly/v2"
1213
// "github.com/gocolly/colly/v2/debug"
@@ -43,7 +44,14 @@ type Engine interface {
4344
getParseAttrs() (string, string, error)
4445

4546
// updateDownloadProps: registers handlers on the download collector that update the scraped movies
46-
updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie)
47+
updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped)
48+
}
49+
50+
// scraped stores all scraped movies. Since the map is accessed from different goroutines,
51+
// the embedded Mutex guards it to prevent data races.
52+
type scraped struct {
53+
movies map[string]*Movie
54+
sync.Mutex
4755
}
4856

4957
// Scrape : Parse queries a url and return results
@@ -58,10 +66,10 @@ func Scrape(engine Engine) ([]Movie, error) {
5866
// Another collector for download Links
5967
downloadLinkCollector := c.Clone()
6068

61-
var movies = make(map[string]*Movie)
69+
scrapedMovies := scraped{movies: make(map[string]*Movie)}
6270

6371
// Any extra setup for downloads can be specified in this function
64-
engine.updateDownloadProps(downloadLinkCollector, movies)
72+
engine.updateDownloadProps(downloadLinkCollector, &scrapedMovies)
6573

6674
main, article, err := engine.getParseAttrs()
6775
if err != nil {
@@ -75,8 +83,10 @@ func Scrape(engine Engine) ([]Movie, error) {
7583
log.Errorf("%v could not be parsed", movie)
7684
} else {
7785
// Using the movie index as key to movie makes it unique
86+
scrapedMovies.Lock()
87+
defer scrapedMovies.Unlock()
7888
m := strconv.Itoa(movieIndex)
79-
movies[m] = &movie
89+
scrapedMovies.movies[m] = &movie
8090
ctx := colly.NewContext()
8191
ctx.Put("movieIndex", m)
8292
downloadLinkCollector.Request("GET", movie.DownloadLink.String(), nil, ctx, nil)
@@ -99,35 +109,32 @@ func Scrape(engine Engine) ([]Movie, error) {
99109
// movie details when we need it
100110
downloadLinkCollector.OnRequest(func(r *colly.Request) {
101111
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
102-
movie := getMovieFromMovies(r, movies)
103-
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
112+
movie := getMovieFromMovies(r, &scrapedMovies)
113+
log.Debugf("Retrieving Download Link %s\n", movie.Title)
104114
})
105115

106116
// If Response Content Type is not Text, Abort the Request to prevent fully downloading the
107117
// body in case of other types like mp4
108118
downloadLinkCollector.OnResponseHeaders(func(r *colly.Response) {
109-
log.Infof("%s", r.Headers)
110119
if !strings.Contains(r.Headers.Get("Content-Type"), "text") {
111-
log.Errorf("Response %s is not text/html. Aborting request", r.Request.URL)
120+
log.Debugf("Response %s is not text/html. Aborting request", r.Request.URL)
112121
r.Request.Abort()
113122
}
114123
})
115124

116125
downloadLinkCollector.OnResponse(func(r *colly.Response) {
117-
movie := getMovieFromMovies(r.Request, movies)
118-
log.Infof("Movie on Response %v", movie)
119-
// prettyPrint([]Movie{*movie})
120-
// log.Debugf("Retrieved Download Page %s\n", movie.DownloadLink.String())
126+
movie := getMovieFromMovies(r.Request, &scrapedMovies)
127+
log.Debugf("Retrieved Download Page %s\n", movie.Title)
121128
})
122129

123130
c.Visit(engine.getParseURL().String())
124131
c.Wait()
125132
downloadLinkCollector.Wait()
126133

127134
// Create a List of Movies
128-
v := make([]Movie, 0, len(movies))
135+
v := make([]Movie, 0, len(scrapedMovies.movies))
129136

130-
for _, value := range movies {
137+
for _, value := range scrapedMovies.movies {
131138
v = append(v, *value)
132139
}
133140
prettyPrint(v)
@@ -241,10 +248,12 @@ func getMovieIndexFromCtx(r *colly.Request) int {
241248
}
242249

243250
// Get the Movie stored under the request Context's "movieIndex"; returns an empty Movie if none is stored
244-
func getMovieFromMovies(r *colly.Request, movies map[string]*Movie) *Movie {
251+
func getMovieFromMovies(r *colly.Request, scrapedMovies *scraped) *Movie {
245252
movieIndex := r.Ctx.Get("movieIndex")
246-
if _, ok := movies[movieIndex]; ok {
247-
return movies[movieIndex]
253+
scrapedMovies.Lock()
254+
defer scrapedMovies.Unlock()
255+
if _, ok := scrapedMovies.movies[movieIndex]; ok {
256+
return scrapedMovies.movies[movieIndex]
248257
}
249258
return &Movie{}
250259
}

engine/fzmovies.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,19 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int)
9090
return movie, nil
9191
}
9292

93-
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
93+
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
9494
// Update movie download link if ul.moviesfiles on page
9595
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
96-
movie := getMovieFromMovies(e.Request, movies)
96+
movie := getMovieFromMovies(e.Request, scrapedMovies)
9797
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
9898
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
9999
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
100100
if err != nil {
101101
log.Fatal(err)
102102
}
103+
104+
scrapedMovies.Lock()
105+
defer scrapedMovies.Unlock()
103106
movie.DownloadLink = downloadLink
104107
re := regexp.MustCompile(`(.* MB)`)
105108
dl := strings.TrimPrefix(re.FindStringSubmatch(e.ChildText("dcounter"))[0], "(")
@@ -114,7 +117,9 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
114117
if err != nil {
115118
log.Fatal(err)
116119
}
117-
movie := getMovieFromMovies(e.Request, movies)
120+
movie := getMovieFromMovies(e.Request, scrapedMovies)
121+
scrapedMovies.Lock()
122+
defer scrapedMovies.Unlock()
118123
movie.DownloadLink = downloadLink
119124
}
120125
})

engine/netnaija.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,12 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex
115115
return movie, nil
116116
}
117117

118-
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
118+
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
119119
// Update movie size
120120
downloadCollector.OnHTML("button[id=download-button]", func(e *colly.HTMLElement) {
121-
movie := getMovieFromMovies(e.Request, movies)
121+
movie := getMovieFromMovies(e.Request, scrapedMovies)
122+
scrapedMovies.Lock()
123+
defer scrapedMovies.Unlock()
122124
movie.Size = strings.TrimSpace(e.ChildText("span.size"))
123125
})
124126

@@ -127,14 +129,18 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
127129
if err != nil {
128130
log.Fatal(err)
129131
}
130-
movie := getMovieFromMovies(e.Request, movies)
132+
movie := getMovieFromMovies(e.Request, scrapedMovies)
133+
scrapedMovies.Lock()
134+
defer scrapedMovies.Unlock()
131135
movie.DownloadLink = downloadLink
132136
downloadCollector.Visit(downloadLink.String())
133137
})
134138

135139
// Update movie download link if a[id=download] on page
136140
downloadCollector.OnHTML("a[id=download]", func(e *colly.HTMLElement) {
137-
movie := getMovieFromMovies(e.Request, movies)
141+
movie := getMovieFromMovies(e.Request, scrapedMovies)
142+
scrapedMovies.Lock()
143+
defer scrapedMovies.Unlock()
138144
movie.Size = strings.TrimSpace(e.ChildText("span[id=download-size]"))
139145
downloadLink, err := url.Parse(e.Attr("href"))
140146
if err != nil {
@@ -151,15 +157,19 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
151157
if err != nil {
152158
log.Fatal(err)
153159
}
154-
movie := getMovieFromMovies(e.Request, movies)
160+
movie := getMovieFromMovies(e.Request, scrapedMovies)
155161
log.Infof("Parsing Downloads %s %s", movie.Title, downloadLink.String())
162+
scrapedMovies.Lock()
163+
defer scrapedMovies.Unlock()
156164
movie.DownloadLink = downloadLink
157165
}
158166
})
159167

160168
//for series or parts
161169
downloadCollector.OnHTML("div.video-series-latest-episodes", func(inn *colly.HTMLElement) {
162-
movie := getMovieFromMovies(inn.Request, movies)
170+
movie := getMovieFromMovies(inn.Request, scrapedMovies)
171+
scrapedMovies.Lock()
172+
defer scrapedMovies.Unlock()
163173
movie.IsSeries = true
164174
inn.ForEach("a", func(_ int, e *colly.HTMLElement) {
165175
downloadLink, err := url.Parse(e.Attr("href"))

0 commit comments

Comments
 (0)