Skip to content

Commit 02e457d

Browse files
committed
Async Feature: Used context to synchronize movie updates
1 parent 41a8ff9 commit 02e457d

File tree

4 files changed

+44
-45
lines changed

4 files changed

+44
-45
lines changed

engine/besthdmovies.go

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,9 @@ func (engine *BestHDEngine) getParseAttrs() (string, string, error) {
5757
return "body", "article.latestPost", nil
5858
}
5959

60-
func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
60+
func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error) {
6161
movie := Movie{
62+
Index: movieIndex,
6263
IsSeries: false,
6364
Source: engine.Name,
6465
}
@@ -92,7 +93,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
9293
submissionDetails := make(map[string]string)
9394
// Update movie download link if div.post-single-content on page
9495
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
95-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
96+
movie := getMovieFromMovies(e.Request, movies)
9697
ptags := e.ChildTexts("p")
9798
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
9899
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
@@ -119,7 +120,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
119120
})
120121

121122
downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
122-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
123+
movie := getMovieFromMovies(e.Request, movies)
123124
links := e.ChildAttrs("a", "href")
124125
for _, link := range links {
125126
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
@@ -139,7 +140,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
139140
})
140141

141142
downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
142-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
143+
movie := getMovieFromMovies(e.Request, movies)
143144
zeesubmission := make(map[string]string)
144145
if e.ChildAttr("a.link_button", "href") != "" {
145146
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
@@ -164,7 +165,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
164165

165166
downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
166167
var err error
167-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
168+
movie := getMovieFromMovies(e.Request, movies)
168169
downloadlink := movie.DownloadLink
169170
inputNames := e.ChildAttrs("input", "name")
170171
inputValues := e.ChildAttrs("input", "value")
@@ -188,7 +189,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
188189

189190
downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
190191
downloadlink := e.ChildAttr("source", "src")
191-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
192+
movie := getMovieFromMovies(e.Request, movies)
192193
movie.DownloadLink, _ = url.Parse(downloadlink)
193194
})
194195
}

engine/engines.go

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type Engine interface {
3434
List(page int) SearchResult
3535
String() string
3636
// parseSingleMovie: parses the result of a colly HTMLElement and returns a movie
37-
parseSingleMovie(el *colly.HTMLElement) (Movie, error)
37+
parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error)
3838

3939
// getParseAttrs : get the attributes to use to parse a returned soup
4040
// the first return string is the part of the html to be parsed e.g `body`, `main`
@@ -52,14 +52,11 @@ func Scrape(engine Engine) ([]Movie, error) {
5252
// Cache responses to prevent multiple download of pages
5353
// even if the collector is restarted
5454
colly.CacheDir("./gophie_cache"),
55-
// colly.Async(true),
55+
colly.Async(true),
5656
// colly.Debugger(&debug.LogDebugger{}),
5757
)
5858
// Another collector for download Links
59-
downloadLinkCollector := colly.NewCollector(
60-
colly.CacheDir("./gophie-cache"),
61-
colly.Async(true)
62-
)
59+
downloadLinkCollector := c.Clone()
6360

6461
var movies = make(map[string]*Movie)
6562

@@ -70,21 +67,22 @@ func Scrape(engine Engine) ([]Movie, error) {
7067
if err != nil {
7168
log.Fatal(err)
7269
}
70+
movieIndex := 0
7371
c.OnHTML(main, func(e *colly.HTMLElement) {
7472
e.ForEach(article, func(_ int, el *colly.HTMLElement) {
75-
movie, err := engine.parseSingleMovie(el)
73+
movie, err := engine.parseSingleMovie(el, movieIndex)
7674
if err != nil {
7775
log.Errorf("%v could not be parsed", movie)
7876
} else {
7977
// Using DownloadLink as key to movie makes it unique
80-
movies[movie.DownloadLink.String()] = &movie
81-
// downloadLinkCollector.Visit(movie.DownloadLink.String())
78+
m := strconv.Itoa(movieIndex)
79+
movies[m] = &movie
80+
ctx := colly.NewContext()
81+
ctx.Put("movieIndex", m)
82+
downloadLinkCollector.Request("GET", movie.DownloadLink.String(), nil, ctx, nil)
83+
movieIndex++
8284
}
8385
})
84-
85-
for _, movie := range movies{
86-
downloadLinkCollector.Visit(movie.DownloadLink.String())
87-
}
8886
})
8987

9088
c.OnRequest(func(r *colly.Request) {
@@ -101,25 +99,25 @@ func Scrape(engine Engine) ([]Movie, error) {
10199
// movie details when we need it
102100
downloadLinkCollector.OnRequest(func(r *colly.Request) {
103101
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
104-
if movie, ok := movies[r.URL.String()]; ok {
105-
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
106-
}
102+
movie := getMovieFromMovies(r, movies)
103+
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
107104
})
108105

109106
// If Response Content Type is not Text, Abort the Request to prevent fully downloading the
110107
// body in case of other types like mp4
111108
downloadLinkCollector.OnResponseHeaders(func(r *colly.Response) {
109+
log.Infof("%s", r.Headers)
112110
if !strings.Contains(r.Headers.Get("Content-Type"), "text") {
111+
log.Errorf("Response %s is not text/html. Aborting request", r.Request.URL)
113112
r.Request.Abort()
114-
log.Debugf("Response %s is not text/html. Aborting request", r.Request.URL)
115113
}
116114
})
117115

118116
downloadLinkCollector.OnResponse(func(r *colly.Response) {
119-
log.Debug(r.Request.URL.String)
120-
// movie := movies[r.Request.URL.String()]
121-
// log.Infof("%s %v %s", r.Request.URL.String(), movie.DownloadLink, movie.Title)
122-
// log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
117+
movie := getMovieFromMovies(r.Request, movies)
118+
log.Infof("Movie on Response %v", movie)
119+
// prettyPrint([]Movie{*movie})
120+
// log.Debugf("Retrieved Download Page %s\n", movie.DownloadLink.String())
123121
})
124122

125123
c.Visit(engine.getParseURL().String())
@@ -242,15 +240,11 @@ func getMovieIndexFromCtx(r *colly.Request) int {
242240
return movieIndex
243241
}
244242

245-
// Get Movie from a URL
246-
func getMovieFromMovies(url string, movies map[string]*Movie) *Movie {
247-
if _, ok := movies[url]; ok {
248-
return movies[url]
249-
}
250-
for _, movie := range movies {
251-
if (*movie).DownloadLink.String() == url {
252-
return movie
253-
}
243+
// Get Movie from a Context
244+
func getMovieFromMovies(r *colly.Request, movies map[string]*Movie) *Movie {
245+
movieIndex := r.Ctx.Get("movieIndex")
246+
if _, ok := movies[movieIndex]; ok {
247+
return movies[movieIndex]
254248
}
255249
return &Movie{}
256250
}

engine/fzmovies.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,9 @@ func (engine *FzEngine) getParseAttrs() (string, string, error) {
5858
return "body", "div.mainbox", nil
5959
}
6060

61-
func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
61+
func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error) {
6262
movie := Movie{
63+
Index: movieIndex,
6364
IsSeries: false,
6465
Source: engine.Name,
6566
}
@@ -92,7 +93,7 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
9293
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
9394
// Update movie download link if ul.downloadlinks on page
9495
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
95-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
96+
movie := getMovieFromMovies(e.Request, movies)
9697
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
9798
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
9899
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
@@ -113,7 +114,7 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
113114
if err != nil {
114115
log.Fatal(err)
115116
}
116-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
117+
movie := getMovieFromMovies(e.Request, movies)
117118
movie.DownloadLink = downloadLink
118119
}
119120
})

engine/netnaija.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func (engine *NetNaijaEngine) getParseAttrs() (string, string, error) {
6969
return "main", article, nil
7070
}
7171

72-
func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
72+
func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error) {
7373
// movie title identifier
7474
var title string
7575
if title = "h3.file-name"; engine.mode == SearchMode {
@@ -78,6 +78,7 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, er
7878

7979
re := regexp.MustCompile(`\((.*)\)`)
8080
movie := Movie{
81+
Index: movieIndex,
8182
IsSeries: false,
8283
Source: engine.Name,
8384
}
@@ -117,7 +118,7 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, er
117118
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
118119
// Update movie size
119120
downloadCollector.OnHTML("button[id=download-button]", func(e *colly.HTMLElement) {
120-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
121+
movie := getMovieFromMovies(e.Request, movies)
121122
movie.Size = strings.TrimSpace(e.ChildText("span.size"))
122123
})
123124

@@ -126,20 +127,21 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
126127
if err != nil {
127128
log.Fatal(err)
128129
}
129-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
130+
movie := getMovieFromMovies(e.Request, movies)
130131
movie.DownloadLink = downloadLink
131132
downloadCollector.Visit(downloadLink.String())
132133
})
133134

134135
// Update movie download link if a[id=download] on page
135136
downloadCollector.OnHTML("a[id=download]", func(e *colly.HTMLElement) {
136-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
137+
movie := getMovieFromMovies(e.Request, movies)
137138
movie.Size = strings.TrimSpace(e.ChildText("span[id=download-size]"))
138139
downloadLink, err := url.Parse(e.Attr("href"))
139140
if err != nil {
140141
log.Fatal(err)
141142
}
142143
movie.DownloadLink = downloadLink
144+
log.Infof("Parsing Downloads from %s %s %s", e.Request.URL.String(), movie.Title, downloadLink.String())
143145
})
144146

145147
// Update Download Link if "Direct Download" HTML on page
@@ -149,14 +151,15 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
149151
if err != nil {
150152
log.Fatal(err)
151153
}
152-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
154+
movie := getMovieFromMovies(e.Request, movies)
155+
log.Infof("Parsing Downloads %s %s", movie.Title, downloadLink.String())
153156
movie.DownloadLink = downloadLink
154157
}
155158
})
156159

157160
//for series or parts
158161
downloadCollector.OnHTML("div.video-series-latest-episodes", func(inn *colly.HTMLElement) {
159-
movie := getMovieFromMovies(inn.Request.URL.String(), movies)
162+
movie := getMovieFromMovies(inn.Request, movies)
160163
movie.IsSeries = true
161164
inn.ForEach("a", func(_ int, e *colly.HTMLElement) {
162165
downloadLink, err := url.Parse(e.Attr("href"))

0 commit comments

Comments
 (0)