Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 33 additions & 18 deletions engine/besthdmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ func (engine *BestHDEngine) getParseAttrs() (string, string, error) {
return "body", "article.latestPost", nil
}

func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error) {
func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error) {
movie := Movie{
Index: index,
Index: movieIndex,
IsSeries: false,
Source: engine.Name,
}
Expand Down Expand Up @@ -89,11 +89,13 @@ func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement, index int) (
return movie, nil
}

func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie) {
submissionDetails := make(map[string]string)
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
// Update movie download link if div.post-single-content on page
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request, scrapedMovies)
log.Debug(movie.Index)
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
ptags := e.ChildTexts("p")
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
Expand All @@ -111,7 +113,10 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
downloadlink, err := url.Parse(link)
if err == nil {
movie.DownloadLink = downloadlink
downloadCollector.Visit(downloadlink.String())
ctx := colly.NewContext()
ctx.Put("movieIndex", strconv.Itoa(movie.Index))
downloadCollector.Request("GET", movie.DownloadLink.String(), nil, ctx, nil)
// downloadCollector.Visit(downloadlink.String())
} else {
log.Fatal(err)
}
Expand All @@ -120,7 +125,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
})

downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request, scrapedMovies)
links := e.ChildAttrs("a", "href")
for _, link := range links {
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
Expand All @@ -130,8 +135,12 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
}
downloadlink, err := url.Parse(link)
if err == nil {
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
movie.DownloadLink = downloadlink
downloadCollector.Visit(downloadlink.String())
ctx := colly.NewContext()
ctx.Put("movieIndex", strconv.Itoa(movie.Index))
downloadCollector.Request("GET", downloadlink.String(), nil, ctx, nil)
} else {
log.Fatal(err)
}
Expand All @@ -140,12 +149,13 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
})

downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
movieIndex := getMovieIndexFromCtx(e.Request)
movie := &(*movies)[movieIndex]
movie := getMovieFromMovies(e.Request, scrapedMovies)
zeesubmission := make(map[string]string)
if e.ChildAttr("a.link_button", "href") != "" {
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
if err == nil {
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
movie.DownloadLink = downloadlink
}
} else {
Expand All @@ -156,18 +166,19 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
for index := range inputNames {
zeesubmission[inputNames[index]] = inputValues[index]
}

err := downloadCollector.Post(movie.DownloadLink.String(), zeesubmission)
ctx := colly.NewContext()
ctx.Put("movieIndex", strconv.Itoa(movie.Index))
err := downloadCollector.Request("POST", movie.DownloadLink.String(), createFormReader(zeesubmission), ctx, nil)
if err != nil {
log.Fatal(err)
}
}
})

downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
movieIndex := getMovieIndexFromCtx(e.Request)
var err error
movie := &(*movies)[movieIndex]
submissionDetails := make(map[string]string)
movie := getMovieFromMovies(e.Request, scrapedMovies)
downloadlink := movie.DownloadLink
inputNames := e.ChildAttrs("input", "name")
inputValues := e.ChildAttrs("input", "value")
Expand All @@ -177,11 +188,14 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
}
requestlink := e.Request.URL.String()
if !(strings.HasPrefix(requestlink, "https://zeefiles") || strings.HasPrefix(requestlink, "http://zeefiles")) {
downloadlink, err = url.Parse("https://udown.me/watchonline/?movieIndex=" + strconv.Itoa(movieIndex))
downloadlink, err = url.Parse("https://udown.me/watchonline/?movieIndex=" + strconv.Itoa(movie.Index))
// log.Debug("inside scraper ====>", downloadlink.String())
if err == nil {
movie.DownloadLink = downloadlink
}
err = downloadCollector.Post(downloadlink.String(), submissionDetails)
ctx := colly.NewContext()
ctx.Put("movieIndex", strconv.Itoa(movie.Index))
err = downloadCollector.Request("POST", downloadlink.String(), createFormReader(submissionDetails), ctx, nil)
if err != nil {
log.Fatal(err)
}
Expand All @@ -190,8 +204,9 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect

downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
downloadlink := e.ChildAttr("source", "src")
movieIndex := getMovieIndexFromCtx(e.Request)
movie := &(*movies)[movieIndex]
movie := getMovieFromMovies(e.Request, scrapedMovies)
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
movie.DownloadLink, _ = url.Parse(downloadlink)
})
}
Expand Down
87 changes: 70 additions & 17 deletions engine/engines.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"net/url"
"strconv"
"strings"
"sync"

"github.com/gocolly/colly/v2"
// "github.com/gocolly/colly/v2/debug"
log "github.com/sirupsen/logrus"
)

Expand All @@ -33,7 +36,7 @@ type Engine interface {
List(page int) SearchResult
String() string
// parseSingleMovie: parses the result of a colly HTMLElement and returns a movie
parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error)
parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error)

// getParseAttrs : get the attributes to use to parse a returned soup
// the first return string is the part of the html to be parsed e.g `body`, `main`
Expand All @@ -42,7 +45,14 @@ type Engine interface {
getParseAttrs() (string, string, error)

// updateDownloadProps: registers callbacks on the download collector that update the download details of the scraped movies
updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie)
updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped)
}

// scraped is the shared store of every movie found during a scrape, keyed by
// the movie's index rendered as a string. The collectors touch it from
// several goroutines, so the embedded mutex must be held while reading or
// writing the map (or the movies it points to) to avoid data races.
type scraped struct {
	sync.Mutex // guards movies
	movies     map[string]*Movie
}

// Scrape : Parse queries a url and return results
Expand All @@ -51,28 +61,36 @@ func Scrape(engine Engine) ([]Movie, error) {
// Cache responses to prevent multiple download of pages
// even if the collector is restarted
colly.CacheDir("./gophie_cache"),
colly.Async(true),
// colly.Debugger(&debug.LogDebugger{}),
)
// Another collector for download Links
downloadLinkCollector := c.Clone()

movieIndex := 0
var movies []Movie
scrapedMovies := scraped{movies: make(map[string]*Movie)}

// Any Extras setup for downloads using can be specified in the function
engine.updateDownloadProps(downloadLinkCollector, &movies)
engine.updateDownloadProps(downloadLinkCollector, &scrapedMovies)

main, article, err := engine.getParseAttrs()
if err != nil {
log.Fatal(err)
}
movieIndex := 0
c.OnHTML(main, func(e *colly.HTMLElement) {
e.ForEach(article, func(_ int, el *colly.HTMLElement) {
movie, err := engine.parseSingleMovie(el, movieIndex)
if err != nil {
log.Errorf("%v could not be parsed", movie)
} else {
movies = append(movies, movie)
downloadLinkCollector.Visit(movie.DownloadLink.String())
// Using the movie index (as a string) as the map key keeps each entry unique
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
m := strconv.Itoa(movieIndex)
scrapedMovies.movies[m] = &movie
ctx := colly.NewContext()
ctx.Put("movieIndex", m)
downloadLinkCollector.Request("GET", movie.DownloadLink.String(), nil, ctx, nil)
movieIndex++
}
})
Expand All @@ -92,29 +110,38 @@ func Scrape(engine Engine) ([]Movie, error) {
// movie details when we need it
downloadLinkCollector.OnRequest(func(r *colly.Request) {
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
for i, movie := range movies {
if movie.DownloadLink.String() == r.URL.String() {
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
r.Ctx.Put("movieIndex", strconv.Itoa(i))
}
}
movie := getMovieFromMovies(r, &scrapedMovies)
// r.Ctx.Put("movieIndex", strconv.Itoa(movie.Index))
log.Debugf("Retrieving Download Link %s\n", movie.Title)
})

// If Response Content Type is not Text, Abort the Request to prevent fully downloading the
// body in case of other types like mp4
downloadLinkCollector.OnResponseHeaders(func(r *colly.Response) {
if !strings.Contains(r.Headers.Get("Content-Type"), "text") {
r.Request.Abort()
log.Debugf("Response %s is not text/html. Aborting request", r.Request.URL)
r.Request.Abort()
}
})

downloadLinkCollector.OnResponse(func(r *colly.Response) {
movie := &movies[getMovieIndexFromCtx(r.Request)]
log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
movie := getMovieFromMovies(r.Request, &scrapedMovies)
log.Debugf("Retrieved Download Page %s\n", movie.Title)
})

c.Visit(engine.getParseURL().String())
return movies, nil
c.Wait()
downloadLinkCollector.Wait()

// Create a List of Movies
v := make([]Movie, 0, len(scrapedMovies.movies))

for _, value := range scrapedMovies.movies {
v = append(v, *value)
}
prettyPrint(v)

return v, nil
}

// Movie : the structure of all downloadable movies
Expand Down Expand Up @@ -221,3 +248,29 @@ func getMovieIndexFromCtx(r *colly.Request) int {
}
return movieIndex
}

// getMovieFromMovies returns the movie stored in scrapedMovies under the
// request's "movieIndex" context value.
//
// The map is read while holding scrapedMovies' mutex. When no movie is stored
// under that key, a pointer to a new, empty Movie is returned instead of nil,
// so callers can always dereference the result; changes made to that
// placeholder are not recorded in scrapedMovies.
//
// The mutex is released before this function returns, so callers that go on
// to modify the returned movie must take the lock themselves.
func getMovieFromMovies(r *colly.Request, scrapedMovies *scraped) *Movie {
	movieIndex := r.Ctx.Get("movieIndex")

	scrapedMovies.Lock()
	defer scrapedMovies.Unlock()

	// One comma-ok lookup yields both the presence flag and the value.
	if movie, ok := scrapedMovies.movies[movieIndex]; ok {
		return movie
	}
	return &Movie{}
}

// prettyPrint writes the given movies to standard output as indented JSON.
// If the slice cannot be marshalled, nothing is printed.
func prettyPrint(s []Movie) {
	encoded, err := json.MarshalIndent(s, "", " ")
	if err != nil {
		return
	}
	fmt.Println(string(encoded))
}

// createFormReader URL-encodes the key/value pairs in data as a form body
// ("k1=v1&k2=v2", sorted by key) and returns a reader over the encoded string,
// suitable for use as the body of a POST request.
func createFormReader(data map[string]string) io.Reader {
	values := make(url.Values, len(data))
	for name, value := range data {
		// Map keys are unique, so each name is set exactly once.
		values.Set(name, value)
	}
	encoded := values.Encode()
	return strings.NewReader(encoded)
}
21 changes: 16 additions & 5 deletions engine/fzmovies.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ func (engine *FzEngine) getParseAttrs() (string, string, error) {
return "body", "div.mainbox", nil
}

func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movie, error) {
func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, movieIndex int) (Movie, error) {
movie := Movie{
Index: index,
Index: movieIndex,
IsSeries: false,
Source: engine.Name,
}
Expand All @@ -71,6 +71,11 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movi
movie.CoverPhotoLink = cover.String()
// Remove all Video: or Movie: Prefixes
movie.UploadDate = strings.TrimSpace(el.ChildTexts("small")[1])
// Update Year
year, err := strconv.Atoi(strings.TrimSpace(el.ChildTexts("small")[1]))
if err == nil {
movie.Year = year
}
movie.Title = strings.TrimSuffix(strings.TrimSpace(el.ChildText("b")), "<more>")
movie.Description = strings.TrimSpace(el.ChildTexts("small")[3])
downloadLink, err := url.Parse(el.Request.AbsoluteURL(el.ChildAttr("a", "href")))
Expand All @@ -85,16 +90,19 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement, index int) (Movi
return movie, nil
}

func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies *[]Movie) {
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
// Update movie download link if ul.downloadlinks on page
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
movie := &(*movies)[getMovieIndexFromCtx(e.Request)]
movie := getMovieFromMovies(e.Request, scrapedMovies)
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
if err != nil {
log.Fatal(err)
}

scrapedMovies.Lock()
defer scrapedMovies.Unlock()
movie.DownloadLink = downloadLink
re := regexp.MustCompile(`(.* MB)`)
dl := strings.TrimPrefix(re.FindStringSubmatch(e.ChildText("dcounter"))[0], "(")
Expand All @@ -109,7 +117,10 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
if err != nil {
log.Fatal(err)
}
(*movies)[getMovieIndexFromCtx(e.Request)].DownloadLink = downloadLink
movie := getMovieFromMovies(e.Request, scrapedMovies)
scrapedMovies.Lock()
defer scrapedMovies.Unlock()
movie.DownloadLink = downloadLink
}
})
}
Expand Down
Loading