1 file changed: +12 -3 lines changed
Original file line number | Diff line number | Diff line change
2020# Active, suspended, or doesn't exist?
2121data1 = f"https://twitter.com/{ username } "
2222results = []
23- headers = {'user-agent' :'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https ://duckduckgo. com/duckduckbot )' }
23+ headers = {'user-agent' :'Mozilla/5.0 (compatible; bingbot/2.0; +http ://www.bing. com/bingbot.htm )' }
2424
2525response = session .get (data1 , headers = headers , allow_redirects = False )
2626status_code = response .status_code
4141
4242link = f"https://web.archive.org/cdx/search/cdx?url=twitter.com/{ username } /status&matchType=prefix&filter=statuscode:200&from={ fromdate } &to={ todate } "
4343data2 = []
44-
44+ blocklist = []
4545c = session .get (link ).text
4646urls = re .findall (r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' , c )
47+ blocks = re .findall (r'Blocked' , c )
48+
49+ for block in blocks :
50+ blocklist .append (f"{ block } " )
51+ if any ("Blocked" in s for s in blocklist ):
52+ print (f"Sorry, no deleted Tweets can be retrieved for { username } .\n This is because the Wayback Machine excludes Tweets for this handle." )
53+ exit ()
54+ else :
55+ pass
4756
4857for url in urls :
4958 data2 .append (f"{ url } " )
6170
6271# Obtain status codes
6372results = []
64- headers = {'user-agent' :'Mozilla/5.0 (compatible; bingbot/2.0; +http ://www.bing. com/bingbot.htm )' }
73+ headers = {'user-agent' :'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https ://duckduckgo. com/duckduckbot )' }
6574
6675for url in track (data4 ):
6776 response = session .get (url , headers = headers )
You can’t perform that action at this time.
0 commit comments