Skip to content

Commit c952a7b

Browse files
authored
Update twayback.py
1 parent 6ef2baf commit c952a7b

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

twayback.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
# Active, suspended, or doesn't exist?
2121
data1 =f"https://twitter.com/{username}"
2222
results = []
23-
headers = {'user-agent':'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'}
23+
headers = {'user-agent':'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'}
2424

2525
response = session.get(data1, headers=headers, allow_redirects=False)
2626
status_code = response.status_code
@@ -41,9 +41,18 @@
4141

4242
link = f"https://web.archive.org/cdx/search/cdx?url=twitter.com/{username}/status&matchType=prefix&filter=statuscode:200&from={fromdate}&to={todate}"
4343
data2 = []
44-
44+
blocklist = []
4545
c = session.get(link).text
4646
urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', c)
47+
blocks = re.findall(r'Blocked', c)
48+
49+
for block in blocks:
50+
blocklist.append(f"{block}")
51+
if any("Blocked" in s for s in blocklist):
52+
print(f"Sorry, no deleted Tweets can be retrieved for {username}.\nThis is because the Wayback Machine excludes Tweets for this handle.")
53+
exit()
54+
else:
55+
pass
4756

4857
for url in urls:
4958
data2.append(f"{url}")
@@ -61,7 +70,7 @@
6170

6271
# Obtain status codes
6372
results = []
64-
headers = {'user-agent':'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'}
73+
headers = {'user-agent':'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'}
6574

6675
for url in track(data4):
6776
response = session.get(url, headers=headers)

0 commit comments

Comments (0)