-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsoup1.py
More file actions
27 lines (23 loc) · 813 Bytes
/
soup1.py
File metadata and controls
27 lines (23 loc) · 813 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re
def main():
    """Look up a keyword on Baidu Baike and print every matching entry link.

    Prompts for a keyword on stdin, fetches the Baike search page for it,
    and for each element whose ``href`` matches ``view`` prints one line
    made of the link text, the text of the linked page's first ``<h2>``
    (when the page has one), then ``->`` and the resolved URL.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the search
            page or any linked page cannot be fetched.
    """
    base_url = "http://baike.baidu.com"
    query = urllib.parse.urlencode({"word": input("Please give the keyword:")})

    # The context manager closes the HTTP response even if parsing raises.
    with urllib.request.urlopen(base_url + "/search/word?%s" % query) as response:
        soup = BeautifulSoup(response.read(), "html.parser")

    for link in soup.find_all(href=re.compile("view")):
        # urljoin resolves relative hrefs against the site root and leaves
        # absolute hrefs untouched; plain string concatenation produced a
        # malformed URL for anything other than a root-relative path.
        entry_url = urllib.parse.urljoin(base_url, link["href"])
        with urllib.request.urlopen(entry_url) as entry_response:
            entry_soup = BeautifulSoup(entry_response.read(), "html.parser")

        # Not every linked page has an <h2>; omit the subtitle when absent.
        heading = entry_soup.h2
        subtitle = heading.text if heading else ""
        print("".join([link.text, subtitle, "->", entry_url]))
# Run the lookup only when this file is executed as a script, so that
# importing the module does not prompt for input or touch the network.
if __name__ == "__main__":
    main()