Skip to content

Commit 7346c7d

Browse files
authored
Merge pull request #53 from liangzy-gh/master
整理代码逻辑
2 parents 86e699d + cdeced6 commit 7346c7d

2 files changed

Lines changed: 24 additions & 49 deletions

File tree

spider/HtmlDownloader.py

Lines changed: 23 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -12,61 +12,36 @@
1212

1313

1414
class Html_Downloader(object):
15-
@classmethod
16-
def download(self, url):
17-
count = 0 # 重试次数
18-
r = ''
15+
@staticmethod
16+
def download(url):
1917
try:
2018
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT)
2119
r.encoding = chardet.detect(r.content)['encoding']
20+
if (not r.ok) or len(r.content) < 500:
21+
raise ConnectionError
22+
else:
23+
return r.text
24+
25+
except Exception:
26+
count = 0 # 重试次数
27+
proxylist = sqlhelper.select(10)
28+
if not proxylist:
29+
return None
30+
2231
while count < config.RETRY_TIME:
23-
if (not r.ok) or len(r.content) < 500:
24-
proxylist = sqlhelper.select(10)
32+
try:
2533
proxy = random.choice(proxylist)
2634
ip = proxy[0]
2735
port = proxy[1]
2836
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
29-
try:
30-
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
31-
r.encoding = chardet.detect(r.content)['encoding']
32-
count += 1
33-
except Exception as e:
34-
count += 1
35-
36-
else:
37-
return r.text
38-
39-
return None
40-
41-
except Exception as e:
42-
while count < config.RETRY_TIME:
43-
if r == '' or (not r.ok) or len(r.content) < 500:
44-
try:
45-
proxylist = sqlhelper.select(10)
46-
proxy = random.choice(proxylist)
47-
ip = proxy[0]
48-
port = proxy[1]
49-
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
50-
try:
51-
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
52-
r.encoding = chardet.detect(r.content)['encoding']
53-
count += 1
54-
except Exception as e:
55-
count += 1
56-
57-
except Exception as e:
58-
return None
59-
60-
else:
61-
return r.text
62-
63-
return None
64-
65-
66-
67-
68-
69-
70-
7137

38+
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
39+
r.encoding = chardet.detect(r.content)['encoding']
40+
if (not r.ok) or len(r.content) < 500:
41+
raise ConnectionError
42+
else:
43+
return r.text
44+
except Exception:
45+
count += 1
7246

47+
return None

util/exception.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
class Test_URL_Fail(Exception):
66
def __str__(self):
7-
str = "访问%s失败,请检查网络连接" % config.TEST_URL
7+
str = "访问%s失败,请检查网络连接" % config.TEST_IP
88
return str
99

1010

0 commit comments

Comments
 (0)