|
12 | 12 |
|
13 | 13 |
|
14 | 14 | class Html_Downloader(object): |
15 | | - @classmethod |
16 | | - def download(self, url): |
17 | | - count = 0 # 重试次数 |
18 | | - r = '' |
| 15 | + @staticmethod |
| 16 | + def download(url): |
19 | 17 | try: |
20 | 18 | r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT) |
21 | 19 | r.encoding = chardet.detect(r.content)['encoding'] |
| 20 | + if (not r.ok) or len(r.content) < 500: |
| 21 | + raise ConnectionError |
| 22 | + else: |
| 23 | + return r.text |
| 24 | + |
| 25 | + except Exception: |
| 26 | + count = 0 # 重试次数 |
| 27 | + proxylist = sqlhelper.select(10) |
| 28 | + if not proxylist: |
| 29 | + return None |
| 30 | + |
22 | 31 | while count < config.RETRY_TIME: |
23 | | - if (not r.ok) or len(r.content) < 500: |
24 | | - proxylist = sqlhelper.select(10) |
| 32 | + try: |
25 | 33 | proxy = random.choice(proxylist) |
26 | 34 | ip = proxy[0] |
27 | 35 | port = proxy[1] |
28 | 36 | proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)} |
29 | | - try: |
30 | | - r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies) |
31 | | - r.encoding = chardet.detect(r.content)['encoding'] |
32 | | - count += 1 |
33 | | - except Exception as e: |
34 | | - count += 1 |
35 | | - |
36 | | - else: |
37 | | - return r.text |
38 | | - |
39 | | - return None |
40 | | - |
41 | | - except Exception as e: |
42 | | - while count < config.RETRY_TIME: |
43 | | - if r == '' or (not r.ok) or len(r.content) < 500: |
44 | | - try: |
45 | | - proxylist = sqlhelper.select(10) |
46 | | - proxy = random.choice(proxylist) |
47 | | - ip = proxy[0] |
48 | | - port = proxy[1] |
49 | | - proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)} |
50 | | - try: |
51 | | - r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies) |
52 | | - r.encoding = chardet.detect(r.content)['encoding'] |
53 | | - count += 1 |
54 | | - except Exception as e: |
55 | | - count += 1 |
56 | | - |
57 | | - except Exception as e: |
58 | | - return None |
59 | | - |
60 | | - else: |
61 | | - return r.text |
62 | | - |
63 | | - return None |
64 | | - |
65 | | - |
66 | | - |
67 | | - |
68 | | - |
69 | | - |
70 | | - |
71 | 37 |
|
| 38 | + r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies) |
| 39 | + r.encoding = chardet.detect(r.content)['encoding'] |
| 40 | + if (not r.ok) or len(r.content) < 500: |
| 41 | + raise ConnectionError |
| 42 | + else: |
| 43 | + return r.text |
| 44 | + except Exception: |
| 45 | + count += 1 |
72 | 46 |
|
| 47 | + return None |
0 commit comments