# job-search-web-scraping.py
import smtplib
import sys
import os
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from os.path import basename
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from config import cfg
def send_email(sender_email_address, email_password,
               receiver_email_address, email_subject, email_smtp,
               email_body, file_path, email_port=587):
    """
    Send an email that inlines and attaches the contents of a file.

    The message carries three parts, in this order: the text of
    ``file_path``, ``email_body``, and ``file_path`` itself as an
    attachment. The connection is upgraded with STARTTLS before login.

    sender_email_address: sender email address (also used as SMTP login)
    email_password: sender email password
    receiver_email_address: receiver email address
    email_subject: email subject
    email_smtp: email smtp server
    email_body: email body
    file_path: file to attach in the email
    email_port: smtp server port (defaults to 587)

    Raises OSError (e.g. FileNotFoundError) when ``file_path`` cannot be
    read; this happens before any network connection is opened. Errors
    raised by smtplib while connecting, logging in or sending propagate
    to the caller.
    """
    # Read the file up front and close it immediately: once as text for the
    # inline part, once as raw bytes for the attachment. MIMEApplication
    # expects bytes; handing it a str corrupts non-ASCII content.
    with open(file_path) as text_file:
        file_text = text_file.read()
    with open(file_path, 'rb') as binary_file:
        file_bytes = binary_file.read()

    # create an email message object and configure its headers
    message = MIMEMultipart()
    message['subject'] = email_subject
    message['From'] = sender_email_address
    message['To'] = receiver_email_address

    # inline parts: file contents first, then the caller-supplied body
    message.attach(MIMEText(file_text, "plain"))
    message.attach(MIMEText(email_body))

    # attach the file itself; add_header takes care of quoting the file name
    part = MIMEApplication(file_bytes)
    part.add_header('Content-Disposition', 'attachment',
                    filename=basename(file_path))
    message.attach(part)

    # The context manager sends QUIT and closes the socket even when
    # starttls/login/send_message raises.
    with smtplib.SMTP(email_smtp, email_port) as server:
        # identify this client to the SMTP server
        server.ehlo()
        # secure the SMTP connection, then re-identify over TLS
        server.starttls()
        server.ehlo()
        # login to email account
        server.login(sender_email_address, email_password)
        # send email
        server.send_message(message)
def _create_browser(headless):
    """Return a Firefox or Chrome webdriver, or None if no driver binary exists.

    Looks for ``geckodriver`` first and ``chromedriver`` second in the
    current directory (with an ``.exe`` suffix on Windows).
    """
    driver_suffix = '.exe' if os.name == 'nt' else ''
    gecko_driver_path = './geckodriver' + driver_suffix
    chrome_driver_path = './chromedriver' + driver_suffix

    if Path(gecko_driver_path).is_file():
        options = webdriver.FirefoxOptions()
        if headless:
            options.headless = True
        return webdriver.Firefox(executable_path=gecko_driver_path,
                                 options=options)
    if Path(chrome_driver_path).is_file():
        options = webdriver.ChromeOptions()
        if headless:
            options.headless = True
        return webdriver.Chrome(executable_path=chrome_driver_path,
                                options=options)
    return None


def indeed_job_search(*args):
    """
    Search indeed.com for jobs and email the results.

    Types ``cfg['keyword']`` and ``cfg['city']`` into the search form,
    appends one "title | link" line per result to ``job_search.txt`` and
    sends that file through ``send_email`` using the ``cfg`` email settings.

    args: command-line style flags; if 'headless' is among them the
          browser is started in headless mode.

    Prints a message and returns without doing anything when neither
    webdriver binary is found in the current directory.
    """
    # Imported here so the function does not depend on changes to the
    # file's import block. find_element(By...) replaces the
    # find_element_by_* helpers, which newer Selenium releases removed.
    from selenium.webdriver.common.by import By

    browser = _create_browser('headless' in args)
    if browser is None:
        print("Unable to find a webdriver.")
        return

    try:
        browser.get('https://www.indeed.com')
        # The implicit wait is a session-wide setting, so one call covers
        # every element lookup below.
        browser.implicitly_wait(5)

        keyword_input = browser.find_element(By.NAME, 'q')
        keyword_input.send_keys(cfg['keyword'])
        location_input = browser.find_element(By.NAME, 'l')
        location_input.send_keys(cfg['city'])
        location_input.send_keys(Keys.ENTER)

        # Copy the data out of the page before the browser goes away.
        jobs = [
            (job_element.text, job_element.get_attribute('href'))
            for job_element in browser.find_elements(By.XPATH, '//h2/a')
        ]
    finally:
        # quit() ends the session and the driver process, even on errors.
        browser.quit()

    # Close (and therefore flush) the file before send_email reads it back;
    # otherwise the freshly written rows may still sit in the write buffer.
    with open("job_search.txt", 'a') as results_file:
        results_file.write("\n")
        for job_title, job_link in jobs:
            results_file.write("%s | link: %s \n" % (job_title, job_link))

    send_email(cfg["sender_email"], cfg["sender_password"],
               cfg["receiver_email"],
               cfg["subject_email"],
               cfg["email_smtp"],
               cfg["body_email"],
               "job_search.txt")
if __name__ == "__main__":
    # Forward the raw command line (script name included) as flags;
    # passing "headless" as an argument runs the browser headless.
    command_line = sys.argv
    indeed_job_search(*command_line)