# 新学习了 selenium，爬一下腾讯招聘
import pymysql
from lxml import etree
from selenium import webdriver
def Geturl(fullurl):
    """Collect the URL of every job posting linked from one listing page.

    Loads ``fullurl`` in the module-level ``browser``, parses the rendered
    page source with lxml, and reads the ``href`` of each link found in the
    ``#position`` table.

    Args:
        fullurl: URL of the listing page to open.

    Returns:
        A list of strings, one per matched link, each being the href
        prefixed with ``https://hr.tencent.com/``.
    """
    browser.get(fullurl)
    listing_tree = etree.HTML(browser.page_source)
    # One href per posting row in the listing table.
    hrefs = listing_tree.xpath('//*[@id="position"]/div[1]/table/tbody/tr/td/a/@href')
    return ['https://hr.tencent.com/' + href for href in hrefs]
def Getinfo(zp_url_list):
    """Scrape each job-detail page and insert its fields into MySQL.

    For every URL the page is loaded in the module-level ``browser``, the
    title, location, type, headcount and requirement texts are extracted
    with XPath, one row is inserted into the ``txzp_info`` table, and the
    same values are printed.

    Args:
        zp_url_list: Iterable of job-detail page URLs.

    Returns:
        None.

    Raises:
        IndexError: If a page lacks one of the single-valued fields
            (the XPath result is indexed with ``[0]``).
    """
    if not zp_url_list:
        # Nothing to scrape, so do not open a database connection at all.
        return

    detail_row = '//*[@id="position_detail"]/div/table/tbody/tr[2]'
    sql = "INSERT INTO `txzp_info` (`title`, `location`,`type`,`num`,`need`) VALUES (%s,%s,%s,%s, %s)"

    # One connection serves the whole batch instead of reconnecting per URL.
    # The keyword must be spelled ``password``; an explicit utf8mb4 charset
    # keeps non-ASCII text intact on PyMySQL versions whose default charset
    # follows the server setting.
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='1234',
        db='txzp',
        charset='utf8mb4',
    )
    try:
        for zp_url in zp_url_list:
            browser.get(zp_url)
            zp_ele = etree.HTML(browser.page_source)

            zp_info_title = str(zp_ele.xpath('//*[@id="sharetitle"]/text()')[0])
            zp_info_location = str(zp_ele.xpath(detail_row + '/td[1]/text()')[0])
            zp_info_type = str(zp_ele.xpath(detail_row + '/td[2]/text()')[0])
            zp_info_num = str(zp_ele.xpath(detail_row + '/td[3]/text()')[0])
            # Stored as the string form of the whole list of <li> texts.
            zp_info_need = str(zp_ele.xpath('//*[@id="position_detail"]/div/table/tbody/tr[3]/td/ul/li/text()'))

            row = (zp_info_title, zp_info_location, zp_info_type, zp_info_num, zp_info_need)
            with connection.cursor() as cursor:
                cursor.execute(sql, row)
            # Commit per row so rows already scraped survive a later failure.
            connection.commit()
            print(*row)
    finally:
        connection.close()
if __name__ == '__main__':
    # Script entry point: start Chrome, ask how many listing pages to crawl,
    # then scrape every posting linked from each of those pages.
    # ``browser`` stays a module-level name because Geturl and Getinfo read it.
    browser = webdriver.Chrome()
    try:
        pags = int(input('需要几页?'))
        # Listing pages are selected by a ``start`` offset that steps by 10
        # for each page.
        url = 'https://hr.tencent.com/position.php?keywords=&tid=0&start={}'
        for i in range(pags):
            fullurl = url.format(str(i*10))
            zp_url_list = Geturl(fullurl)
            Getinfo(zp_url_list)
    finally:
        # quit() ends the whole driver session, not just the current window,
        # and runs even when scraping raises.
        browser.quit()
0