mport requests
import urllib.parse
import re
from lxml import etree
import threading
unity_url = r'(info_html)
try:
job = e_html.xpath('/html/body/p[5]/p[1]/p[1]/h1')[0].text
jobs.append(job)
company = e_html.xpath('/html/body/p[5]/p[1]/p[1]/h2/a')[0].text
companies.append(company)
work_year = e_html.xpath('/html/body/p[6]/p[1]/ul/li[5]/strong')[0].text
work_years.append(work_year)
degree = e_html.xpath('/html/body/p[6]/p[1]/ul/li[6]/strong')[0].text
degrees.append(degree)
salary = e_html.xpath('/html/body/p[6]/p[1]/ul/li[1]/strong')[0].text
salarys.append(salary.split('元')[0])
place = e_html.xpath('/html/body/p[6]/p[1]/ul/li[2]/strong/a')[0].text
places.append(place)
except:
pass
return jobs, companies, work_years, degrees, salarys, places
if __name__ == '__main__':
    # Script entry point: run get_infos on a worker thread.
    # The thread class is `threading.Thread` (capital T); the lowercase
    # `threading.thread` attribute does not exist and raises AttributeError.
    # NOTE(review): the value returned by get_infos is discarded by Thread;
    # confirm whether the caller is expected to collect the results elsewhere.
    t = threading.Thread(target=get_infos)
    t.start()
此代码只是获取了一些职位信息的 URL,并将抓取到的内容写入列表中;后续还有将数据写入数据库、提取数据进行数据可视化的例子。
后面数据可视化的例子
