应用介绍
import os

import pandas as pd
import requests
from lxml import etree
class TeachInChina(object):
    """Crawl teaching-job listings from jobleadchina.com into a CSV file.

    Each listing page is downloaded, a fixed set of fields is extracted with
    XPath, and the resulting rows are appended to ``OUTPUT_PATH``.
    """

    # Seconds to wait for the server before giving up on a page; without a
    # timeout a stalled connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 10
    # CSV file that every successfully parsed page is appended to.
    OUTPUT_PATH = 'jobleadchina.csv'

    def __init__(self, max_page):
        """Build the listing URLs for pages 1 through ``max_page`` inclusive.

        Args:
            max_page: Number of the last listing page to crawl.
        """
        self.start_urls = [
            'http://www.jobleadchina.com/job?job_industry=Teaching'
            '&company_name=&page={}'.format(page)
            for page in range(1, max_page + 1)
        ]

    def get_data(self):
        """Download every URL in ``start_urls`` and hand it to ``parse_data``.

        A page that cannot be fetched or parsed is reported and skipped so
        that one bad page does not abort the remaining ones.
        """
        for url in self.start_urls:
            # The page number is the value of the last query parameter.
            page = url.split('=')[-1]
            try:
                res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException:
                print('链接{}请求不成功!'.format(url))
                continue
            # Only an explicit False means failure, so an override that still
            # returns None keeps reporting success as before.
            if self.parse_data(res, page) is not False:
                print('成功爬取并保存第{}页数据!'.format(page))

    @staticmethod
    def parse_data(res, page):
        """Extract the job fields from one response and append them to the CSV.

        Args:
            res: Response object exposing ``status_code``, ``text`` and ``url``.
            page: Page number as a string; used only in diagnostic output.

        Returns:
            True when the page was parsed and saved, False when the response
            status was not 200.
        """
        if res.status_code != 200:
            print('链接{}请求不成功!'.format(res.url))
            return False

        parsed = etree.HTML(res.text)
        columns = {
            'title': parsed.xpath('//*[@class="positionTitle"]/a/text()'),
            'link': parsed.xpath('//*[@class="positionTitle"]/a/@href'),
            'salary': [slr.strip() for slr in
                       parsed.xpath('//*[@class="salaryRange"]/text()')],
            'company': parsed.xpath('//*[@class="companyName"]/a/text()'),
            'area': parsed.xpath(
                '//*[@class="jobThumbnailCompanyIndustry"]/span[3]/text()'),
            'update_time': parsed.xpath('//*[@class="post-time"]/text()'),
            'exp_title': parsed.xpath(
                '//*[@class="jobThumbnailPositionRequire"]/span[3]/text()'),
            'education': parsed.xpath(
                '//*[@class="jobThumbnailPositionRequire"]/span[1]/text()'),
            'com_type': parsed.xpath(
                '//*[@class="jobThumbnailCompanyIndustry"]/span[1]/text()'),
        }
        if len({len(values) for values in columns.values()}) > 1:
            # Rows may be misaligned when a listing lacks a field; warn so the
            # page can be checked by hand instead of failing the whole crawl.
            print('第{}页各字段数量不一致,缺失值已留空!'.format(page))

        data = TeachInChina._build_frame(columns)
        TeachInChina._append_csv(data, TeachInChina.OUTPUT_PATH)
        return True

    @staticmethod
    def _build_frame(columns):
        """Return a DataFrame from ``columns``, padding short columns with NaN."""
        if len({len(values) for values in columns.values()}) <= 1:
            return pd.DataFrame(columns)
        # pd.DataFrame raises ValueError on lists of unequal length; Series
        # are aligned on their index instead, leaving the gaps as NaN.
        return pd.DataFrame({name: pd.Series(values, dtype=object)
                             for name, values in columns.items()})

    @staticmethod
    def _append_csv(data, path):
        """Append ``data`` to ``path``, writing the header only to an empty file."""
        # Deciding by file state rather than page number avoids a duplicate
        # header on reruns and a missing header when the first page fails.
        write_header = not os.path.exists(path) or os.path.getsize(path) == 0
        data.to_csv(path, index=False, mode='a', header=write_header)
if __name__ == '__main__':
    # Script entry point: crawl listing pages 1 through 96.
    TOTAL_PAGES = 96
    crawler = TeachInChina(TOTAL_PAGES)
    crawler.get_data()
本项目主要爬取外籍英语老师招聘数据 822 条、英语老师招聘数据 6242 条,并分析外籍教师的招聘状况。
适用于 Python 数据分析学习者、Python 爬虫学习者、Pandas 使用者、数据可视化学习者。
©版权声明:本文内容由互联网用户自发贡献,版权归原创作者所有,本站不拥有所有权,也不承担相关法律责任。如果您发现本站中有涉嫌抄袭的内容,欢迎发送邮件至: [email protected] 进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。
转载请注明出处: apollocode » Python爬虫英语老师招聘数据
文件列表(部分)
名称 | 大小 | 修改日期 |
---|---|---|
data_gm.csv | 11.95 KB | 2019-12-30 |
jobleadchina.csv | 18.06 KB | 2019-12-30 |
jobleadchina.py | 0.83 KB | 2019-12-30 |
local_english_teacher.py | 0.81 KB | 2019-12-30 |
wechat_group_member.py | 0.34 KB | 2019-12-30 |
数据分析.ipynb | 58.49 KB | 2019-12-30 |
外语培训.csv | 8.97 KB | 2019-12-30 |
幼儿园.csv | 5.81 KB | 2019-12-30 |
职业院校.csv | 1.27 KB | 2019-12-30 |
中小学.csv | 91.32 KB | 2019-12-30 |
发表评论 取消回复