Python + selenium 爬取选股宝首页新闻

利用 Python + Selenium 爬取选股宝首页新闻及利好、利空消息。

直接附上源码:

from selenium import webdriver
import time


class XuanGuBao():
    """Selenium crawler for the news feed on the xuangubao.cn home page.

    The crawler opens the start URL in Chrome, expands the feed by clicking
    the "load more" element, and converts every news ``<li>`` into a dict.
    """

    # Locator strategy string accepted by ``find_elements``; it is the value
    # of ``selenium.webdriver.common.by.By.XPATH``. Using the generic
    # ``find_elements(by, value)`` call keeps the crawler working on Selenium
    # releases that no longer ship the ``find_elements_by_xpath`` shortcut.
    _BY_XPATH = "xpath"

    _LOAD_MORE_XPATH = "//span[@class='home-news-footer-loadmore']"
    _NEWS_ITEM_XPATH = "//ul[@class='home-news-container']/li"
    _TITLE_XPATH = ".//div[@class='news-item-title']/a/span"
    _DETAIL_XPATH = ".//div[@class='news-item-detail']/pre"
    _BEAR_XPATH = ".//span[@class='bullish-and-bear bear']"
    _BULLISH_XPATH = ".//span[@class='bullish-and-bear bullish']"
    _TOPIC_XPATH = ".//a[@class='news-item-intro-topic']"

    def __init__(self):
        self.start_url = "https://xuangubao.cn/"
        self.driver = webdriver.Chrome()

    def get_content_lsit(self, max_clicks=None, pause=1):
        """Expand the news feed and extract every news item on the page.

        Args:
            max_clicks: Upper bound on how many times the "load more"
                element is clicked. ``None`` (the default) keeps clicking
                until the element is gone or hidden.
            pause: Seconds to sleep after each click so the page can append
                the newly loaded items.

        Returns:
            A list of dicts, one per news ``<li>``. Every dict has the keys
            ``id``, ``title`` and ``detail`` (``title``/``detail`` are
            ``None`` when the element is missing). Items marked bearish also
            carry ``isBear: True`` and ``tags``; items marked bullish carry
            ``isBullish: True`` and ``tags``.
        """
        self._load_more(max_clicks, pause)

        content_list = []
        for li in self.driver.find_elements(self._BY_XPATH, self._NEWS_ITEM_XPATH):
            item = self._parse_item(li)
            print(item)
            content_list.append(item)
        return content_list

    # Correctly spelled alias; the original (misspelled) name is kept so
    # existing callers continue to work.
    get_content_list = get_content_lsit

    def _load_more(self, max_clicks, pause):
        """Click the "load more" element until it is gone or the cap is hit."""
        clicks = 0
        while max_clicks is None or clicks < max_clicks:
            # Look the element up again on every pass: a reference obtained
            # once would stay truthy forever and the loop could never end.
            buttons = self.driver.find_elements(self._BY_XPATH, self._LOAD_MORE_XPATH)
            if not buttons or not buttons[0].is_displayed():
                break
            buttons[0].click()
            clicks += 1
            time.sleep(pause)

    def _first_text(self, element, xpath):
        """Return the text of the first match of *xpath* under *element*, or None."""
        matches = element.find_elements(self._BY_XPATH, xpath)
        return matches[0].text if matches else None

    def _topic_tags(self, li):
        """Return the text of every topic link inside one news item."""
        return [tag.text for tag in li.find_elements(self._BY_XPATH, self._TOPIC_XPATH)]

    def _parse_item(self, li):
        """Convert one news ``<li>`` element into a dict."""
        item = {
            'id': li.get_attribute('id'),
            'title': self._first_text(li, self._TITLE_XPATH),
            'detail': self._first_text(li, self._DETAIL_XPATH),
        }
        # Bearish news
        if li.find_elements(self._BY_XPATH, self._BEAR_XPATH):
            print('found news of bear.')
            item['isBear'] = True
            item['tags'] = self._topic_tags(li)
        # Bullish news (must test the bullish marker, not the bear one)
        if li.find_elements(self._BY_XPATH, self._BULLISH_XPATH):
            print('found news of bullish.')
            item['isBullish'] = True
            item['tags'] = self._topic_tags(li)
        return item

    def run(self):
        """Open the start page, scrape it, and always close the browser.

        Returns:
            The list produced by ``get_content_lsit``.
        """
        try:
            self.driver.get(self.start_url)
            return self.get_content_lsit()
        finally:
            # Quit even when scraping fails so no browser process is left behind.
            self.driver.quit()


# Script entry point: build the crawler and run it once.
if __name__ == '__main__':
    crawler = XuanGuBao()
    crawler.run()

本文标题:Python + selenium 爬取选股宝首页新闻

文章作者:Vincent Zheng

发布时间:2019年01月12日 - 12:01

最后更新:2019年01月12日 - 12:01

原始链接:https://zws910.github.io/2019/01/12/xuangubao/

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

0%