# crawler-MiniProgram/dagongPro/pipelines.py

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
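
# For reference, both pipeline classes in this module need to be enabled in
# settings.py via ITEM_PIPELINES. A minimal sketch, assuming the project
# package is dagongPro (taken from this file's path) and with illustrative
# priority values:
#
#   ITEM_PIPELINES = {
#       'dagongPro.pipelines.DagongwangproPipeline': 300,
#       'dagongPro.pipelines.mysqlPileLine': 301,
#   }
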
# Local storage pipeline: writes each news item to a text file on disk.
class DagongwangproPipeline:
    def process_item(self, item, spider):
        # print(item)
        title = item['title']
        content = item['content']
        date = item['date']
        # The target directory ('新闻/内地新闻', i.e. "news/mainland news") must already exist.
        news_path = './新闻/内地新闻/' + title + '.txt'
        with open(news_path, 'w', encoding='utf-8') as fp:
            fp.write(date + '\n' + content)
        return item
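
# Both pipelines assume the item carries the fields title, content, date,
# news_id, author, src and index. A sketch of what the corresponding Item class
# in items.py presumably looks like (the class name DagongproItem is an
# assumption; only the field names are taken from this file):
#
#   import scrapy
#
#   class DagongproItem(scrapy.Item):
#       news_id = scrapy.Field()
#       title = scrapy.Field()
#       date = scrapy.Field()
#       author = scrapy.Field()
#       content = scrapy.Field()
#       src = scrapy.Field()
#       index = scrapy.Field()
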
# MySQL storage pipeline: saves each news item into a per-section database table.
class mysqlPileLine(object):
    conn = None
    cursor = None

    def open_spider(self, spider):
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root', password='woaini',
                                    db='dagong', charset='utf8')
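
    # The five target tables (nd_news, xg_news, tw_news, gj_news, js_news) are
    # assumed to share the same six-column layout matching the insert order in
    # process_item: news_id, title, date, author, content, src. Judging from the
    # pinyin prefixes, they presumably hold mainland (nd), Hong Kong (xg),
    # Taiwan (tw), international (gj) and military (js) news respectively; that
    # mapping is an inference, not confirmed by this file.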
    def process_item(self, item, spider):
        self.cursor = self.conn.cursor()
        # Added 4.11: the news-section code 'index' is passed from the spider
        # (dagong.py) to this pipeline and decides which MySQL table the item
        # is saved into.
        # print(item['index'])  # printing it here confirms the value arrives from dagong.py
        index = item['index']
        # Section code -> target table.
        table_map = {0: 'nd_news', 2: 'xg_news', 4: 'tw_news', 6: 'gj_news', 8: 'js_news'}
        try:
            if index in table_map:
                # Use a parameterized query so pymysql escapes the values; the
                # previous %-formatted SQL string broke on quotes inside the
                # content and was open to SQL injection.
                sql = 'insert into {} values(%s, %s, %s, %s, %s, %s)'.format(table_map[index])
                self.cursor.execute(sql, (item['news_id'], item['title'], item['date'],
                                          item['author'], item['content'], item['src']))
                self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        if self.cursor:
            self.cursor.close()
        if self.conn:
            self.conn.close()
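
# A quick way to check that rows are landing in the right tables after a crawl
# (a standalone sketch, not part of the pipeline; credentials copied from
# open_spider above):
#
#   import pymysql
#
#   conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root',
#                          password='woaini', db='dagong', charset='utf8')
#   with conn.cursor() as cursor:
#       for table in ('nd_news', 'xg_news', 'tw_news', 'gj_news', 'js_news'):
#           cursor.execute('select count(*) from {}'.format(table))
#           print(table, cursor.fetchone()[0])
#   conn.close()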