# crawler-MiniProgram/zjjtPro/pipelines.py

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
class ZjjtproPipeline:
    """Default no-op pipeline: hands every item straight back to Scrapy."""

    def process_item(self, item, spider):
        # Nothing to do here; persistence is handled by mysqlPileLine below.
        return item
# Database persistence pipeline
class mysqlPileLine(object):
    """Persist scraped news items into MySQL, one table per school index.

    ``open_spider`` opens the connection and cursor once, ``process_item``
    inserts each item into the table selected by ``item['index']``, and
    ``close_spider`` releases the database resources.
    """

    # Whitelist mapping item['index'] -> destination table name.  Because the
    # table name is interpolated into the SQL text (table names cannot be bound
    # parameters), it must come from this fixed mapping, never from item data.
    TABLE_BY_INDEX = {
        0: 'lqxy',
        1: 'qcxy',
        2: 'hyxy',
        3: 'hkxy',
        4: 'gdxy',
        5: 'zhxy',
        6: 'ysxy',
        7: 'rwxy',
    }

    conn = None    # connection object
    cursor = None  # cursor object

    def open_spider(self, spider):
        # Create the connection and a single reusable cursor when the spider
        # starts (the original built a new, never-closed cursor per item).
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root',
                                    password='root', db='zjjt', charset='utf8')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        table = self.TABLE_BY_INDEX.get(item['index'])
        if table is None:
            # Unknown index: pass the item through untouched, matching the
            # original behavior of falling through every elif branch.
            return item
        try:
            # Parameterized query: pymysql quotes and escapes the values,
            # fixing the SQL-injection / broken-quoting bug of the original
            # '%'-formatted statement (e.g. a title containing a '"').
            self.cursor.execute(
                'insert into {} values(%s, %s, %s, %s, %s)'.format(table),
                (item['news_id'], item['title'], item['date'],
                 item['content'], item['img_src']))
            self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        # Release DB resources (the original left these closes commented out,
        # leaking the connection for the life of the process).
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()