# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# useful for handling different item types with a single interface
import os

from itemadapter import ItemAdapter
import pymysql


# Local file-storage pipeline: writes each news item to its own .txt file.
class DagongwangproPipeline:
    def process_item(self, item, spider):
        """Write one news item to ./新闻/内地新闻/<title>.txt (date, then content).

        Expects `item` to provide 'title', 'content' and 'date' keys.
        Returns the item unchanged so later pipelines can process it.
        """
        title = item['title']
        content = item['content']
        date = item['date']
        news_dir = './新闻/内地新闻/'
        # Ensure the target directory exists so open() below cannot fail
        # with FileNotFoundError on a fresh checkout.
        os.makedirs(news_dir, exist_ok=True)
        # NOTE(review): title is used verbatim as a filename — assumes it
        # contains no path separators or other illegal characters; confirm
        # upstream sanitization.
        news_path = news_dir + title + '.txt'
        with open(news_path, 'w', encoding='utf-8') as fp:
            fp.write(date + '\n' + content)
        return item


# MySQL storage pipeline: one connection/cursor per spider run.
class mysqlPileLine(object):
    conn = None
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection and a single reusable cursor."""
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root',
                                    password='woaini', db='dagong',
                                    charset='utf8')
        # One cursor for the whole run; the original created a new cursor
        # per item and only ever closed the last one.
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one news row; roll back (and log) on failure.

        Uses a parameterized query so the driver quotes/escapes every value —
        the original %-interpolated SQL broke on fields containing quotes and
        was open to SQL injection.
        """
        sql = 'insert into tw_news values(%s, %s, %s, %s, %s, %s)'
        try:
            self.cursor.execute(sql, (item['news_id'], item['title'],
                                      item['date'], item['author'],
                                      item['content'], item['src']))
            self.conn.commit()
        except Exception as e:
            # Best-effort: log the failure and keep the spider running.
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Release the cursor and connection if they were ever opened."""
        # Guards prevent AttributeError when open_spider failed to connect.
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()