From 7230145007f4651df4b32ab7db4c1684bd9d7c29 Mon Sep 17 00:00:00 2001 From: link_1999 <1402246900@qq.com> Date: Wed, 6 Apr 2022 21:45:29 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20'dagongPro/spiders'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增:将图片url保存到MySQL --- dagongPro/spiders/dagong.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dagongPro/spiders/dagong.py b/dagongPro/spiders/dagong.py index c1bdf74..201b4d6 100644 --- a/dagongPro/spiders/dagong.py +++ b/dagongPro/spiders/dagong.py @@ -32,6 +32,8 @@ class DagongSpider(scrapy.Spider): div_list = response.xpath('//div[@class="wrapper clearfix"]/div[1]/dl') for div in div_list: name = div.xpath('./dd[1]/a/text()').extract_first() + # 图片对应的地址 + src = div.xpath('./dt/a/img/@src').extract_first() # 取出标题中的特殊字符 title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name) @@ -41,6 +43,7 @@ class DagongSpider(scrapy.Spider): item = DagongwangproItem() item['title'] = title + item['src'] = src # 对新闻详情页url发起请求 yield scrapy.Request(url=new_detail_url, callback=self.parse_detail, meta={'item': item})