上传文件至 'dagongPro/spiders'

新增:将图片url保存到MySQL
This commit is contained in:
link_1999 2022-04-06 21:45:29 +08:00
parent 5dd1363076
commit 7230145007
1 changed file with 3 additions and 0 deletions

View File

@@ -32,6 +32,8 @@ class DagongSpider(scrapy.Spider):
div_list = response.xpath('//div[@class="wrapper clearfix"]/div[1]/dl') div_list = response.xpath('//div[@class="wrapper clearfix"]/div[1]/dl')
for div in div_list: for div in div_list:
name = div.xpath('./dd[1]/a/text()').extract_first() name = div.xpath('./dd[1]/a/text()').extract_first()
# 图片对应的地址
src = div.xpath('./dt/a/img/@src').extract_first()
# 取出标题中的特殊字符 # 取出标题中的特殊字符
title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name) title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name)
@@ -41,6 +43,7 @@ class DagongSpider(scrapy.Spider):
item = DagongwangproItem() item = DagongwangproItem()
item['title'] = title item['title'] = title
item['src'] = src
# 对新闻详情页url发起请求 # 对新闻详情页url发起请求
yield scrapy.Request(url=new_detail_url, callback=self.parse_detail, meta={'item': item}) yield scrapy.Request(url=new_detail_url, callback=self.parse_detail, meta={'item': item})