parent
5dd1363076
commit
7230145007
|
@ -32,6 +32,8 @@ class DagongSpider(scrapy.Spider):
|
||||||
div_list = response.xpath('//div[@class="wrapper clearfix"]/div[1]/dl')
|
div_list = response.xpath('//div[@class="wrapper clearfix"]/div[1]/dl')
|
||||||
for div in div_list:
|
for div in div_list:
|
||||||
name = div.xpath('./dd[1]/a/text()').extract_first()
|
name = div.xpath('./dd[1]/a/text()').extract_first()
|
||||||
|
# 图片对应的地址
|
||||||
|
src = div.xpath('./dt/a/img/@src').extract_first()
|
||||||
|
|
||||||
# 取出标题中的特殊字符
|
# 取出标题中的特殊字符
|
||||||
title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name)
|
title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name)
|
||||||
|
@ -41,6 +43,7 @@ class DagongSpider(scrapy.Spider):
|
||||||
|
|
||||||
item = DagongwangproItem()
|
item = DagongwangproItem()
|
||||||
item['title'] = title
|
item['title'] = title
|
||||||
|
item['src'] = src
|
||||||
|
|
||||||
# 对新闻详情页url发起请求
|
# 对新闻详情页url发起请求
|
||||||
yield scrapy.Request(url=new_detail_url, callback=self.parse_detail, meta={'item': item})
|
yield scrapy.Request(url=new_detail_url, callback=self.parse_detail, meta={'item': item})
|
||||||
|
|
Loading…
Reference in New Issue