From eace6a5c1312ddc048cdea30c76cec711f1b951b Mon Sep 17 00:00:00 2001
From: link_1999 <1402246900@qq.com>
Date: Wed, 6 Apr 2022 20:15:47 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'dagongPro/spiders'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

新增：将新闻标题、发布单位、发布日期、正文内容添加到 MySQL 数据库的功能
---
 dagongPro/spiders/dagong.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/dagongPro/spiders/dagong.py b/dagongPro/spiders/dagong.py
index 799a06a..c1bdf74 100644
--- a/dagongPro/spiders/dagong.py
+++ b/dagongPro/spiders/dagong.py
@@ -18,7 +18,7 @@ class DagongSpider(scrapy.Spider):
 
     def parse(self, response):
         li_list = response.xpath('/html/body/div[2]/div/div[2]/ul[2]/li[2]/div/ul/li')
-        alist = [6]  # [0内地, 2香港, 4两岸, 6国际, 8军事]
+        alist = [8]  # [0内地, 2香港, 4两岸, 6国际, 8军事]
         for index in alist:
             model_url = li_list[index].xpath('./a/@href').extract_first()
             self.models_urls.append(model_url)
@@ -34,7 +34,7 @@ class DagongSpider(scrapy.Spider):
             name = div.xpath('./dd[1]/a/text()').extract_first()
 
             # 取出标题中的特殊字符
-            title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?", "", name)
+            title = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\*.*?|\\:.*?|\\/.*?|\\\".*?|\\\\.*?|\\?.*?\\|.*?", "", name)
 
             new_detail_url = div.xpath('./dd[1]/a/@href').extract_first()
             # print(title)
@@ -50,6 +50,7 @@ class DagongSpider(scrapy.Spider):
         title = response.xpath('//div[@class="wrap_left"]/h2/text()').extract_first()
         content = response.xpath('//div[@class="wrap_left"]/div[3]//text()').extract()
         content = ''.join(content)
+        print(content)
         date = response.xpath('//div[@class="wrap_left"]/div[1]/div[1]//text()').extract()
         date = ''.join(date)