明天的筆記本‧Tomorrow notebook

使用 Python、BeautifulSoup 採集奇摩新聞裡指定關鍵字的超連結及標題

# -*- coding: utf-8 -*-
"""
採集奇摩新聞裡指定關鍵字的超連結
@author: note.i360.tw
"""
import requests #HTTP請求庫
from bs4 import BeautifulSoup
import re #正則式


#取得文章=============================================
def get_urls(url, keyword="總統", timeout=10):
    """Print the title and link of every anchor whose text contains a keyword.

    Downloads ``url``, parses it with BeautifulSoup (html5lib parser), prints
    the page title, then prints the text and ``href`` of each ``<a>`` element
    whose string matches ``keyword``. If the server does not answer with
    HTTP 200, a failure message with the status code is printed instead.

    Args:
        url: Address of the page to download.
        keyword: Literal text to look for in anchor strings. Defaults to
            "總統", the keyword this function originally hard-coded.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        None. All results are written to standard output.

    Raises:
        requests.exceptions.RequestException: Not caught here; propagates
            from ``requests.get`` (connection failure, timeout, ...).
    """
    # A timeout keeps the script from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    response.encoding = 'utf8'

    # Stop early (and say so) when the download did not succeed.
    if response.status_code != requests.codes.ok:
        print("    下載失敗，狀態碼：" + str(response.status_code))
        return

    # Parse the HTML source with BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html5lib')

    # Show the title text rather than the raw <title> markup; the page may
    # have no <title> element at all.
    title_tag = soup.title
    page_title = title_tag.get_text(strip=True) if title_tag is not None else ""
    print("目前網頁名稱：" + page_title)

    # href=True skips anchors that carry no href attribute: their href would
    # be None and break the string concatenation below.
    # re.escape makes the keyword match literally, not as a regex pattern.
    pattern = re.compile(re.escape(keyword))
    stories = soup.find_all('a', href=True, string=pattern)

    if not stories:
        print("    沒找到關鍵字")
        return

    for story in stories:
        # Headline text of the link.
        print("標題：" + story.text)
        # Target address of the link.
        print("網址：" + story.get('href'))
        
       


#執行 ============================================================
if __name__ == "__main__":
    # Runs only when the file is executed directly as a script.
    # The Yahoo Taiwan News front page is the page to crawl.
    url = "https://tw.news.yahoo.com/"
    print("目前採集網址：" + url)
    get_urls(url)

作者：林明天

2019-09-27 00:11