Test web scraping

2023-02-28 21:52:12 +01:00 · 2023-02-28 21:52:12 +01:00 · 64118a3c20
commit 64118a3c20
parent 0262e99d22
1 changed file with 12 additions and 5 deletions
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -3,10 +3,17 @@
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
-import urllib.request
+import requests

-with urllib.request.urlopen('https://www.clarissariviere.com/') as response:
-    webpage = response.read()
-    soup = BeautifulSoup(webpage, 'html.parser')
+page = requests.get("https://www.clarissariviere.com")
+
+if page.status_code == 200:
+    soup = BeautifulSoup(page.text, 'html.parser')
    for anchor in soup.find_all('a'):
-        print(anchor.get('href', '/'))
+        print(anchor.get('href', '/'))
+#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
+#    print(response)
+    #webpage = response.read()
+    #soup = BeautifulSoup(webpage, 'html.parser')
+    #for anchor in soup.find_all('a'):
+    #    print(anchor.get('href', '/'))