From 64118a3c2074f7e05a114887d2990df73a216e3c Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 21:52:12 +0100
Subject: [PATCH] test webscrapping

---
Reviewer note (ignored by git am): this change replaces the urllib.request
fetch with requests.get and only parses the page and prints the anchors'
href values when the response status code is 200. A urllib-based variant is
appended to the script as commented-out lines. Line breaks of this patch were
restored from a single-line copy; indentation inside the hunk is rebuilt at
4 spaces per level and should be checked against the original commit.

 web_scrap.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index 06ed902..f5a7ccf 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -3,10 +3,17 @@
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
-import urllib.request
+import requests
 
-with urllib.request.urlopen('https://www.clarissariviere.com/') as response:
-    webpage = response.read()
-    soup = BeautifulSoup(webpage, 'html.parser')
+page = requests.get("https://www.clarissariviere.com")
+
+if page.status_code == 200:
+    soup = BeautifulSoup(page.text, 'html.parser')
     for anchor in soup.find_all('a'):
-        print(anchor.get('href', '/'))
\ No newline at end of file
+        print(anchor.get('href', '/'))
+#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
+#    print(response)
+    #webpage = response.read()
+    #soup = BeautifulSoup(webpage, 'html.parser')
+    #for anchor in soup.find_all('a'):
+    #    print(anchor.get('href', '/'))
\ No newline at end of file