Patch for web_scrap.py: fetch the page with the requests library instead of
urllib.request, and only parse and print the anchors' href values when the
response status code is 200.

Reviewer notes on the code this patch produces (the patch content below is
unchanged; only its line structure has been restored):
  1. requests.get() is called without a timeout argument.
  2. When the status code is not 200 the script prints nothing and reports
     no error.
  3. The previous urllib implementation is added back as commented-out code.
  4. The resulting file still has no newline at end of file.

diff --git a/web_scrap.py b/web_scrap.py
index 06ed902..f5a7ccf 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -3,10 +3,17 @@
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
-import urllib.request
+import requests
 
-with urllib.request.urlopen('https://www.clarissariviere.com/') as response:
-    webpage = response.read()
-    soup = BeautifulSoup(webpage, 'html.parser')
+page = requests.get("https://www.clarissariviere.com")
+
+if page.status_code == 200:
+    soup = BeautifulSoup(page.text, 'html.parser')
     for anchor in soup.find_all('a'):
-        print(anchor.get('href', '/'))
\ No newline at end of file
+        print(anchor.get('href', '/'))
+#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
+#    print(response)
+    #webpage = response.read()
+    #soup = BeautifulSoup(webpage, 'html.parser')
+    #for anchor in soup.find_all('a'):
+    #    print(anchor.get('href', '/'))
\ No newline at end of file