#!/usr/bin/python3
# Provenance: added as web_scrap.py by commit
# 0262e99d22d48f1fef90318dbc410a2239a88510 ("first init"),
# Author: Valentin CZERYBA, Date: Tue Feb 28 21:42:21 2023 +0100.

# Python 3
# Extract the links from a web page: fetch the page, parse it as HTML and
# print the href of every <a> element, one per line.
from bs4 import BeautifulSoup
import urllib.request

# A timeout is passed so that an unresponsive server cannot block the script
# forever (urlopen otherwise falls back to the global socket default).
with urllib.request.urlopen('https://www.clarissariviere.com/',
                            timeout=30) as response:
    webpage = response.read()

# The body has been read in full, so parsing happens after the connection
# has been released by the `with` block.
soup = BeautifulSoup(webpage, 'html.parser')
for anchor in soup.find_all('a'):
    # Anchors without an href attribute are printed as '/'.
    print(anchor.get('href', '/'))