From 0262e99d22d48f1fef90318dbc410a2239a88510 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 21:42:21 +0100
Subject: [PATCH] first init

---
Reviewer notes (commentary only; this area is not part of the commit):

  Purpose: this patch creates web_scrap.py, a flat Python 3 script that
  downloads https://www.clarissariviere.com/ with urllib.request.urlopen,
  parses the response body with BeautifulSoup('html.parser') and prints
  the href attribute of every <a> element, printing '/' for an anchor
  that has no href. The French comment in the payload reads
  "Extraction of the links of a web page".

  NOTE(review): the indentation of the added lines below was
  reconstructed as 4 spaces per level, with the parsing and the loop
  kept inside the "with" block. Verify against blob 06ed902 before
  relying on this patch byte-for-byte.

  NOTE(review): the payload starts with a shebang but the file is
  created with mode 100644, so it is not executable as committed.

  NOTE(review): urlopen is called without a timeout argument, so the
  fetch presumably relies on the global socket default. Confirm whether
  an explicit timeout is wanted in a follow-up commit.

  NOTE(review): the fetch runs at module top level with a hard-coded
  URL, so importing web_scrap performs the network request. The file
  also ends without a trailing newline.

 web_scrap.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 web_scrap.py

diff --git a/web_scrap.py b/web_scrap.py
new file mode 100644
index 0000000..06ed902
--- /dev/null
+++ b/web_scrap.py
@@ -0,0 +1,12 @@
+#!/usr/bin/python3
+
+# Python 3
+# Extraction des liens d'une page web
+from bs4 import BeautifulSoup
+import urllib.request
+
+with urllib.request.urlopen('https://www.clarissariviere.com/') as response:
+    webpage = response.read()
+    soup = BeautifulSoup(webpage, 'html.parser')
+    for anchor in soup.find_all('a'):
+        print(anchor.get('href', '/'))
\ No newline at end of file