2023-02-28 21:42:21 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
# Python 3
|
|
|
|
# Extract the links from a web page
|
|
|
|
from bs4 import BeautifulSoup
|
2023-02-28 21:52:12 +01:00
|
|
|
import requests
|
2023-02-28 21:42:21 +01:00
|
|
|
|
2023-02-28 21:52:12 +01:00
|
|
|
# Fetch the page whose links are to be listed. An explicit timeout is passed
# because requests does not time out by default, so a stalled server would
# otherwise hang the script indefinitely.
page = requests.get("https://www.clarissariviere.com", timeout=10)

# Only parse the body when the server answered 200 OK; any other status
# produces no output.
if page.status_code == 200:
    soup = BeautifulSoup(page.text, 'html.parser')

    # Print the href of every <a> element, falling back to '/' for anchors
    # that carry no href attribute.
    for anchor in soup.find_all('a'):
        print(anchor.get('href', '/'))
|
|
|
|
#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
|
|
|
|
# print(response)
|
|
|
|
#webpage = response.read()
|
|
|
|
#soup = BeautifulSoup(webpage, 'html.parser')
|
|
|
|
#for anchor in soup.find_all('a'):
|
|
|
|
# print(anchor.get('href', '/'))
|