web_scrap/web_scrap.py

17 lines
469 B
Python
Raw Normal View History

2023-02-28 21:42:21 +01:00
#!/usr/bin/python3
# Python 3
# Extract the links from a web page
from bs4 import BeautifulSoup
2023-02-28 21:52:12 +01:00
import requests
2023-02-28 21:42:21 +01:00
2023-02-28 21:52:12 +01:00
# Fetch the site's home page. An explicit timeout is passed because requests
# applies none by default, so an unresponsive server would otherwise block
# the script forever.
page = requests.get("https://www.clarissariviere.com", timeout=10)
if page.status_code == 200:
    soup = BeautifulSoup(page.text, 'html.parser')
    # Look up the <ul id="listsmooth"> element. find() returns None when the
    # element is absent, which avoids the IndexError that indexing an empty
    # find_all() result would raise if the page layout changes.
    link_list = soup.find("ul", id="listsmooth")
    anchors = link_list.find_all("a") if link_list is not None else []
    for anchor in anchors:
        # Anchors without an href attribute fall back to '/'.
        href = anchor.get('href', '/')
        # Skip "#" placeholders and the link to the site's root URL.
        if href not in ("#", "http://www.clarissariviere.com/"):
            print(href)