get href article archive

This commit is contained in:
Valentin CZERYBA 2023-02-28 22:24:16 +01:00
parent a03489ee21
commit 991590f808

View File

@ -14,3 +14,18 @@ if page.status_code == 200:
href = anchor.get('href', '/') href = anchor.get('href', '/')
if href != "#" and href != "http://www.clarissariviere.com/": if href != "#" and href != "http://www.clarissariviere.com/":
print(href) print(href)
# Walk the paginated archive listing and print the link of each article title.
# NOTE(review): the original computed `paging = i * 10` but never used it; the
# URL below is built from `i` itself. Presumably the archive URL expects an
# offset (p10, p20, ...) rather than a page index -- confirm against the site
# before changing the URL.
for i in range(1, 100):
    page = requests.get("https://www.clarissariviere.com/archives/p{0}-10.html".format(i))
    if page.status_code != 200:
        # Nothing useful to parse on a failed response; move to the next page.
        continue
    soup = BeautifulSoup(page.text, 'html.parser')
    for title in soup.find_all("h2"):
        link = title.find("a")
        if link is None:
            # An <h2> without an anchor would raise IndexError with
            # find_all("a")[0]; skip it instead of aborting the crawl.
            continue
        print(link.get("href", "/"))