From 991590f8083353fba9d098392d5c65a1c3f19670 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 22:24:16 +0100
Subject: [PATCH] get href article archive

---
Review notes (this section is ignored by `git am`):
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. The indentation of the three context lines is a best guess;
  verify it against web_scrap.py before applying.
- The added block was revised in review: the unused `paging` variable was
  removed, the response is parsed only after the status check, headings
  without a link are skipped instead of raising IndexError, and the trailing
  blank lines were dropped. The post-image blob id on the `index` line
  therefore no longer matches the result.

 web_scrap.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/web_scrap.py b/web_scrap.py
index dff226d..5571050 100644
--- web_scrap.py
+++ web_scrap.py
@@ -14,3 +14,16 @@ if page.status_code == 200:
         href = anchor.get('href', '/')
         if href != "#" and href != "http://www.clarissariviere.com/":
             print(href)
+
+
+# Fetch archive pages p1-10.html .. p99-10.html and print each title link.
+for i in range(1, 100):
+    page = requests.get("https://www.clarissariviere.com/archives/p{0}-10.html".format(i))
+    # Only parse responses that were served successfully.
+    if page.status_code == 200:
+        soup = BeautifulSoup(page.text, 'html.parser')
+        for title in soup.find_all("h2"):
+            # A heading may contain no link; skip it rather than crash.
+            anchor = title.find("a")
+            if anchor is not None:
+                print(anchor.get("href", "/"))