get href article archive

2023-02-28 22:24:16 +01:00 · 2023-02-28 22:24:16 +01:00 · 991590f808
commit 991590f808
parent a03489ee21
1 changed files with 15 additions and 0 deletions
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -14,3 +14,18 @@ if page.status_code == 200:
        href = anchor.get('href', '/')
        if href != "#" and href != "http://www.clarissariviere.com/":
            print(href)
 # Walk archive index pages p1..p99 and print the href of the first link found
 # inside each <h2> heading on every page that loads successfully.
 for i in range(1, 100):
    page = requests.get("https://www.clarissariviere.com/archives/p{0}-10.html".format(i))
    # Check the status first so failed pages are never handed to the parser.
    if page.status_code != 200:
        continue
    soup = BeautifulSoup(page.text, 'html.parser')
    for title in soup.find_all("h2"):
        # A heading without any <a> would make find_all("a")[0] raise
        # IndexError and abort the whole crawl; skip such headings instead.
        anchor = title.find("a")
        if anchor is not None:
            print(anchor.get("href", "/"))