get href article archive
This commit is contained in:
parent
a03489ee21
commit
991590f808
15
web_scrap.py
15
web_scrap.py
@ -14,3 +14,18 @@ if page.status_code == 200:
|
|||||||
href = anchor.get('href', '/')
|
href = anchor.get('href', '/')
|
||||||
if href != "#" and href != "http://www.clarissariviere.com/":
|
if href != "#" and href != "http://www.clarissariviere.com/":
|
||||||
print(href)
|
print(href)
|
||||||
|
|
||||||
|
|
||||||
|
# Walk the paginated archive listing and print the link found in each
# article heading. Archive pages follow the URL pattern
# /archives/p<N>-10.html; page numbers 1 through 99 are requested.
for i in range(1, 100):
    page = requests.get(
        "https://www.clarissariviere.com/archives/p{0}-10.html".format(i)
    )
    # Check the response before parsing so failed pages cost nothing more
    # than the request itself.
    if page.status_code != 200:
        continue
    soup = BeautifulSoup(page.text, 'html.parser')
    for title in soup.find_all("h2"):
        # A heading without a link would make find_all("a")[0] raise
        # IndexError and abort the whole crawl; skip such headings instead.
        anchor = title.find("a")
        if anchor is not None:
            print(anchor.get("href", "/"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user