add array unique of webpage
This commit is contained in:
parent
e42ffd98ae
commit
d21af4f60a
@ -18,6 +18,7 @@ if page.status_code == 200:
|
|||||||
if href != "#":
|
if href != "#":
|
||||||
page_url.append(href)
|
page_url.append(href)
|
||||||
|
|
||||||
|
webpage = []
|
||||||
for i in page_url:
|
for i in page_url:
|
||||||
page = requests.get(i)
|
page = requests.get(i)
|
||||||
if page.status_code == 200:
|
if page.status_code == 200:
|
||||||
@ -43,7 +44,11 @@ for i in page_url:
|
|||||||
soup = BeautifulSoup(page.text, 'html.parser')
|
soup = BeautifulSoup(page.text, 'html.parser')
|
||||||
h2 = soup.find_all("h2")
|
h2 = soup.find_all("h2")
|
||||||
for title in h2:
|
for title in h2:
|
||||||
print(title.find_all("a")[0].get("href", "/"))
|
href = title.find_all("a")[0].get("href", "/")
|
||||||
|
if href not in webpage:
|
||||||
|
webpage.append(href)
|
||||||
|
|
||||||
|
print(webpage)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user