Add a de-duplicated list of webpage links
This commit is contained in:
parent
e42ffd98ae
commit
d21af4f60a
@ -18,6 +18,7 @@ if page.status_code == 200:
|
||||
if href != "#":
|
||||
page_url.append(href)
|
||||
|
||||
webpage = []
|
||||
for i in page_url:
|
||||
page = requests.get(i)
|
||||
if page.status_code == 200:
|
||||
@ -43,7 +44,11 @@ for i in page_url:
|
||||
soup = BeautifulSoup(page.text, 'html.parser')
|
||||
h2 = soup.find_all("h2")
|
||||
for title in h2:
|
||||
print(title.find_all("a")[0].get("href", "/"))
|
||||
href = title.find_all("a")[0].get("href", "/")
|
||||
if href not in webpage:
|
||||
webpage.append(href)
|
||||
|
||||
print(webpage)
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user