diff --git a/web_scrap.py b/web_scrap.py index 8ea6977..a96d90f 100644 --- a/web_scrap.py +++ b/web_scrap.py @@ -18,6 +18,7 @@ if page.status_code == 200: if href != "#": page_url.append(href) +webpage = [] for i in page_url: page = requests.get(i) if page.status_code == 200: @@ -43,7 +44,11 @@ for i in page_url: soup = BeautifulSoup(page.text, 'html.parser') h2 = soup.find_all("h2") for title in h2: - print(title.find_all("a")[0].get("href", "/")) + href = title.find_all("a")[0].get("href", "/") + if href not in webpage: + webpage.append(href) + +print(webpage)