From 64118a3c2074f7e05a114887d2990df73a216e3c Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 21:52:12 +0100
Subject: [PATCH 01/11] test webscrapping

---
 web_scrap.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index 06ed902..f5a7ccf 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -3,10 +3,17 @@
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
-import urllib.request
+import requests
 
-with urllib.request.urlopen('https://www.clarissariviere.com/') as response:
-    webpage = response.read()
-    soup = BeautifulSoup(webpage, 'html.parser')
+page = requests.get("https://www.clarissariviere.com")
+
+if page.status_code == 200:
+    soup = BeautifulSoup(page.text, 'html.parser')
     for anchor in soup.find_all('a'):
-        print(anchor.get('href', '/'))
\ No newline at end of file
+        print(anchor.get('href', '/'))
+#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
+#    print(response)
+    #webpage = response.read()
+    #soup = BeautifulSoup(webpage, 'html.parser')
+    #for anchor in soup.find_all('a'):
+    #    print(anchor.get('href', '/'))
\ No newline at end of file

From a03489ee2141b2584c07d0505254791c5e962ce7 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 22:03:03 +0100
Subject: [PATCH 02/11] menu nav list

---
 web_scrap.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index f5a7ccf..dff226d 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -9,11 +9,8 @@ page = requests.get("https://www.clarissariviere.com")
 
 if page.status_code == 200:
     soup = BeautifulSoup(page.text, 'html.parser')
-    for anchor in soup.find_all('a'):
-        print(anchor.get('href', '/'))
-#with urllib.request.urlopen('https://www.clarissariviere.com/index.html') as response:
-#    print(response)
-    #webpage = response.read()
-    #soup = BeautifulSoup(webpage, 'html.parser')
-    #for anchor in soup.find_all('a'):
-    #    print(anchor.get('href', '/'))
\ No newline at end of file
+    ul = soup.find_all("ul", id="listsmooth")
+    for anchor in ul[0].find_all("a"):
+        href = anchor.get('href', '/')
+        if href != "#" and href != "http://www.clarissariviere.com/":
+            print(href)

From 991590f8083353fba9d098392d5c65a1c3f19670 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Tue, 28 Feb 2023 22:24:16 +0100
Subject: [PATCH 03/11] get href article archive

---
 web_scrap.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/web_scrap.py b/web_scrap.py
index dff226d..5571050 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -14,3 +14,18 @@ if page.status_code == 200:
         href = anchor.get('href', '/')
         if href != "#" and href != "http://www.clarissariviere.com/":
             print(href)
+
+
+for i in range(1,100):
+    paging = i * 10
+    page = requests.get("https://www.clarissariviere.com/archives/p{0}-10.html".format(i))
+    soup = BeautifulSoup(page.text, 'html.parser')
+    if page.status_code == 200:
+        h2 = soup.find_all("h2")
+        for title in h2:
+            print(title.find_all("a")[0].get("href", "/"))
+
+
+
+
+

From e42ffd98ae4bc5ca0e3360f944d024a133ed0438 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Thu, 2 Mar 2023 23:28:04 +0100
Subject: [PATCH 04/11] scrap href all page from gouter

---
 web_scrap.py | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index 5571050..8ea6977 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -5,25 +5,45 @@
 from bs4 import BeautifulSoup
 import requests
 
-page = requests.get("https://www.clarissariviere.com")
+URL = "www.clarissariviere.com"
+page = requests.get("https://{0}".format(URL))
+
+page_url = []
 
 if page.status_code == 200:
     soup = BeautifulSoup(page.text, 'html.parser')
     ul = soup.find_all("ul", id="listsmooth")
     for anchor in ul[0].find_all("a"):
         href = anchor.get('href', '/')
-        if href != "#" and href != "http://www.clarissariviere.com/":
-            print(href)
+        if href != "#":
+            page_url.append(href)
 
-
-for i in range(1,100):
-    paging = i * 10
-    page = requests.get("https://www.clarissariviere.com/archives/p{0}-10.html".format(i))
-    soup = BeautifulSoup(page.text, 'html.parser')
+for i in page_url:
+    page = requests.get(i)
     if page.status_code == 200:
-        h2 = soup.find_all("h2")
-        for title in h2:
-            print(title.find_all("a")[0].get("href", "/"))
+        print("page : {0}".format(i))
+        soup = BeautifulSoup(page.text, 'html.parser')
+        class_div = pagingfirstline = soup.find_all("div", class_="pagingfirstline")
+        if len(class_div) > 0:
+            pagingfirstline = class_div[0].find_all("a")
+            if len(pagingfirstline) > 1:
+                lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
+                element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
+                number_page = element_lastpage.split("-")[0].split("p")[1]
+                number_lastpage = int(number_page) / 10
+                for j in range(1,int(number_lastpage)):
+                    paging = j * 10
+                    categorie = i.split("/")
+                    url_paging = "https://{0}/archives/p{1}-10.html".format(URL, paging)
+                    if len(categorie) != 4:
+                        url_paging = "https://{0}/archives/{1}/p{2}-10.html".format(URL, categorie[4], paging)
+                    print(url_paging)
+                    page = requests.get(url_paging)
+                    if page.status_code == 200:
+                        soup = BeautifulSoup(page.text, 'html.parser')
+                        h2 = soup.find_all("h2")
+                        for title in h2:
+                            print(title.find_all("a")[0].get("href", "/"))
 
 
 
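
The paging arithmetic added in PATCH 04 is hard to read inside the diff. A minimal sketch of the same calculation, not part of the patch series, assuming a canalblog-style last-page link such as /archives/p150-10.html (the "150" is a hypothetical post count):

    # Illustrative rerun of the PATCH 04 paging derivation with an assumed last-page link.
    lastpage = "https://www.clarissariviere.com/archives/p150-10.html"
    element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]   # "p150-10.html"
    number_page = element_lastpage.split("-")[0].split("p")[1]           # "150"
    number_lastpage = int(number_page) / 10                              # 15.0 archive pages
    print(["/archives/p{0}-10.html".format(j * 10) for j in range(1, int(number_lastpage))])
    # ['/archives/p10-10.html', ..., '/archives/p140-10.html'] -- range() stops before the final page
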
From d21af4f60aff64a6618408db8e1c2a711e1dadae Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Fri, 3 Mar 2023 20:03:48 +0100
Subject: [PATCH 05/11] add array unique of webpage

---
 web_scrap.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/web_scrap.py b/web_scrap.py
index 8ea6977..a96d90f 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -18,6 +18,7 @@ if page.status_code == 200:
         if href != "#":
             page_url.append(href)
 
+webpage = []
 for i in page_url:
     page = requests.get(i)
     if page.status_code == 200:
@@ -43,7 +44,11 @@ for i in page_url:
                         soup = BeautifulSoup(page.text, 'html.parser')
                         h2 = soup.find_all("h2")
                         for title in h2:
-                            print(title.find_all("a")[0].get("href", "/"))
+                            href = title.find_all("a")[0].get("href", "/")
+                            if href not in webpage:
+                                webpage.append(href)
+
+print(webpage)
 
 
 

From 3c76cab9a7566c46021846b7351bed829f1f5915 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sat, 4 Mar 2023 16:12:42 +0100
Subject: [PATCH 06/11] add urlparse

---
 web_scrap.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index a96d90f..caf2537 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -3,6 +3,7 @@
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
+from urllib.parse import urlparse
 import requests
 
 URL = "www.clarissariviere.com"
@@ -21,6 +22,8 @@ if page.status_code == 200:
 webpage = []
 for i in page_url:
     page = requests.get(i)
+    o = urlparse(i)
+    print(o.path)
     if page.status_code == 200:
         print("page : {0}".format(i))
         soup = BeautifulSoup(page.text, 'html.parser')
@@ -34,10 +37,10 @@ for i in page_url:
                 number_lastpage = int(number_page) / 10
                 for j in range(1,int(number_lastpage)):
                     paging = j * 10
-                    categorie = i.split("/")
+                    categorie = urlparse(i).path.split("/")
                     url_paging = "https://{0}/archives/p{1}-10.html".format(URL, paging)
-                    if len(categorie) != 4:
-                        url_paging = "https://{0}/archives/{1}/p{2}-10.html".format(URL, categorie[4], paging)
+                    if len(categorie) > 2:
+                        url_paging = "https://{0}/archives/{1}/p{2}-10.html".format(URL, categorie[2], paging)
                     print(url_paging)
                     page = requests.get(url_paging)
                     if page.status_code == 200:
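
PATCH 06 switches from splitting the whole URL to splitting only its path, which is why the index moves from categorie[4] to categorie[2]. A short comparison, assuming a category link of the form shown below ("recettes" is a hypothetical category name):

    from urllib.parse import urlparse

    link = "http://www.clarissariviere.com/archives/recettes/index.html"
    print(link.split("/"))
    # ['http:', '', 'www.clarissariviere.com', 'archives', 'recettes', 'index.html'] -> category at index 4
    print(urlparse(link).path.split("/"))
    # ['', 'archives', 'recettes', 'index.html'] -> category at index 2, hence the new len(categorie) > 2 test
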
From 6794f77df2da4e63dcabc8b75071c151286c38ff Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sat, 4 Mar 2023 18:35:06 +0100
Subject: [PATCH 07/11] create dir for every path

---
 web_scrap.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index caf2537..c381606 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -1,10 +1,23 @@
 #!/usr/bin/python3
-
 # Python 3
 # Extraction des liens d'une page web
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse
-import requests
+import requests, os
+
+def mkdir_path(path_dir):
+    if not os.path.exists(path_dir):
+        makedir = []
+        pathh = path_dir.split("/")
+        for i in pathh:
+            makedir.append(i)
+            repath = "/".join(makedir)
+            if not os.path.exists(repath):
+                os.mkdir(repath)
+
+BACKUP_DIR = "backup"
+
+mkdir_path(BACKUP_DIR)
 
 URL = "www.clarissariviere.com"
 page = requests.get("https://{0}".format(URL))
@@ -22,8 +35,6 @@ if page.status_code == 200:
 webpage = []
 for i in page_url:
     page = requests.get(i)
-    o = urlparse(i)
-    print(o.path)
     if page.status_code == 200:
         print("page : {0}".format(i))
         soup = BeautifulSoup(page.text, 'html.parser')
@@ -51,9 +62,9 @@ for i in page_url:
                             if href not in webpage:
                                 webpage.append(href)
 
-print(webpage)
-
-
-
-
-
+for i in webpage:
+    o = urlparse(i)
+    path_web = o.path.split("/")
+    path_web.pop(len(path_web)-1)
+    dir_page_web = "/".join(path_web)
+    mkdir_path("{0}/{1}".format(BACKUP_DIR, dir_page_web))
\ No newline at end of file

From 4de811c607b86f9a7a09a847714cefd0ff6dd684 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sat, 4 Mar 2023 18:45:32 +0100
Subject: [PATCH 08/11] fix placement variable

---
 web_scrap.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index c381606..c43c2a5 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -5,6 +5,10 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 import requests, os
 
+BACKUP_DIR = "backup"
+URL = "www.clarissariviere.com"
+
+
 def mkdir_path(path_dir):
     if not os.path.exists(path_dir):
         makedir = []
@@ -15,11 +19,8 @@ def mkdir_path(path_dir):
             if not os.path.exists(repath):
                 os.mkdir(repath)
 
-BACKUP_DIR = "backup"
-
 mkdir_path(BACKUP_DIR)
 
-URL = "www.clarissariviere.com"
 page = requests.get("https://{0}".format(URL))
 
 page_url = []
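
The mkdir_path() helper introduced in PATCH 07 creates each path component of the backup directory in turn. For reference, a standard-library shortcut with the same effect would be (a sketch, not part of the patches):

    import os

    def mkdir_path(path_dir):
        # os.makedirs creates every missing intermediate directory in one call.
        os.makedirs(path_dir, exist_ok=True)
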
From a3aceccba70fca34c2a3bd42bc5b262a172f5f98 Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sun, 5 Mar 2023 20:12:58 +0100
Subject: [PATCH 09/11] create function for every task

---
 web_scrap.py | 106 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 62 insertions(+), 44 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index c43c2a5..18e38c5 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -19,53 +19,71 @@ def mkdir_path(path_dir):
             if not os.path.exists(repath):
                 os.mkdir(repath)
 
-mkdir_path(BACKUP_DIR)
 
-page = requests.get("https://{0}".format(URL))
 
-page_url = []
 
-if page.status_code == 200:
-    soup = BeautifulSoup(page.text, 'html.parser')
-    ul = soup.find_all("ul", id="listsmooth")
-    for anchor in ul[0].find_all("a"):
-        href = anchor.get('href', '/')
-        if href != "#":
-            page_url.append(href)
-
-webpage = []
-for i in page_url:
-    page = requests.get(i)
+def getUrlPage(url):
+    print(url)
+    page = requests.get(url)
+    page_url = []
     if page.status_code == 200:
-        print("page : {0}".format(i))
         soup = BeautifulSoup(page.text, 'html.parser')
-        class_div = pagingfirstline = soup.find_all("div", class_="pagingfirstline")
-        if len(class_div) > 0:
-            pagingfirstline = class_div[0].find_all("a")
-            if len(pagingfirstline) > 1:
-                lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
-                element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
-                number_page = element_lastpage.split("-")[0].split("p")[1]
-                number_lastpage = int(number_page) / 10
-                for j in range(1,int(number_lastpage)):
-                    paging = j * 10
-                    categorie = urlparse(i).path.split("/")
-                    url_paging = "https://{0}/archives/p{1}-10.html".format(URL, paging)
-                    if len(categorie) > 2:
-                        url_paging = "https://{0}/archives/{1}/p{2}-10.html".format(URL, categorie[2], paging)
-                    print(url_paging)
-                    page = requests.get(url_paging)
-                    if page.status_code == 200:
-                        soup = BeautifulSoup(page.text, 'html.parser')
-                        h2 = soup.find_all("h2")
-                        for title in h2:
-                            href = title.find_all("a")[0].get("href", "/")
-                            if href not in webpage:
-                                webpage.append(href)
+        ul = soup.find_all("ul", id="listsmooth")
+        for anchor in ul[0].find_all("a"):
+            href = anchor.get('href', '/')
+            if href != "#":
+                page_url.append(href)
 
-for i in webpage:
-    o = urlparse(i)
-    path_web = o.path.split("/")
-    path_web.pop(len(path_web)-1)
-    dir_page_web = "/".join(path_web)
-    mkdir_path("{0}/{1}".format(BACKUP_DIR, dir_page_web))
\ No newline at end of file
+    webpage = []
+    for i in page_url:
+        page = requests.get(i)
+        if page.status_code == 200:
+            print("page : {0}".format(i))
+            if i not in webpage:
+                webpage.append(i)
+            soup = BeautifulSoup(page.text, 'html.parser')
+            class_div = pagingfirstline = soup.find_all("div", class_="pagingfirstline")
+            if len(class_div) > 0:
+                pagingfirstline = class_div[0].find_all("a")
+                if len(pagingfirstline) > 1:
+                    lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
+                    element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
+                    number_page = element_lastpage.split("-")[0].split("p")[1]
+                    number_lastpage = int(number_page) / 10
+                    for j in range(1,int(number_lastpage)):
+                        paging = j * 10
+                        categorie = urlparse(i).path.split("/")
+                        url_paging = "{0}/archives/p{1}-10.html".format(url, paging)
+                        if len(categorie) > 2:
+                            url_paging = "{0}/archives/{1}/p{2}-10.html".format(url, categorie[2], paging)
+                        print(url_paging)
+                        if url_paging not in webpage:
+                            webpage.append(url_paging)
+                        page = requests.get(url_paging)
+                        if page.status_code == 200:
+                            soup = BeautifulSoup(page.text, 'html.parser')
+                            h2 = soup.find_all("h2")
+                            for title in h2:
+                                href = title.find_all("a")[0].get("href", "/")
+                                if href not in webpage:
+                                    o = urlparse(href)
+                                    o = o._replace(scheme="https").geturl()
+                                    webpage.append(o)
+    return webpage
+
+
+def downloadPage(url):
+    o = urlparse(url)
+    o = o._replace(scheme="https")
+    o = o._replace(fragment="")
+    webpage = getUrlPage(o.geturl().replace(":///", "://"))
+    for i in webpage:
+        o = urlparse(i)
+        path_web = o.path.split("/")
+        path_web.pop(len(path_web)-1)
+        dir_page_web = "/".join(path_web)
+        mkdir_path("{0}/{1}".format(BACKUP_DIR, dir_page_web))
+
+
+if __name__ == '__main__':
+    downloadPage(URL)
\ No newline at end of file
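
PATCH 09 starts normalising every collected href to https through urllib.parse before storing it. A small illustration of that call chain (the archive URL below is a hypothetical example, not taken from the site):

    from urllib.parse import urlparse

    o = urlparse("http://www.clarissariviere.com/archives/2023/02/index.html")
    print(o._replace(scheme="https").geturl())
    # https://www.clarissariviere.com/archives/2023/02/index.html
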
From c7dc2d626f5995ad8470b51a4579c861d3c6323e Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sun, 5 Mar 2023 21:44:30 +0100
Subject: [PATCH 10/11] Download file html

---
 web_scrap.py | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/web_scrap.py b/web_scrap.py
index 18e38c5..fb8fc32 100644
--- a/web_scrap.py
+++ b/web_scrap.py
@@ -1,13 +1,7 @@
 #!/usr/bin/python3
-# Python 3
-# Extraction des liens d'une page web
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse
-import requests, os
-
-BACKUP_DIR = "backup"
-URL = "www.clarissariviere.com"
-
+import requests, os, argparse
 
 def mkdir_path(path_dir):
     if not os.path.exists(path_dir):
         makedir = []
@@ -20,10 +14,7 @@ def mkdir_path(path_dir):
                 os.mkdir(repath)
 
-
-
 
 def getUrlPage(url):
-    print(url)
     page = requests.get(url)
     page_url = []
     if page.status_code == 200:
@@ -69,21 +60,35 @@ def getUrlPage(url):
                                     o = urlparse(href)
                                     o = o._replace(scheme="https").geturl()
                                     webpage.append(o)
-    return webpage
+    return webpage
 
 
-def downloadPage(url):
+def downloadPage(url, backup_dir):
     o = urlparse(url)
     o = o._replace(scheme="https")
-    o = o._replace(fragment="")
     webpage = getUrlPage(o.geturl().replace(":///", "://"))
-    for i in webpage:
-        o = urlparse(i)
+    for i in range(0, len(webpage)):
+        o = urlparse(webpage[i])
         path_web = o.path.split("/")
+        filePageWeb = path_web[len(path_web)-1]
         path_web.pop(len(path_web)-1)
         dir_page_web = "/".join(path_web)
-        mkdir_path("{0}/{1}".format(BACKUP_DIR, dir_page_web))
+        mkdir_path("{0}/{1}".format(backup_dir, dir_page_web))
+        r = requests.get(webpage[i])
+        if r.status_code == 200:
+            fileDownload = "{0}/index.html".format(backup_dir)
+            if len(dir_page_web) > 0 and len(filePageWeb) > 0:
+                fileDownload = "{0}/{1}/{2}".format(backup_dir, dir_page_web, filePageWeb)
+            print("{0}/{1} : {2}".format(i, len(webpage), fileDownload))
+            open(fileDownload, "wb").write(r.content)
 
 
 if __name__ == '__main__':
-    downloadPage(URL)
\ No newline at end of file
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", help="canblog URL to be scraping", required=True)
+    parser.add_argument("--dir",
+                        default="backup",
+                        help="backup file path")
+    parser.add_argument("--verbosity", help="Verbosity", action="store_false")
+    args = parser.parse_args()
+    downloadPage(args.url, args.dir)
\ No newline at end of file
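
PATCH 10 writes each page with open(fileDownload, "wb").write(r.content), which leaves closing the file handle to the garbage collector. An equivalent write using a context manager would look like this (illustrative only; the target file name here is hypothetical):

    import requests

    r = requests.get("https://www.clarissariviere.com/index.html")
    if r.status_code == 200:
        with open("index.html", "wb") as f:   # closed even if the write raises
            f.write(r.content)
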
"{0}/archives/p{1}-10.html".format(url, paging) if len(categorie) > 2: url_paging = "{0}/archives/{1}/p{2}-10.html".format(url, categorie[2], paging) - print(url_paging) + logger.info(url_paging) if url_paging not in webpage: webpage.append(url_paging) page = requests.get(url_paging) @@ -63,23 +64,23 @@ def getUrlPage(url): return webpage -def downloadPage(url, backup_dir): +def downloadPage(url, backup_dir, logger): o = urlparse(url) o = o._replace(scheme="https") - webpage = getUrlPage(o.geturl().replace(":///", "://")) + webpage = getUrlPage(o.geturl().replace(":///", "://"), logger) for i in range(0, len(webpage)): o = urlparse(webpage[i]) path_web = o.path.split("/") filePageWeb = path_web[len(path_web)-1] path_web.pop(len(path_web)-1) dir_page_web = "/".join(path_web) - mkdir_path("{0}/{1}".format(backup_dir, dir_page_web)) + mkdir_path("{0}/{1}".format(backup_dir, dir_page_web), logger) r = requests.get(webpage[i]) if r.status_code == 200: fileDownload = "{0}/index.html".format(backup_dir) if len(dir_page_web) > 0 and len(filePageWeb) > 0: fileDownload = "{0}/{1}/{2}".format(backup_dir, dir_page_web, filePageWeb) - print("{0}/{1} : {2}".format(i, len(webpage), fileDownload)) + logger.info("{0}/{1} : {2}".format(i, len(webpage), fileDownload)) open(fileDownload, "wb").write(r.content) @@ -89,6 +90,20 @@ if __name__ == '__main__': parser.add_argument("--dir", default="backup", help="backup file path") - parser.add_argument("--verbosity", help="Verbosity", action="store_false") + parser.add_argument("--debug", help="Verbosity", action="store_true") args = parser.parse_args() - downloadPage(args.url, args.dir) \ No newline at end of file + logger = logging.getLogger('web_scrap') + ch = logging.StreamHandler() + + if args.debug is not None: + logger.setLevel(logging.DEBUG) + ch.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) + ch.setLevel(logging.INFO) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + logger.addHandler(ch) + + downloadPage(args.url, args.dir, logger) \ No newline at end of file