From 0029898e6e799d28561ea95d0f621d45e462bc1e Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Mon, 1 May 2023 15:45:34 +0200 Subject: [PATCH] add debug message + fix error directory list --- import_export_canalblog.py | 4 +++- lib/WPExport.py | 2 +- lib/WPImport.py | 12 +++++++++--- wp-navigation | 19 +++++++++++++++++++ 4 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 wp-navigation diff --git a/import_export_canalblog.py b/import_export_canalblog.py index 2d3271a..8d49f10 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -36,6 +36,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas exit(1) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser) webpage = exportWp.getUrlPage(name_thread, max_thread) + for j in wordpress: importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser) importWp.fromUrl(webpage) @@ -59,7 +60,8 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial): name = "Thread-{0}".format(int(name_thread) + 1) - directory = args.directory.split(",") + directory = directory.split(",") + wordpress = wordpress.split(",") if serial is False: for i in wordpress: importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser) diff --git a/lib/WPExport.py b/lib/WPExport.py index 7cf709c..7f738d4 100644 --- a/lib/WPExport.py +++ b/lib/WPExport.py @@ -103,7 +103,7 @@ class WPExport: setPageDivided = int(number_lastpage) / max_thread setPagePart = setPageDivided * (index_thread + 1) - firstPagePart = (setPagePart - setPageDivided) + 1 + firstPagePart = (setPagePart - setPageDivided) self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage))) self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart))) diff --git a/lib/WPImport.py b/lib/WPImport.py index ec8df45..03883c8 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -36,7 +36,7 @@ class WPimport: self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err)) exit(1) if r.status_code == 200: - self._logger.info("{0} : ({1}/{2} : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i])) + self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i])) soup = BeautifulSoup(r.content, self._parser) articlebody = soup.find_all("div", class_="articlebody") if len(articlebody) > 0: @@ -72,6 +72,7 @@ class WPimport: self._logger.info("{0} : File is being processed : {1}".format(self._name, files[i])) with open(files[i], 'r') as f: content = f.read() + self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content))) soup = BeautifulSoup(content, self._parser) articlebody = soup.find_all("div", class_="articlebody") self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody))) @@ -189,12 +190,14 @@ class WPimport: media = {"id":"", "rendered":""} split_fileimg = href_img.split("/") img_name = split_fileimg[len(split_fileimg)-1] + self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "http://{0}/wp-json/wp/v2/media".format(self._wordpress))) params = { "search": img_name} try: r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params) except Exception as err: self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err)) exit(1) + self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code)) if r.status_code == 200: res = r.json() if len(res) > 0: @@ -359,6 +362,7 @@ class WPimport: listelement[i] = [] articletitle = soup.find_all("h2", class_="articletitle") + self._logger.debug("{0} : Title of the article : {1}".format(self._name, articletitle)) articlebody = soup.find_all("div", class_="articlebody") articledate = soup.find_all("span", class_="articledate") articleacreator = soup.find_all("span", class_="articlecreator") @@ -366,10 +370,12 @@ class WPimport: itemfooter = soup.find_all("div", class_="itemfooter") comment = soup.find_all("li", class_="comment") img_a = articlebody[0].find_all("a", {"target": "_blank"}) + self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a))) list_img = [] for i in img_a: new_img = {} img = i.find_all("img") + self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) if len(img) > 0: href_a = i.get("href") href_img = img[0].get("src") @@ -387,7 +393,7 @@ class WPimport: except Exception as err: self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) exit(1) - + self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) if page_img.status_code == 200: media=self._addOrUpdateMedia(href_img, page_img) new_img["id"]=media["id"] @@ -402,7 +408,7 @@ class WPimport: self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) self._logger.debug("{0} : {1}".format(self._name, page_img.content)) - + self._debug("{0} : Number of image : {1}".format(self._name, len(list_img))) comment_post = self._getComment(comment) a = itemfooter[0].find_all("a", {"rel": True}) diff --git a/wp-navigation b/wp-navigation new file mode 100644 index 0000000..39425ef --- /dev/null +++ b/wp-navigation @@ -0,0 +1,19 @@ +"\n\n +\n +\n +" + +URL de requête: http://localhost:8080/wp-json/wp/v2/navigation/161?_locale=user + +"\n\n +\n +\n\n +\n +" + +{"id":161,"content":"\n\n\n\n\n\n"} + + +curl -vvv -u v4l3n71n -X POST -d '{\"content\":\"< wp:page-list /-->\n\n< wp:navigation-submenu {\"label\":\"Toto\",\"type\":\"page\",\"id\":2,\"url\":\"http://localhost:8080/page-d-exemple/\",\"kind\":\"post-type\"} -->\n\n\n\n"}' http://localhost:8080/wp-json/wp/v2/navigation + +curl http://localhost:8080/wp-json/wp/v2/navigation |jq . \ No newline at end of file