diff --git a/lib/WPExport.py b/lib/WPExport.py index 58827da..90080b7 100644 --- a/lib/WPExport.py +++ b/lib/WPExport.py @@ -103,7 +103,7 @@ class WPExport: if i not in webpage[section]["page"]: webpage[section]["page"].append(i) soup = BeautifulSoup(page.text, self._parser) - class_div = pagingfirstline = soup.find_all("div", class_="pagingfirstline") + class_div = soup.find_all("div", class_="pagingfirstline") if len(class_div) > 0: pagingfirstline = class_div[0].find_all("a") if len(pagingfirstline) > 1: @@ -115,14 +115,16 @@ class WPExport: number_lastpage = int(number_page) / 10 setPageDivided = int(number_lastpage) / max_thread - setPagePart = setPageDivided * (index_thread + 1) + 1 + if setPageDivided > int(setPageDivided): + setPageDivided = setPageDivided + 1 + setPagePart = setPageDivided * (index_thread + 1) firstPagePart = (setPagePart - setPageDivided) self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage))) self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart))) self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart))) - for j in range(int(firstPagePart),int(setPagePart)): + for j in range(int(firstPagePart),int(setPagePart)+1): paging = j * 10 categorie = urlparse(i).path.split("/") url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging) @@ -135,7 +137,9 @@ class WPExport: if page.status_code == 200: soup = BeautifulSoup(page.text, self._parser) h2 = soup.find_all("h2") + self._logger.debug("{0} : {1} H2 : {2}".format(self._name, url_paging, h2)) for title in h2: + self._logger.debug("{0} : {1} a : {2}".format(self._name, url_paging, title.find_all("a"))) href = title.find_all("a")[0].get("href", "/") if href not in webpage[section]["article"]: try: diff --git a/lib/WPImport.py b/lib/WPImport.py index 6108582..8e27a0e 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -489,12 +489,15 @@ class WPimport: data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} params = {"search":author} try: - self._logger.info("{0} : Get author : {1}".format(self._name, author)) + self._logger.info("{0} : Search author : {1}".format(self._name, author)) page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params) - except Exception as err: + except ConnectionError as err: self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err)) exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err)) if page.status_code == 200: + self._logger.info("{0} : Get author id : {1}".format(self._name, result[0]["id"])) result = page.json() data["author"] = result[0]["id"] else: @@ -503,45 +506,51 @@ class WPimport: params = {"search": title} try: + self._logger.info("{0} : Search post : {1}".format(self._name, title)) page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params) - except Exception as err: + except ConnectionError as err: self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err)) exit(1) - page_is_exist = True + except Exception as err: + self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err)) + page_is_exist = False if page.status_code == 200: result = page.json() - if len(result) == 0: - page_is_exist = False - else: - for i in result: - if i["title"]["rendered"] == title: - post_id = i["id"] - self._logger.debug("{0} : Data for post to update : {1}".format(self._name, result[0])) - self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) + self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result))) + for i in result: + self._logger.info("{0} : Search title posts for {2} : {1}".format(self._name, i["title"]["rendered"], title)) + if i["title"]["rendered"] == title: + page_is_exist = True + post_id = i["id"] + self._logger.debug("{0} : Data for post to update : {1}".format(self._name, result[0])) + self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) - try: - page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) - except Exception as err: - self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) - exit(1) - if page.status_code == 200: - result = page.json() - self._logger.info("{0} : Post updated : {1}".format(self._name, title)) - self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) - self._linkImgPost(result["title"]["raw"], list_img, result["id"]) - else: - self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) - self._logger.debug("{0} : {1}".format(self._name, page.content)) + try: + page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) + except Exception as err: + self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) + exit(1) + if page.status_code == 200: + result = page.json() + self._logger.info("{0} : Post updated : {1}".format(self._name, title)) + self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) + self._linkImgPost(result["title"]["raw"], list_img, result["id"]) + else: + self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page.content)) else: self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code)) self._logger.debug("{0} : {1}".format(self._name, page.content)) if page_is_exist == False: try: + self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"])) page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) - except Exception as err: + except ConnectionError as err: self._logger.error("{0} : Connection error for create post : {1}".format(self._name, err)) exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for create post : {1}".format(self._name, err)) if page.status_code == 201: result = page.json() self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"]))