diff --git a/lib/WPImport.py b/lib/WPImport.py index 8e27a0e..7b78cad 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -5,6 +5,7 @@ from urllib.parse import urlparse import requests, os, logging, re, json from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from slugify import slugify class WPimport: # Constructor @@ -14,7 +15,7 @@ class WPimport: self._wordpress = wordpress self._logger = logger self._parser = parser - self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'} + self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'} self._request = requests.Session() @@ -487,7 +488,7 @@ class WPimport: hour = articledate[0].text time = dateheader[0].text.split(" ") data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} - params = {"search":author} + params = {"search":author, "per_page":100} try: self._logger.info("{0} : Search author : {1}".format(self._name, author)) page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params) @@ -503,46 +504,85 @@ class WPimport: else: self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code)) self._logger.debug("{0} : {1}".format(page.content)) - - params = {"search": title} - try: - self._logger.info("{0} : Search post : {1}".format(self._name, title)) - page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params) - except ConnectionError as err: - self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err)) - exit(1) - except Exception as err: - self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err)) + if title[len(title)-1] == " ": + title = title[:-1] page_is_exist = False - if page.status_code == 200: - result = page.json() - self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result))) - for i in result: - self._logger.info("{0} : Search title posts for {2} : {1}".format(self._name, i["title"]["rendered"], title)) - if i["title"]["rendered"] == title: - page_is_exist = True - post_id = i["id"] - self._logger.debug("{0} : Data for post to update : {1}".format(self._name, result[0])) - self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) - - try: - page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) - except Exception as err: - self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) - exit(1) - if page.status_code == 200: - result = page.json() - self._logger.info("{0} : Post updated : {1}".format(self._name, title)) - self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) - self._linkImgPost(result["title"]["raw"], list_img, result["id"]) - else: - self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) - self._logger.debug("{0} : {1}".format(self._name, page.content)) - else: - self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code)) - self._logger.debug("{0} : {1}".format(self._name, page.content)) - if page_is_exist == False: + for index in range(1,10): + params = {"search": title, "per_page":100, "page": index} + try: + self._logger.info("{0} : Search post : {1}".format(self._name, title)) + page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params, headers=self._headers_json) + except ConnectionError as err: + self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err)) + if page.status_code == 200: + self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding)) + page.encoding = "utf-8" + result = page.json() + if len(result) == 0: + break + self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result))) + count = 0 + for i in result: + + self._logger.info("{0} : Search title posts for |{2}| : |{1}|".format(self._name, i["title"]["rendered"], title)) + title_rendered = i["title"]["rendered"].replace('’', "'") + title_rendered = title_rendered.replace('–', '-') + title_rendered = title_rendered.replace('…', '...') + title_rendered = title_rendered.replace('« ', '"') + title_rendered = title_rendered.replace(' »', '"') + self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered))) + if title_rendered == title: + page_is_exist = True + post_id = i["id"] + count = count + 1 + if count > 1: + self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title)) + try: + params = {"force":1} + page = self._request.delete("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, params=params) + except ConnectionError as err: + self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err)) + if page.status_code == 200: + self._logger.info("{0} : Post deleted : {1}".format(self._name, title)) + else: + self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page.content)) + + else: + self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i)) + self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) + + try: + page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) + except ConnectionError as err: + self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err)) + if page.status_code == 200: + result = page.json() + self._logger.info("{0} : Post updated : {1}".format(self._name, title)) + self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) + self._linkImgPost(result["title"]["raw"], list_img, result["id"]) + else: + self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page.content)) + if page.status_code == 400: + self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page.content)) + break + else: + self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page.content)) + + if page_is_exist is False: try: self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"])) page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data))