diff --git a/lib/WPExport.py b/lib/WPExport.py index fffc32a..da4f809 100644 --- a/lib/WPExport.py +++ b/lib/WPExport.py @@ -69,6 +69,10 @@ class WPExport: href = anchor.get('href', '/') if href != "#": page_url.append(href) + else: + self._logger.error("Url did not get due status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + webpage = [] for i in page_url: @@ -113,6 +117,10 @@ class WPExport: self._logger.error("parsing error : {0}".format(err)) exit(1) webpage.append(o) + else: + self._logger.error("web didn't get due status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + return webpage @@ -179,6 +187,10 @@ class WPExport: o = o._replace(netloc=u.netloc) o = o._replace(scheme=u.scheme) page_url.append(o.geturl()) + else: + self._logger.error("JS or CSS did not get due status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + return page_url # Get image @@ -201,6 +213,10 @@ class WPExport: if src not in page_img: self._logger.info("image: {0} : ".format(src)) page_img.append(src) + else: + self._logger.error("Image did not get due status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + return page_img @@ -232,4 +248,7 @@ class WPExport: open(fileDownload, "wb").write(r.content) except Exception as err: self._logger.error("file error : {0}".format(err)) - exit(1) \ No newline at end of file + exit(1) + else: + self._logger.error("Not download due status code : {0}".format(r.status_code)) + self._logger.debug(r.content) diff --git a/lib/WPImport.py b/lib/WPImport.py index 39b1228..d925f67 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -13,6 +13,7 @@ class WPimport: self._wordpress = wordpress self._logger = logger self._parser = parser + self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'} self._request = requests.Session() @@ -41,6 +42,9 @@ class WPimport: self._addOrUpdatePost(soup) else: self._addOrUpdateFeaturedMedia(soup) + else: + self._logger.error("Connection error with status code : {0}".format(r.status_code)) + self._logger.debug(r.content) def fromDirectory(self, directory): @@ -122,18 +126,33 @@ class WPimport: res = page.json() if len(res) > 0: id_media = res[0]["id"] - headers = {'Content-Type': 'application/json', 'Accept':'application/json'} data = {"featured_media": id_media} try: - r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=headers, data=json.dumps(data)) + r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) except Exception as err: self._logger.error("Connection error : {0}".format(err)) exit(1) if r.status_code == 200: self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"])) + else: + self._logger.error("Connection error with status code : {0}".format(r.status_code)) + self._logger.debug(r.content) + else: self._logger.info("Aucun media trouvé pour {0}".format(h2)) - + else: + self._logger.error("Connection error with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + + else: + self._logger.error("Connection error with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + + else: + self._logger.error("Connection error with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + + ## Association image to post def _linkImgPost(self, title, list_img, post_id): @@ -146,6 +165,10 @@ class WPimport: exit(1) if r.status_code == 200: self._logger.info("Association d'une image à l'article {0}".format(title)) + else: + self._logger.error("Connection error with status code : {0}".format(r.status_code)) + self._logger.debug(r.content) + ## Add or update img @@ -169,7 +192,11 @@ class WPimport: self._logger.error("Connection error : {0}".format(err)) exit(1) if r.status_code == 200: - self._logger.info("Image supprimé {0}".format(img_name)) + self._logger.info("Image removed {0}".format(img_name)) + else: + self._logger.error("Image not removed due status code : {0}".format(r.status_code)) + self._logger.debug(r.content) + data = page.content img_type = "image/png" if img_name.split(".")[1] == "jpg" or img_name.split(".")[1] == "jpeg": @@ -181,48 +208,129 @@ class WPimport: self._logger.error("Connection error : {0}".format(err)) exit(1) if r.status_code == 201: - self._logger.info("Ajout d'image {0}".format(img_name)) + self._logger.info("Image added {0}".format(img_name)) res = r.json() media["id"] = res["id"] media["rendered"] = res["guid"]["rendered"] + else: + self._logger.error("Image not added due status code : {0}".format(r.status_code)) + self._logger.debug(r.content) + + else: + self._logger.error("Connection error with status code : {0}".format(r.status_code)) + self._logger.debug(r.content) + return media ## Add or update comment def _addOrUpdateComment(self, post, comment, title): - params = {"post": post} - block = True - try: - page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) - except Exception as err: - self._logger.error("Connection error : {0}".format(err)) - exit(1) - if page.status_code == 200: - result = page.json() for i in comment: - comment_exist = False - for j in result: - if i["author"] == j["author_name"] and i["date"] == j["date"]: - comment_exist = True - id_comment = j["id"] - data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"]} - if comment_exist is True: + + try: + params = {"post": post, "author_name":i["author"], "date":i["date"]} + page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) + except Exception as err: + self._logger.error("Connection error : {0}".format(err)) + exit(1) + if page.status_code == 200: + result = page.json() + for j in result: + try: + params = {"force":1} + page = self._request.delete("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"]), params=params, auth=self._basic) + except Exception as err: + self._logger.error("Connection error : {0}".format(err)) + exit(1) + if page.status_code == 200: + self._logger.info("Comment deleted for {0}".format(title)) + self._logger.debug("Comment deleted : {0}".format(j)) + else: + self._logger.error("Comment not deleted for {0} due status code : {1}".format(title, page.status_code)) + self._logger.debug(page.content) + + else: + self._logger.error("Comment not listed for {0} due status code : {1}".format(title, page.status_code)) + self._logger.debug(page.content) + + for i in comment: + data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"], "status": "approved"} + + if i["parent_id"] != -1: + parent_id = int(i["parent_id"]) + params = {"post": post, "author_name":comment[parent_id]["author"], "date":comment[parent_id]["date"]} try: - page = page = self._request.post("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, id_comment), auth=self._basic, data=data) + page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) except Exception as err: self._logger.error("Connection error : {0}".format(err)) exit(1) if page.status_code == 200: - self._logger.info("Commentaire mise à jour pour {0}".format(title)) + result = page.json() + if len(result) > 0: + data["parent"]=result[0]["id"] + else: + self._logger.error("Connection error for parent comment with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + + try: + page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data) + except Exception as err: + self._logger.error("Connection error : {0}".format(err)) + exit(1) + if page.status_code == 201: + self._logger.info("Comment added for {0}".format(title)) + self._logger.debug("Data : {0}".format(data)) else: - try: - page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data) - except Exception as err: - self._logger.error("Connection error : {0}".format(err)) - exit(1) - if page.status_code == 201: - self._logger.info("Commentaire ajoute pour {0}".format(title)) + self._logger.error("Comment not added for {0} due status code : {1}".format(title, page.status_code)) + self._logger.debug(page.content) + ## Check class name + + def _hasClassName(self, tag, className): + for i in tag["class"]: + if i == className: + return True + return False + + ## Get class name + + def _getClassName(self, tag, className): + for i in tag["class"]: + if re.match(className, i): + return i + return "" + + ## Get all comments + + def _getComment(self, comment): + comment_post = [] + for i in range(0, len(comment)): + comment_div = comment[i].find("div", class_="comment_item") + comment_item = comment_div.text.split("\n") + footer = comment_div.find_all("div", class_="itemfooter") + comment_author = footer[0].text.split(",")[0].replace("Posté par ", "") + comment_date = footer[0].find_all("abbr")[0].get("title") + comment_content = "

" + for j in range(0, len(comment_item)-2): + if len(comment_item[j]) > 0: + comment_content = comment_content + comment_item[j] + "
" + comment_content = comment_content + "

" + parent = -1 + if self._hasClassName(comment[i], "level-1") is False: + block = False + className = self._getClassName(comment[i], "level-").split("-") + level = 1 + if len(className) > 0: + level = int(className[1]) + for j in range(i-1, 0, -1): + if block is False: + levelName = "level-{0}".format(level - 1) + if self._hasClassName(comment[j], levelName) is True: + parent = j + block = True + + comment_post.append({"author": comment_author, "date": comment_date, "content": comment_content, "parent_id":parent}) + return comment_post ## Add or Update post def _addOrUpdatePost(self, soup): @@ -234,15 +342,8 @@ class WPimport: listelement = {} for i in liste: - try: - page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress,i)) - except Exception as err: - self._logger.error("Connection error : {0}".format(err)) - exit(1) - if page.status_code == 200: - elements[i] = page.json() - element[i] = [] - listelement[i] = [] + element[i] = [] + listelement[i] = [] articletitle = soup.find_all("h2", class_="articletitle") articlebody = soup.find_all("div", class_="articlebody") @@ -250,7 +351,7 @@ class WPimport: articleacreator = soup.find_all("span", class_="articlecreator") dateheader = soup.find_all("div", class_="dateheader") itemfooter = soup.find_all("div", class_="itemfooter") - comment = soup.find_all("div", class_="comment_item") + comment = soup.find_all("li", class_="comment") img_a = articlebody[0].find_all("a", {"target": "_blank"}) list_img = [] for i in img_a: @@ -273,6 +374,7 @@ class WPimport: except Exception as err: self._logger.error("Connection error : {0}".format(err)) exit(1) + if page_img.status_code == 200: media=self._addOrUpdateMedia(href_img, page_img) new_img["id"]=media["id"] @@ -283,19 +385,13 @@ class WPimport: new_img["id"]=media["id"] new_img["new_src"]=media["rendered"] list_img.append(new_img) + if page_img.status_code not in [200, 404]: + self._logger.error("Connection error with status code : {0}".format(page_img.status_code)) + self._logger.debug(page_img.content) + - comment_post = [] - for i in comment: - comment_item = i.text.split("\n") - footer = i.find_all("div", class_="itemfooter") - comment_author = footer[0].text.split(",")[0].replace("Posté par ", "") - comment_date = footer[0].find_all("abbr")[0].get("title") - comment_content = "

" - for j in range(0, len(comment_item)-2): - if len(comment_item[j]) > 0: - comment_content = comment_content + comment_item[j] + "
" - comment_content = comment_content + "

" - comment_post.append({"author": comment_author, "date": comment_date, "content": comment_content}) + comment_post = self._getComment(comment) + a = itemfooter[0].find_all("a", {"rel": True}) for i in a: rel = i.get("rel") @@ -308,20 +404,38 @@ class WPimport: for i in liste: for j in element[i]: element_exist = False - for k in elements[i]: - if k["name"] == j: - element_exist = True - listelement[i].append(k["id"]) + try: + params = {"params":j} + page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, params=params) + except Exception as err: + self._logger.error("Connection error : {0}".format(err)) + exit(1) + if page.status_code == 200: + element_exist = True + result = page.json() + listelement[i].append(result[0]["id"]) + + else: + self._logger.error("{0} not found due status code : {1}".format(i, page.status_code)) + self._logger.debug(page.content) + + if element_exist is False: data = {"name": j} + self._logger.debug("URL : {0} ".format("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i))) + self._logger.debug("data : {0}".format(data)) + self._logger.debug("headers : {0}".format(self._headers_form)) try: - page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, data=data) + page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, headers=self._headers_json, data=data) except Exception as err: self._logger.error("Connection error : {0}".format(err)) exit(1) if page.status_code == 201: result = page.json() listelement[i].append(result["id"]) + else: + self._logger.error("{0} not added due status code : {1}".format(i, page.status_code)) + self._logger.debug(page.content) title = articletitle[0].text author = articleacreator[0].text.lower() @@ -349,6 +463,10 @@ class WPimport: if page.status_code == 200: result = page.json() data["author"] = result[0]["id"] + else: + self._logger.error("Connection error with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + params = {"search":title} try: @@ -375,6 +493,14 @@ class WPimport: self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"])) self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) self._linkImgPost(result["title"]["raw"], list_img, result["id"]) + else: + self._logger.error("Post not updated due status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + + else: + self._logger.error("Connection for update post error with status code : {0}".format(page.status_code)) + self._logger.debug(page.content) + if page_exist == False: try: @@ -386,4 +512,7 @@ class WPimport: result = page.json() self._logger.info("Article ajoute : {0}".format(result["title"]["raw"])) self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) - self._linkImgPost(result["title"]["raw"], list_img, result["id"]) \ No newline at end of file + self._linkImgPost(result["title"]["raw"], list_img, result["id"]) + else: + self._logger.error("Post not added due status code : {0}".format(r.status_code)) + self._logger.debug(r.content)