Merge pull request 'parent-comment' (#8) from parent-comment into master

Reviewed-on: #8
This commit is contained in:
v4l3n71n 2023-04-20 19:30:45 +00:00
commit 5a5658d955
2 changed files with 207 additions and 59 deletions

View File

@ -69,6 +69,10 @@ class WPExport:
href = anchor.get('href', '/') href = anchor.get('href', '/')
if href != "#": if href != "#":
page_url.append(href) page_url.append(href)
else:
self._logger.error("Url did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
webpage = [] webpage = []
for i in page_url: for i in page_url:
@ -113,6 +117,10 @@ class WPExport:
self._logger.error("parsing error : {0}".format(err)) self._logger.error("parsing error : {0}".format(err))
exit(1) exit(1)
webpage.append(o) webpage.append(o)
else:
self._logger.error("web didn't get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
return webpage return webpage
@ -179,6 +187,10 @@ class WPExport:
o = o._replace(netloc=u.netloc) o = o._replace(netloc=u.netloc)
o = o._replace(scheme=u.scheme) o = o._replace(scheme=u.scheme)
page_url.append(o.geturl()) page_url.append(o.geturl())
else:
self._logger.error("JS or CSS did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
return page_url return page_url
# Get image # Get image
@ -201,6 +213,10 @@ class WPExport:
if src not in page_img: if src not in page_img:
self._logger.info("image: {0} : ".format(src)) self._logger.info("image: {0} : ".format(src))
page_img.append(src) page_img.append(src)
else:
self._logger.error("Image did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
return page_img return page_img
@ -232,4 +248,7 @@ class WPExport:
open(fileDownload, "wb").write(r.content) open(fileDownload, "wb").write(r.content)
except Exception as err: except Exception as err:
self._logger.error("file error : {0}".format(err)) self._logger.error("file error : {0}".format(err))
exit(1) exit(1)
else:
self._logger.error("Not download due status code : {0}".format(r.status_code))
self._logger.debug(r.content)

View File

@ -13,6 +13,7 @@ class WPimport:
self._wordpress = wordpress self._wordpress = wordpress
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'}
self._request = requests.Session() self._request = requests.Session()
@ -41,6 +42,9 @@ class WPimport:
self._addOrUpdatePost(soup) self._addOrUpdatePost(soup)
else: else:
self._addOrUpdateFeaturedMedia(soup) self._addOrUpdateFeaturedMedia(soup)
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
def fromDirectory(self, directory): def fromDirectory(self, directory):
@ -122,18 +126,33 @@ class WPimport:
res = page.json() res = page.json()
if len(res) > 0: if len(res) > 0:
id_media = res[0]["id"] id_media = res[0]["id"]
headers = {'Content-Type': 'application/json', 'Accept':'application/json'}
data = {"featured_media": id_media} data = {"featured_media": id_media}
try: try:
r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=headers, data=json.dumps(data)) r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"])) self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"]))
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
else: else:
self._logger.info("Aucun media trouvé pour {0}".format(h2)) self._logger.info("Aucun media trouvé pour {0}".format(h2))
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
## Association image to post ## Association image to post
def _linkImgPost(self, title, list_img, post_id): def _linkImgPost(self, title, list_img, post_id):
@ -146,6 +165,10 @@ class WPimport:
exit(1) exit(1)
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Association d'une image à l'article {0}".format(title)) self._logger.info("Association d'une image à l'article {0}".format(title))
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
## Add or update img ## Add or update img
@ -169,7 +192,11 @@ class WPimport:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Image supprimé {0}".format(img_name)) self._logger.info("Image removed {0}".format(img_name))
else:
self._logger.error("Image not removed due status code : {0}".format(r.status_code))
self._logger.debug(r.content)
data = page.content data = page.content
img_type = "image/png" img_type = "image/png"
if img_name.split(".")[1] == "jpg" or img_name.split(".")[1] == "jpeg": if img_name.split(".")[1] == "jpg" or img_name.split(".")[1] == "jpeg":
@ -181,48 +208,129 @@ class WPimport:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
if r.status_code == 201: if r.status_code == 201:
self._logger.info("Ajout d'image {0}".format(img_name)) self._logger.info("Image added {0}".format(img_name))
res = r.json() res = r.json()
media["id"] = res["id"] media["id"] = res["id"]
media["rendered"] = res["guid"]["rendered"] media["rendered"] = res["guid"]["rendered"]
else:
self._logger.error("Image not added due status code : {0}".format(r.status_code))
self._logger.debug(r.content)
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
return media return media
## Add or update comment ## Add or update comment
def _addOrUpdateComment(self, post, comment, title):
    """Re-create the comments of a post through the WordPress REST API.

    Works in two passes over ``comment``:

    1. For every entry, list the comments returned for the query
       ``post`` / ``author_name`` / ``date`` and delete (``force=1``) each
       comment the API returns.
    2. For every entry, create the comment again with status ``approved``.
       When ``parent_id`` is not -1, the parent entry is looked up with the
       same kind of query and the id of the first result is sent as
       ``parent``.

    Args:
        post: id of the post the comments belong to.
        comment: list of dicts with the keys ``author``, ``date``,
            ``content`` and ``parent_id`` (index of the parent entry in
            this same list, or -1 when there is none).
        title: post title, used only in log messages.

    Any exception raised while sending a request is logged and the process
    exits with status 1. Unexpected HTTP status codes are logged and the
    processing continues.
    """
    # First pass: remove the comments already present on the site.
    for i in comment:
        try:
            params = {"post": post, "author_name":i["author"], "date":i["date"]}
            page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params)
        except Exception as err:
            self._logger.error("Connection error : {0}".format(err))
            exit(1)
        if page.status_code == 200:
            result = page.json()
            for j in result:
                try:
                    params = {"force":1}
                    page = self._request.delete("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"]), params=params, auth=self._basic)
                except Exception as err:
                    self._logger.error("Connection error : {0}".format(err))
                    exit(1)
                if page.status_code == 200:
                    self._logger.info("Comment deleted for {0}".format(title))
                    self._logger.debug("Comment deleted : {0}".format(j))
                else:
                    self._logger.error("Comment not deleted for {0} due status code : {1}".format(title, page.status_code))
                    self._logger.debug(page.content)
        else:
            self._logger.error("Comment not listed for {0} due status code : {1}".format(title, page.status_code))
            self._logger.debug(page.content)

    # Second pass: create every comment, linking replies to their parent.
    for i in comment:
        data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"], "status": "approved"}
        if i["parent_id"] != -1:
            # parent_id is an index into ``comment``; the remote id of the
            # parent has to be fetched from the site before posting.
            parent_id = int(i["parent_id"])
            params = {"post": post, "author_name":comment[parent_id]["author"], "date":comment[parent_id]["date"]}
            try:
                page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params)
            except Exception as err:
                self._logger.error("Connection error : {0}".format(err))
                exit(1)
            if page.status_code == 200:
                result = page.json()
                if len(result) > 0:
                    data["parent"]=result[0]["id"]
            else:
                self._logger.error("Connection error for parent comment with status code : {0}".format(page.status_code))
                self._logger.debug(page.content)
        try:
            page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data)
        except Exception as err:
            self._logger.error("Connection error : {0}".format(err))
            exit(1)
        if page.status_code == 201:
            self._logger.info("Comment added for {0}".format(title))
            self._logger.debug("Data : {0}".format(data))
        else:
            self._logger.error("Comment not added for {0} due status code : {1}".format(title, page.status_code))
            self._logger.debug(page.content)
## Check class name
def _hasClassName(self, tag, className):
    """Tell whether ``className`` is one of the entries of ``tag["class"]``.

    Args:
        tag: object whose ``"class"`` item is an iterable of class names.
        className: exact class name to look for.

    Returns:
        True when an entry equals ``className``, False otherwise.
    """
    return any(name == className for name in tag["class"])
## Get class name
def _getClassName(self, tag, className):
    """Return the first entry of ``tag["class"]`` matched by ``className``.

    Args:
        tag: object whose ``"class"`` item is an iterable of class names.
        className: regular expression applied with ``re.match`` (so it is
            anchored at the start of each class name).

    Returns:
        The first matching class name, or an empty string when none matches.
    """
    matching = (name for name in tag["class"] if re.match(className, name))
    return next(matching, "")
## Get all comments
def _getComment(self, comment):
    """Extract author, date, content and parent of every comment node.

    Args:
        comment: indexable sequence of parsed comment nodes. Each node must
            contain a ``div.comment_item`` holding the text and a
            ``div.itemfooter`` with the author and an ``abbr`` whose
            ``title`` is the date.

    Returns:
        A list with one dict per node, in input order, with the keys
        ``author``, ``date``, ``content`` and ``parent_id``. ``parent_id``
        is the index in ``comment`` of the closest preceding node whose
        level is one less than the node's own ``level-N`` class, or -1 for
        a ``level-1`` node, a node without a ``level-N`` class, or when no
        such node precedes it.
    """
    comment_post = []
    for index, node in enumerate(comment):
        comment_div = node.find("div", class_="comment_item")
        lines = comment_div.text.split("\n")
        footer = comment_div.find_all("div", class_="itemfooter")[0]
        comment_author = footer.text.split(",")[0].replace("Posté par ", "")
        comment_date = footer.find_all("abbr")[0].get("title")

        # The last two lines of the text are never part of the body
        # (presumably the footer line and a trailing empty line - TODO confirm).
        body = "".join(line + "<br />" for line in lines[:-2] if line)
        comment_content = "<p>" + body + "</p>"

        parent = -1
        if not self._hasClassName(node, "level-1"):
            parts = self._getClassName(node, "level-").split("-")
            # "".split("-") is [""], so a usable level needs two parts;
            # a node without any level-N class is kept without a parent.
            if len(parts) > 1:
                parent_level = "level-{0}".format(int(parts[1]) - 1)
                # Walk backwards down to index 0 inclusive: the first
                # comment of the page can be a parent too.
                for candidate in range(index - 1, -1, -1):
                    if self._hasClassName(comment[candidate], parent_level):
                        parent = candidate
                        break

        comment_post.append({"author": comment_author, "date": comment_date, "content": comment_content, "parent_id":parent})
    return comment_post
## Add or Update post ## Add or Update post
def _addOrUpdatePost(self, soup): def _addOrUpdatePost(self, soup):
@ -234,15 +342,8 @@ class WPimport:
listelement = {} listelement = {}
for i in liste: for i in liste:
try: element[i] = []
page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress,i)) listelement[i] = []
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
exit(1)
if page.status_code == 200:
elements[i] = page.json()
element[i] = []
listelement[i] = []
articletitle = soup.find_all("h2", class_="articletitle") articletitle = soup.find_all("h2", class_="articletitle")
articlebody = soup.find_all("div", class_="articlebody") articlebody = soup.find_all("div", class_="articlebody")
@ -250,7 +351,7 @@ class WPimport:
articleacreator = soup.find_all("span", class_="articlecreator") articleacreator = soup.find_all("span", class_="articlecreator")
dateheader = soup.find_all("div", class_="dateheader") dateheader = soup.find_all("div", class_="dateheader")
itemfooter = soup.find_all("div", class_="itemfooter") itemfooter = soup.find_all("div", class_="itemfooter")
comment = soup.find_all("div", class_="comment_item") comment = soup.find_all("li", class_="comment")
img_a = articlebody[0].find_all("a", {"target": "_blank"}) img_a = articlebody[0].find_all("a", {"target": "_blank"})
list_img = [] list_img = []
for i in img_a: for i in img_a:
@ -273,6 +374,7 @@ class WPimport:
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
if page_img.status_code == 200: if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img) media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"] new_img["id"]=media["id"]
@ -283,19 +385,13 @@ class WPimport:
new_img["id"]=media["id"] new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"] new_img["new_src"]=media["rendered"]
list_img.append(new_img) list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("Connection error with status code : {0}".format(page_img.status_code))
self._logger.debug(page_img.content)
comment_post = [] comment_post = self._getComment(comment)
for i in comment:
comment_item = i.text.split("\n")
footer = i.find_all("div", class_="itemfooter")
comment_author = footer[0].text.split(",")[0].replace("Posté par ", "")
comment_date = footer[0].find_all("abbr")[0].get("title")
comment_content = "<p>"
for j in range(0, len(comment_item)-2):
if len(comment_item[j]) > 0:
comment_content = comment_content + comment_item[j] + "<br />"
comment_content = comment_content + "</p>"
comment_post.append({"author": comment_author, "date": comment_date, "content": comment_content})
a = itemfooter[0].find_all("a", {"rel": True}) a = itemfooter[0].find_all("a", {"rel": True})
for i in a: for i in a:
rel = i.get("rel") rel = i.get("rel")
@ -308,20 +404,38 @@ class WPimport:
for i in liste: for i in liste:
for j in element[i]: for j in element[i]:
element_exist = False element_exist = False
for k in elements[i]: try:
if k["name"] == j: params = {"params":j}
element_exist = True page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, params=params)
listelement[i].append(k["id"]) except Exception as err:
self._logger.error("Connection error : {0}".format(err))
exit(1)
if page.status_code == 200:
element_exist = True
result = page.json()
listelement[i].append(result[0]["id"])
else:
self._logger.error("{0} not found due status code : {1}".format(i, page.status_code))
self._logger.debug(page.content)
if element_exist is False: if element_exist is False:
data = {"name": j} data = {"name": j}
self._logger.debug("URL : {0} ".format("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i)))
self._logger.debug("data : {0}".format(data))
self._logger.debug("headers : {0}".format(self._headers_form))
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, data=data) page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, headers=self._headers_json, data=data)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
if page.status_code == 201: if page.status_code == 201:
result = page.json() result = page.json()
listelement[i].append(result["id"]) listelement[i].append(result["id"])
else:
self._logger.error("{0} not added due status code : {1}".format(i, page.status_code))
self._logger.debug(page.content)
title = articletitle[0].text title = articletitle[0].text
author = articleacreator[0].text.lower() author = articleacreator[0].text.lower()
@ -349,6 +463,10 @@ class WPimport:
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
data["author"] = result[0]["id"] data["author"] = result[0]["id"]
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
params = {"search":title} params = {"search":title}
try: try:
@ -375,6 +493,14 @@ class WPimport:
self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"])) self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"]) self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("Post not updated due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
else:
self._logger.error("Connection for update post error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
if page_exist == False: if page_exist == False:
try: try:
@ -386,4 +512,7 @@ class WPimport:
result = page.json() result = page.json()
self._logger.info("Article ajoute : {0}".format(result["title"]["raw"])) self._logger.info("Article ajoute : {0}".format(result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"]) self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("Post not added due status code : {0}".format(r.status_code))
self._logger.debug(r.content)