2 Commits

Author SHA1 Message Date
41e192f903 fix bug import 2023-09-11 22:40:41 +02:00
eaec1ba9d4 fix webpage 2023-09-05 22:06:14 +02:00
3 changed files with 25 additions and 10 deletions

View File

@@ -55,7 +55,7 @@ def remove(index, number, args, basic, logger, ssl_wordpress):
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp): def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp):
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog)
if not revert: if revert is False:
exportWp.getUrlPage(name_thread, max_thread) exportWp.getUrlPage(name_thread, max_thread)
for i in ["article", "page"]: for i in ["article", "page"]:
for j in ["publications", "principal"]: for j in ["publications", "principal"]:

View File

@@ -58,14 +58,26 @@ class WPExport:
# Download HTML # Download HTML
def downloadHTML(self, first, second): def downloadHTML(self, first, second):
self._downloadPage(webpage[first][second], self._dir) try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
webpage = json.loads(file.read())
self._downloadPage(webpage[first][second], self._dir)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Download Image # Download Image
def downloadImg(self, first, second): def downloadImg(self, first, second):
page_src = self._getImg(webpage[first][second]) try:
o = urlparse(self._url) with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img")) webpage = json.loads(file.read())
page_src = self._getImg(webpage[first][second])
o = urlparse(self._url)
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img"))
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Get URL # Get URL

View File

@@ -302,7 +302,7 @@ class WPimport:
exit(1) exit(1)
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error for update page : {1}".format(self._name, err)) self._logger.error("{0} : Exception error for update page : {1}".format(self._name, err))
if page.status_code == 400: elif page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
break break
@@ -817,7 +817,7 @@ class WPimport:
listelement[i].append(k["id"]) listelement[i].append(k["id"])
else: else:
break break
if page.status_code == 400: elif page.status_code == 400:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code)) self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
break break
@@ -867,12 +867,15 @@ class WPimport:
bodyhtml = bodyhtml.replace(i["old_href"], o.path) bodyhtml = bodyhtml.replace(i["old_href"], o.path)
bodyhtml = bodyhtml.replace(i["old_src"], o.path) bodyhtml = bodyhtml.replace(i["old_src"], o.path)
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = ""
if len(dateheader) > 0:
time = dateheader[0].text.split(" ")
self._logger.debug("{0} : Title post : |{1}|".format(self._name, title)) self._logger.debug("{0} : Title post : |{1}|".format(self._name, title))
title = self._removeSpace(title) title = self._removeSpace(title)
self._logger.debug("{0} : Rendered Title post : |{1}|".format(self._name, title)) self._logger.debug("{0} : Rendered Title post : |{1}|".format(self._name, title))
data = {"title":title, "content":bodyhtml, "status":"publish", "tags": listelement["tags"], "categories": listelement["categories"]}
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} if len(dateheader) > 0:
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data)) self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data))
data["author"] = self._getAuthor(author) data["author"] = self._getAuthor(author)