From eaec1ba9d4ad5fd4dc5a0f0f0e32f9cf9558b494 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Tue, 5 Sep 2023 22:06:14 +0200 Subject: [PATCH] fix webpage --- import_export_canalblog.py | 2 +- lib/WPExport.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/import_export_canalblog.py b/import_export_canalblog.py index ce2068e..918d5b1 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -55,7 +55,7 @@ def remove(index, number, args, basic, logger, ssl_wordpress): def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp): exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog) - if not revert: + if revert is False: exportWp.getUrlPage(name_thread, max_thread) for i in ["article", "page"]: for j in ["publications", "principal"]: diff --git a/lib/WPExport.py b/lib/WPExport.py index 53dac23..cd9dbdc 100644 --- a/lib/WPExport.py +++ b/lib/WPExport.py @@ -58,14 +58,26 @@ class WPExport: # Download HTML def downloadHTML(self, first, second): - self._downloadPage(webpage[first][second], self._dir) + try: + with open("{0}/{1}.json".format(self._tmp, self._name)) as file: + webpage = json.loads(file.read()) + self._downloadPage(webpage[first][second], self._dir) + except Exception as ex: + self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) # Download Image def downloadImg(self, first, second): - page_src = self._getImg(webpage[first][second]) - o = urlparse(self._url) - self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img")) + try: + with open("{0}/{1}.json".format(self._tmp, self._name)) as file: + webpage = json.loads(file.read()) + page_src = self._getImg(webpage[first][second]) + o = urlparse(self._url) + self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img")) + except Exception as ex: + self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) + + # Get URL