From 418bea37783e51e99208d944541a8af39f748ff7 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Thu, 17 Aug 2023 00:18:45 +0200 Subject: [PATCH] add album test wip --- import_export_canalblog.py | 3 +- lib/WPImport.py | 100 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 63 deletions(-) diff --git a/import_export_canalblog.py b/import_export_canalblog.py index 4948b94..cb6ea4f 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -263,8 +263,9 @@ if __name__ == '__main__': for i in wordpress: importWp.setUrl(i) importWp.fromFile(files=args.file.split(",")) + exit(0) menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress) - menuWp.fromFile("{0}/index.html".format(args.file.split(",")[0])) + menuWp.fromFile("{0}".format(args.file.split(",")[0])) if len(args.directory) > 0: try: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: diff --git a/lib/WPImport.py b/lib/WPImport.py index 16200e4..97397e1 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -120,73 +120,49 @@ class WPimport: albumtitle = albumbody.find("h2").get_text() self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle)) albumdesc = albumbody.find("div", class_="albumdesc").find("p") - img_a = albumbody.find_all("a", {"target": "_blank"}) - + img_a = albumbody.find_all("img") + list_img = [] if self._no_image is False: + self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img_a))) + for i in img_a: new_img = {} - img = i.find_all("img") - self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) - if len(img) > 0: - href_a = i.get("href") - href_img = img[0].get("src") - href_a_o = urlparse(href_a) - href_img_o = urlparse(href_img) - new_img["old_src"]=href_img - new_img["old_href"]=href_a - try: - if len(href_img_o.netloc) > 0: - img_ok = False - page_img = self._request.get(href_img) - - if page_img.status_code == 404: - href_img = href_a - try: - page_img = self._request.get(href_a) - if page_img.status_code == 200: - img_ok = True - except ConnectionError as err: - self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) - exit(1) - except Exception as err: - self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) - exit(1) - - else: - if os.path.exists("{0}/..{1}".format(self._directory, href_img)): - page_img = open("{0}/..{1}".format(self._directory, href_img), "r") - img_ok = True - else: - if os.path.exists("{0}/..{1}".format(self._directory, href_a)): - page_img = open("{0}/..{1}".format(self._directory, href_a), "r") - img_ok = True - self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) - if img_ok is True: - media=self._addOrUpdateMedia(href_img, page_img) - new_img["id"]=media["id"] - new_img["new_src"]=media["rendered"] - list_img.append(new_img) - if href_img != href_a: - media=self._addOrUpdateMedia(href_a, page_img) - new_img["id"]=media["id"] - new_img["new_src"]=media["rendered"] - list_img.append(new_img) - if page_img.status_code not in [200, 404]: - self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) - self._logger.debug("{0} : {1}".format(self._name, page_img.content)) - except ConnectionError as err: - self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) - exit(1) - except Exception as err: - self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) - exit(1) - content_html = "

{0}

\n\n".format(albumdesc) + href_img = i.get("src") + href_img_o = urlparse(href_img) + new_img["old_src"]=href_img + try: + if len(href_img_o.netloc) > 0: + img_ok = False + page_img = self._request.get(href_img) + if page_img.status_code == 200: + img_ok = True + else: + if os.path.exists("{0}/..{1}".format(self._directory, href_img)): + page_img = open("{0}/..{1}".format(self._directory, href_img), "r") + img_ok = True + self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) + if img_ok is True: + media=self._addOrUpdateMedia(href_img, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + content_html = "" + if len(list_img) > 0: + content_html = "

{0}

\n\n".format(albumdesc) - content_html = content_html + "