From e48b262d7e4b2aaf1ebffbe36d081eaf65afaddd Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 3 Jun 2023 09:07:33 +0200 Subject: [PATCH] add parameter no-image --- import_export_canalblog.py | 22 ++++++----- lib/WPImport.py | 80 +++++++++++++++++++------------------- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/import_export_canalblog.py b/import_export_canalblog.py index bc428ba..c14d9f7 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -47,7 +47,7 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img, -def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update): +def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image): canalblog = canalblog.split(",") wordpress = wordpress.split(",") name = "Thread-{0}".format(int(name_thread) + 1) @@ -67,7 +67,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas webpage = exportWp.getUrlPage(name_thread, max_thread) del exportWp for j in wordpress: - importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update) + importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) for k in ["article", "page"]: for l in ["publications", "principal"]: importWp.fromUrl(webpage[l][k]) @@ -88,7 +88,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog) webpage = exportWp.getUrlPage(name_thread, max_thread) del exportWp - importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, 
ssl_wordpress=ssl_wordpress, no_create=create, no_update=update) + importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) for k in ["article", "page"]: for l in ["publications", "principal"]: @@ -97,13 +97,13 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas del importWp -def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update): +def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image): name = "Thread-{0}".format(int(name_thread) + 1) directory = directory.split(",") wordpress = wordpress.split(",") if serial is False: for i in wordpress: - importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update) + importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) for j in directory: importWp.fromDirectory(j, name_thread, max_thread) del importWp @@ -113,7 +113,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres logger.error("{0} : Error : Number directory is differant than wordpress".format(name)) exit(1) for i in range(0, len(wordpress)-1): - importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update) + importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) importWp.fromDirectory(directory[i]) del importWp @@ -143,8 +143,10 @@ if __name__ == '__main__': import_parser.add_argument("--remove-categories", 
help="Remove all categories", dest="categories", action="store_true") import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true") import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true") - import_parser.add_argument("--no-create", help="No create post", dest="create", action="store_true") - import_parser.add_argument("--no-update", help="No update post", dest="update", action="store_true") + import_parser.add_argument("--no-create", help="No create post", dest="create", default=False, action="store_true") + import_parser.add_argument("--no-update", help="No update post", dest="update", default=False, action="store_true") + import_parser.add_argument("--no-image", help="No image add or update", dest="image", default=False, action="store_true") + remove_parser = subparsers.add_parser("remove") @@ -228,7 +230,7 @@ if __name__ == '__main__': wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait(wait_for, return_when=ALL_COMPLETED) wait_for = [ - ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, create, update) + ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image) for i in range(0, int(args.parallel)) ] except Exception as err: @@ -239,7 +241,7 @@ if __name__ == '__main__': wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait(wait_for, return_when=ALL_COMPLETED) wait_for = [ - ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, create, update) + ex.submit(importUrl, i, int(args.parallel), args.canalblog, 
logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image) for i in range(0, int(args.parallel)) ] diff --git a/lib/WPImport.py b/lib/WPImport.py index 040f9a7..4bf897d 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry class WPimport: # Constructor - def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False): + def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False): self._name = name self._basic = basic self._wordpress = wordpress @@ -26,6 +26,7 @@ class WPimport: self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries)) self._no_create = no_create self._no_update = no_update + self._no_image = no_image # Destructor def __del__(self): @@ -436,48 +437,49 @@ class WPimport: img_a = articlebody[0].find_all("a", {"target": "_blank"}) self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a))) list_img = [] - for i in img_a: - new_img = {} - img = i.find_all("img") - self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) - if len(img) > 0: - href_a = i.get("href") - href_img = img[0].get("src") - new_img["old_src"]=href_img - new_img["old_href"]=href_a - try: - page_img = self._request.get(href_img) + if self._no_image is False: + for i in img_a: + new_img = {} + img = i.find_all("img") + self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) + if len(img) > 0: + href_a = i.get("href") + href_img = img[0].get("src") + new_img["old_src"]=href_img + new_img["old_href"]=href_a + try: + page_img = self._request.get(href_img) - if page_img.status_code == 404: - href_img = href_a - try: - page_img = self._request.get(href_a) 
- except ConnectionError as err: - self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) - exit(1) - except Exception as err: - self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) - exit(1) - self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) - if page_img.status_code == 200: - media=self._addOrUpdateMedia(href_img, page_img) - new_img["id"]=media["id"] - new_img["new_src"]=media["rendered"] - list_img.append(new_img) - if href_img != href_a: - media=self._addOrUpdateMedia(href_a, page_img) + if page_img.status_code == 404: + href_img = href_a + try: + page_img = self._request.get(href_a) + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) + if page_img.status_code == 200: + media=self._addOrUpdateMedia(href_img, page_img) new_img["id"]=media["id"] new_img["new_src"]=media["rendered"] list_img.append(new_img) - if page_img.status_code not in [200, 404]: - self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) - self._logger.debug("{0} : {1}".format(self._name, page_img.content)) - except ConnectionError as err: - self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) - exit(1) - except Exception as err: - self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) - exit(1) + if href_img != href_a: + media=self._addOrUpdateMedia(href_a, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + if page_img.status_code not in [200, 404]: + 
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page_img.content)) + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img))) comment_post = self._getComment(comment)