diff --git a/import_export_canalblog.py b/import_export_canalblog.py index 418df05..a5d60ed 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -5,7 +5,7 @@ from urllib.parse import urlparse from concurrent import futures from concurrent.futures import as_completed, wait, ALL_COMPLETED -import argparse, logging, threading +import argparse, logging, threading, os, glob from lib.WPImport import WPimport from lib.WPExport import WPExport from lib.WPRemove import WPRemove @@ -40,9 +40,10 @@ def remove(index, number, args, basic, logger, ssl_wordpress): del removeWp -def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog): +def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp): exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog) - webpage = exportWp.getUrlPage(name_thread, max_thread) + if not revert: + webpage = exportWp.getUrlPage(name_thread, max_thread) for i in ["article", "page"]: for j in ["publications", "principal"]: if html is False: @@ -54,7 +55,7 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img, -def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image): +def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp): canalblog = canalblog.split(",") wordpress = wordpress.split(",") name = "Thread-{0}".format(int(name_thread) + 1) @@ -71,7 +72,8 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas logger.error("{0} : parsing error : {1}".format(name, err)) exit(1) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog) - webpage = exportWp.getUrlPage(name_thread, max_thread) + if not revert: + webpage = exportWp.getUrlPage(name_thread, max_thread) del exportWp for j in wordpress: importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) @@ -93,7 +95,8 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas logger.error("parsing error : {0}".format(err)) exit(1) exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog) - webpage = exportWp.getUrlPage(name_thread, max_thread) + if not revert: + webpage = exportWp.getUrlPage(name_thread, max_thread) del exportWp importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) @@ -127,6 +130,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres if __name__ == '__main__': + TMP = "/tmp/import_export_canablog" parser = argparse.ArgumentParser() parser.add_argument("--debug", help="Verbosity", action="store_true") parser.add_argument("--logfile", help="Log file", default="") @@ -134,6 +138,7 @@ if __name__ == '__main__': parser.add_argument("--parser", help="Parser content", default="html.parser") parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1) parser.add_argument("--no-ssl", help="No ssl for canalblog and/or wordpress (example wordpress,canalblog)", dest="ssl", default="") + parser.add_argument("--revert", help="Restart a work from stopping work", action="store_true") subparsers = parser.add_subparsers(dest="command") @@ -188,7 +193,9 @@ if __name__ == '__main__': help="File") - + if not os.path.exists(TMP): + os.mkdir(TMP) + args = parser.parse_args() logger = logging.getLogger('import export canalblog') @@ -256,7 +263,7 @@ if __name__ == '__main__': wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait(wait_for, return_when=ALL_COMPLETED) wait_for = [ - ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image) + ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, TMP) for i in range(0, int(args.parallel)) ] @@ -294,7 +301,7 @@ if __name__ == '__main__': try: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: wait_for = [ - ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog) + ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog, args.revert, TMP) for i in range(0, int(args.parallel)) ] except Exception as err: diff --git a/lib/WPExport.py b/lib/WPExport.py index f0775e7..5577ddb 100644 --- a/lib/WPExport.py +++ b/lib/WPExport.py @@ -6,7 +6,7 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry class WPExport: - def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True): + def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True, tmp="/tmp/import_export_canablog"): self._url = url self._logger = logger self._parser = parser @@ -22,7 +22,7 @@ class WPExport: status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries)) - + self._tmp = tmp # Destructor def __del__(self): diff --git a/lib/WPImport.py b/lib/WPImport.py index 42d6596..bc82824 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry class WPimport: # Constructor - def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False): + def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog"): self._name = name self._basic = basic self._wordpress = wordpress @@ -28,6 +28,7 @@ class WPimport: self._no_create = no_create self._no_update = no_update self._no_image = no_image + self._tmp = tmp # Destructor def __del__(self):