#!/usr/bin/python3
"""Command-line tool to export a Canalblog site and import it into WordPress.

Two sub-commands are provided:

* ``export`` downloads the JS, CSS, HTML and images of one or more Canalblog
  URLs into a backup directory.
* ``import`` pushes content into one or more WordPress sites, taking it from
  HTML files, from directories, or directly from Canalblog URLs.

The work that is split across threads is driven by ``--parallel``; every
worker receives its own index and the total number of workers.
"""
import argparse
import logging
import sys
import threading  # noqa: F401 - kept: imported by the original script
from concurrent import futures
from getpass import getpass
from urllib.parse import urlparse

from requests.auth import HTTPBasicAuth

from lib.WPExport import WPExport
from lib.WPImport import WPimport


def _thread_name(name_thread):
    """Return the human-readable (1-based) name of worker *name_thread*."""
    return "Thread-{0}".format(int(name_thread) + 1)


def _split_csv(value):
    """Return *value* as a list, splitting comma-separated strings."""
    if isinstance(value, str):
        return value.split(",")
    return list(value)


def _force_https(url):
    """Return *url* with its scheme forced to ``https``.

    A URL given without a scheme is parsed by ``urlparse`` as a bare path, so
    rebuilding it yields ``https:///host``; the triple slash is collapsed so
    the result is ``https://host``.
    """
    parsed = urlparse(url)._replace(scheme="https")
    return parsed.geturl().replace(":///", "://")


def download(name_thread, max_thread, url, logger, parser, directory, html, img):
    """Worker: download the HTML and/or images of *url*.

    Args:
        name_thread: zero-based index of this worker.
        max_thread: total number of workers.
        url: site URL handed to ``WPExport``.
        logger: logger handed to ``WPExport``.
        parser: parser name handed to ``WPExport``.
        directory: backup directory handed to ``WPExport``.
        html: value of ``--no-html``; HTML is downloaded when it is false.
        img: value of ``--no-img``; images are downloaded when it is false.
    """
    exportWp = WPExport(
        name=_thread_name(name_thread),
        url=url,
        logger=logger,
        parser=parser,
        directory=directory,
    )
    # NOTE(review): presumably returns the pages assigned to this worker
    # out of max_thread - confirm against WPExport.getUrlPage.
    webpage = exportWp.getUrlPage(name_thread, max_thread)
    if not html:
        exportWp.downloadHTML(webpage)
    # Use the parameter, not the module-level ``args``: the function must
    # work for any caller, not only when run from this script's namespace.
    if not img:
        exportWp.downloadImg(webpage)


def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial):
    """Worker: import pages fetched from Canalblog URLs into WordPress.

    Args:
        name_thread: zero-based index of this worker.
        max_thread: total number of workers.
        canalblog: comma-separated Canalblog URLs.
        logger: logger used for errors and handed to the helpers.
        parser: parser name handed to the helpers.
        wordpress: comma-separated WordPress URLs.
        basic: authentication object handed to ``WPimport``.
        serial: when false every Canalblog URL is imported into every
            WordPress URL; when true the n-th Canalblog URL is imported into
            the n-th WordPress URL and both lists must have the same length.

    Raises:
        SystemExit: with status 1 on a URL parsing error or, in serial mode,
            when the two lists differ in length.
    """
    canalblog = _split_csv(canalblog)
    wordpress = _split_csv(wordpress)
    name = _thread_name(name_thread)

    if serial is False:
        for canal in canalblog:
            try:
                url = _force_https(canal)
            except Exception as err:
                logger.error("{0} : parsing error : {1}".format(name, err))
                sys.exit(1)
            exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
            webpage = exportWp.getUrlPage(name_thread, max_thread)
            for target in wordpress:
                importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
                importWp.fromUrl(webpage)
        return

    if len(canalblog) != len(wordpress):
        logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name))
        sys.exit(1)
    # zip() visits every pair; the previous range(0, len - 1) skipped the
    # last one (and did nothing at all for a single pair).
    for canal, target in zip(canalblog, wordpress):
        try:
            url = _force_https(canal)
        except Exception as err:
            logger.error("parsing error : {0}".format(err))
            sys.exit(1)
        exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
        webpage = exportWp.getUrlPage(name_thread, max_thread)
        importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
        importWp.fromUrl(webpage)


def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
    """Worker: import HTML directories into WordPress.

    Args:
        name_thread: zero-based index of this worker.
        max_thread: total number of workers.
        directory: comma-separated directories (a list is accepted too).
        logger: logger used for errors and handed to ``WPimport``.
        parser: parser name handed to ``WPimport``.
        wordpress: comma-separated WordPress URLs (a list is accepted too).
        basic: authentication object handed to ``WPimport``.
        serial: when false every directory is imported into every WordPress
            URL; when true the n-th directory is imported into the n-th
            WordPress URL and both lists must have the same length.

    Raises:
        SystemExit: with status 1 in serial mode when the two lists differ
            in length.
    """
    name = _thread_name(name_thread)
    # Use the parameters (not the global ``args``) and split both of them:
    # the caller passes the raw comma-separated strings, and iterating an
    # unsplit string would walk it character by character.
    directory = _split_csv(directory)
    wordpress = _split_csv(wordpress)

    if serial is False:
        for target in wordpress:
            importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
            for folder in directory:
                importWp.fromDirectory(folder, name_thread, max_thread)
        return

    if len(directory) != len(wordpress):
        logger.error("{0} : Error : Number directory is differant than wordpress".format(name))
        sys.exit(1)
    # zip() visits every pair; the previous range(0, len - 1) skipped the
    # last one (and did nothing at all for a single pair).
    for target, folder in zip(wordpress, directory):
        importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
        # NOTE(review): unlike the non-serial branch, the worker index and
        # count are not forwarded here - confirm this is intended.
        importWp.fromDirectory(folder)


def _run_in_threads(logger, parallel, worker, *worker_args):
    """Run *worker* in *parallel* threads and report what they raise.

    Each worker is called as ``worker(index, parallel, *worker_args)``.
    Exceptions raised by a worker are logged instead of being silently
    dropped with the future. A worker that calls ``sys.exit`` has its
    ``SystemExit`` stored on the future; it is re-raised here, so the
    program stops with that status once the remaining workers are done.
    """
    try:
        with futures.ThreadPoolExecutor(max_workers=parallel) as ex:
            wait_for = [
                ex.submit(worker, i, parallel, *worker_args)
                for i in range(parallel)
            ]
            for future in futures.as_completed(wait_for):
                try:
                    future.result()
                except Exception as err:
                    logger.error("Threading error : {0}".format(err))
    except Exception as err:
        # Raised by the executor itself, e.g. a non-positive worker count.
        logger.error("Threading error : {0}".format(err))


def _build_parser():
    """Build the command-line parser with its two sub-commands."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", help="Verbosity", action="store_true")
    parser.add_argument("--logfile", help="Log file", default="")
    parser.add_argument("--quiet", help="No console output", action="store_true")
    parser.add_argument("--parser", help="Parser content", default="html.parser")
    parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1, type=int)

    subparsers = parser.add_subparsers(dest="command")

    import_parser = subparsers.add_parser("import")
    import_parser.add_argument("--user", help="wordpress user", required=True)
    import_parser.add_argument("--file", help="HTML file", default="")
    import_parser.add_argument("--directory", help="HTML directory", default="")
    import_parser.add_argument("--canalblog", help="URL Canalblog", default="")
    import_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
    import_parser.add_argument("--serial", help="Serial execution", action="store_true")

    export_parser = subparsers.add_parser("export")
    export_parser.add_argument("--url", help="canblog URL to be scraping", required=True)
    export_parser.add_argument("--directory", default="backup", help="backup file path")
    # The --no-* flags are store_true: the attribute is True when the
    # corresponding resource must be skipped.
    export_parser.add_argument("--no-css", help="No CSS", dest="css", action="store_true")
    export_parser.add_argument("--no-js", help="No JS", dest="js", action="store_true")
    export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
    export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")
    return parser


def _configure_logger(args):
    """Create the script logger with console and/or file handlers."""
    logger = logging.getLogger('import export canalblog')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    level = logging.DEBUG if args.debug else logging.INFO
    # Set the logger level unconditionally: when it was only set together
    # with the console handler, --quiet left the logger at the default
    # level and INFO/DEBUG records never reached the log file.
    logger.setLevel(level)

    if not args.quiet:
        ch = logging.StreamHandler()
        ch.setLevel(level)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if args.logfile:
        fileHandler = logging.FileHandler(args.logfile)
        fileHandler.setLevel(level)
        fileHandler.setFormatter(formatter)
        logger.addHandler(fileHandler)
    return logger


def _run_import(args, logger):
    """Handle the ``import`` sub-command (files, directories or URLs)."""
    password = getpass()
    if not password:
        logger.error("No password error !!! ")
        sys.exit(1)

    basic = HTTPBasicAuth(args.user, password)
    # Keyword arguments, as in every other WPimport call of this script, so
    # each value reaches the intended parameter whatever the positional
    # order of the constructor is.
    importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser)

    if args.file:
        for target in args.wordpress.split(","):
            importWp.setUrl(target)
            importWp.fromFile(files=args.file.split(","))
        sys.exit(0)

    if args.directory:
        _run_in_threads(
            logger, args.parallel, importDirectory,
            args.directory, logger, args.parser, args.wordpress, basic, args.serial,
        )
        sys.exit(0)

    if args.canalblog:
        _run_in_threads(
            logger, args.parallel, importUrl,
            args.canalblog, logger, args.parser, args.wordpress, basic, args.serial,
        )


def _run_export(args, logger):
    """Handle the ``export`` sub-command for every requested URL."""
    exportWp = WPExport(logger=logger, parser=args.parser, directory=args.directory)
    for canal in args.url.split(","):
        try:
            url = _force_https(canal)
        except Exception as err:
            logger.error("parsing error : {0}".format(err))
            sys.exit(1)
        exportWp.setUrl(url)
        if not args.js:
            exportWp.downloadJs()
        if not args.css:
            exportWp.downloadCss()
        # HTML and images are fetched by the threaded workers; skip the pool
        # entirely when both were disabled on the command line.
        if not args.html or not args.img:
            _run_in_threads(
                logger, args.parallel, download,
                url, logger, args.parser, args.directory, args.html, args.img,
            )
    sys.exit(0)


def main():
    """Parse the command line and dispatch to the chosen sub-command."""
    args = _build_parser().parse_args()
    logger = _configure_logger(args)

    if args.command == "import":
        _run_import(args, logger)
    if args.command == "export":
        _run_export(args, logger)


if __name__ == '__main__':
    main()