#!/usr/bin/python3

from requests.auth import HTTPBasicAuth
from getpass import getpass
from urllib.parse import urlparse
from concurrent import futures
import argparse, logging

from lib.WPImport import WPimport
from lib.WPExport import WPExport


def download(name_thread, max_thread, exportWp, html, img):
    # Each worker fetches its share of the page list, then downloads the HTML
    # pages and images unless they were disabled on the command line.
    webpage = exportWp.getUrlPage(name_thread, max_thread)
    if html is False:
        exportWp.downloadHTML(webpage)
    if img is False:
        exportWp.downloadImg(webpage)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", help="Verbosity", action="store_true")
    parser.add_argument("--logfile", help="Log file", default="")
    parser.add_argument("--quiet", help="No console output", action="store_true")
    parser.add_argument("--parser", help="Parser content", default="html.parser")
    parser.add_argument("--parallel", help="Number of threads (default: 1)", default=1)

    subparsers = parser.add_subparsers(dest="command")

    import_parser = subparsers.add_parser("import")
    import_parser.add_argument("--user", help="WordPress user", required=True)
    import_parser.add_argument("--file", help="HTML file", default="")
    import_parser.add_argument("--directory", help="HTML directory", default="")
    import_parser.add_argument("--canalblog", help="Canalblog URL", default="")
    import_parser.add_argument("--wordpress", help="WordPress URL", required=True)
    import_parser.add_argument("--serial", help="Serial execution", action="store_true")

    export_parser = subparsers.add_parser("export")
    export_parser.add_argument("--url", help="Canalblog URL to scrape", required=True)
    export_parser.add_argument("--directory", default="backup", help="Backup file path")
    export_parser.add_argument("--no-css", help="No CSS", dest="css", action="store_true")
    export_parser.add_argument("--no-js", help="No JS", dest="js", action="store_true")
    export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
    export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")

    args = parser.parse_args()

    # Logging: level follows --debug, console output unless --quiet, optional file via --logfile.
    logger = logging.getLogger('import export canalblog')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if args.debug is True:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if args.quiet is False:
        ch = logging.StreamHandler()
        if args.debug is True:
            ch.setLevel(logging.DEBUG)
        else:
            ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if len(args.logfile) > 0:
        fileHandler = logging.FileHandler(args.logfile)
        if args.debug is True:
            fileHandler.setLevel(logging.DEBUG)
        else:
            fileHandler.setLevel(logging.INFO)
        fileHandler.setFormatter(formatter)
        logger.addHandler(fileHandler)

    if args.command == "import":
        password = getpass()
        if len(password) == 0:
            logger.error("No password provided !!!")
") exit(1) basic = HTTPBasicAuth(args.user, password) wordpress = args.wordpress.split(",") importWp = WPimport(basic, "", logger, args.parser) if len(args.file) > 0: for i in wordpress: importWp.setUrl(i) importWp.fromFile(args.file.split(",")) exit(0) if len(args.directory) > 0: directory = args.directory.split(",") if args.serial is False: for i in wordpress: importWp.setUrl(i) for j in directory: importWp.fromDirectory(j) else: if len(directory) != len(wordpress): logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress") exit(1) for i in range(0, len(wordpress)-1): importWp.setUrl(wordpress[i]) importWp.fromDirectory(directory[i]) exit(0) if len(args.canalblog) > 0: exportWp = WPExport("", logger, args.parser, args.directory) canalblog = args.canalblog.split(",") wordpress = args.wordpress.split(",") if args.serial is False: for canal in canalblog: try: o = urlparse(canal) o = o._replace(scheme="https") url = o.geturl().replace(":///", "://") except Exception as err: logger.error("parsing error : {0}".format(err)) exit(1) exportWp.setUrl(url) webpage = exportWp.getUrlPage() for j in wordpress: importWp.setUrl(j) importWp.fromUrl(webpage) else: if len(canalblog) != len(wordpress): logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress") exit(1) for i in range(0, len(canalblog)-1): try: o = urlparse(canalblog[i]) o = o._replace(scheme="https") url = o.geturl().replace(":///", "://") except Exception as err: logger.error("parsing error : {0}".format(err)) exit(1) exportWp.setUrl(url) webpage = exportWp.getUrlPage() importWp.setUrl(wordpress[i]) importWp.fromUrl(webpage) if args.command == "export": canalblog = args.url.split(",") exportWp = WPExport("", logger, args.parser, args.directory) for canal in canalblog: try: o = urlparse(canal) o = o._replace(scheme="https") url = o.geturl().replace(":///", "://") except Exception as err: logger.error("parsing error : {0}".format(err)) exit(1) exportWp.setUrl(url) if args.js is False: exportWp.downloadJs() if args.css is False: exportWp.downloadCss() if args.html is False or args.img is False: ex = futures.ThreadPoolExecutor(max_workers=int(args.parallel)) #args = [int(args.parallel), exportWp, args.html, args.img] #f = ex.map(download, args) #print("wait !!!") wait_for = [ ex.submit(download, i, int(args.parallel), exportWp, args.html, args.img) for i in range(0, int(args.parallel)) ] #for i in range(0, int(args.parallel)): # threading.Thread(name="Thread-{0}".format(i + 1), target=download, args=(i, 3, exportWp,args.html, args.img)).start() exit(0)