#!/usr/bin/python3
"""Command-line tool with two sub-commands, ``import`` and ``export``.

``export`` downloads the JS, CSS, HTML and images of one or more Canalblog
URLs through ``WPExport``; ``import`` pushes HTML files, directories or
Canalblog URLs to one or more WordPress URLs through ``WPimport``.
"""
import argparse
import logging
import multiprocessing
import sys
import threading
from getpass import getpass
from urllib.parse import urlparse

from requests.auth import HTTPBasicAuth

from lib.WPExport import WPExport
from lib.WPImport import WPimport


def download(name_thread, max_thread, exportWp, html, img):
    """Run one download worker against an already configured exporter.

    Args:
        name_thread: Index of this worker; forwarded to ``exportWp.setName``
            and ``exportWp.getUrlPage``.
        max_thread: Forwarded to ``exportWp.getUrlPage``; presumably the
            total number of workers sharing the job (confirm against
            ``WPExport``).
        exportWp: ``WPExport`` instance whose URL has already been set.
        html: Value of ``--no-html``; HTML is downloaded only when it is
            ``False``.
        img: Value of ``--no-img``; images are downloaded only when it is
            ``False``.
    """
    exportWp.setName(name_thread)
    webpage = exportWp.getUrlPage(name_thread, max_thread)
    if html is False:
        exportWp.downloadHTML(webpage)
    # Use the parameter, not the module-level ``args``: that global only
    # exists in the parent process when workers are started with "spawn".
    if img is False:
        exportWp.downloadImg(webpage)


def _to_https_url(canal, logger):
    """Return *canal* with its scheme forced to https, or exit(1) if unparsable."""
    try:
        forced = urlparse(canal)._replace(scheme="https")
        # A scheme-less input can come back as "https:///host"; collapse it.
        return forced.geturl().replace(":///", "://")
    except ValueError as err:
        logger.error("parsing error : {0}".format(err))
        sys.exit(1)


def _build_parser():
    """Build the argument parser with its ``import`` and ``export`` sub-commands."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", help="Verbosity", action="store_true")
    parser.add_argument("--logfile", help="Log file", default="")
    parser.add_argument("--quiet", help="No console output", action="store_true")
    parser.add_argument("--parser", help="Parser content", default="html.parser")
    # type=int turns a non-numeric value into a usage error, not a traceback.
    parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1, type=int)

    subparsers = parser.add_subparsers(dest="command")

    import_parser = subparsers.add_parser("import")
    import_parser.add_argument("--user", help="wordpress user", required=True)
    import_parser.add_argument("--file", help="HTML file", default="")
    import_parser.add_argument("--directory", help="HTML directory", default="")
    import_parser.add_argument("--canalblog", help="URL Canalblog", default="")
    import_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
    import_parser.add_argument("--serial", help="Serial execution", action="store_true")

    export_parser = subparsers.add_parser("export")
    export_parser.add_argument("--url", help="canblog URL to be scraping", required=True)
    export_parser.add_argument("--directory", default="backup", help="backup file path")
    export_parser.add_argument("--no-css", help="No CSS", dest="css", action="store_true")
    export_parser.add_argument("--no-js", help="No JS", dest="js", action="store_true")
    export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
    export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")
    return parser


def _configure_logging(args):
    """Create the script logger with console and/or file handlers per the CLI flags."""
    logger = logging.getLogger('import export canalblog')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    level = logging.DEBUG if args.debug else logging.INFO
    # Set the logger level unconditionally; otherwise "--quiet --logfile"
    # leaves it at the default WARNING and the file never sees INFO/DEBUG.
    logger.setLevel(level)

    if not args.quiet:
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(formatter)
        logger.addHandler(console)

    if args.logfile:
        file_handler = logging.FileHandler(args.logfile)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger


def _run_import(args, logger):
    """Handle the ``import`` sub-command (sources: --file, --directory, --canalblog)."""
    password = getpass()
    if not password:
        logger.error("No password error !!! ")
        sys.exit(1)

    basic = HTTPBasicAuth(args.user, password)
    wordpress = args.wordpress.split(",")
    importWp = WPimport(basic, "", logger, args.parser)

    if args.file:
        for wp_url in wordpress:
            importWp.setUrl(wp_url)
            importWp.fromFile(args.file.split(","))
        sys.exit(0)

    if args.directory:
        directories = args.directory.split(",")
        if not args.serial:
            # Every directory goes to every WordPress URL.
            for wp_url in wordpress:
                importWp.setUrl(wp_url)
                for directory in directories:
                    importWp.fromDirectory(directory)
        else:
            if len(directories) != len(wordpress):
                logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
                sys.exit(1)
            # Pair the n-th directory with the n-th URL; zip covers every
            # pair, including the last one.
            for wp_url, directory in zip(wordpress, directories):
                importWp.setUrl(wp_url)
                importWp.fromDirectory(directory)
        sys.exit(0)

    if args.canalblog:
        exportWp = WPExport("", logger, args.parser, args.directory)
        canalblog = args.canalblog.split(",")
        if not args.serial:
            # Every Canalblog URL goes to every WordPress URL.
            for canal in canalblog:
                exportWp.setUrl(_to_https_url(canal, logger))
                webpage = exportWp.getUrlPage()
                for wp_url in wordpress:
                    importWp.setUrl(wp_url)
                    importWp.fromUrl(webpage)
        else:
            if len(canalblog) != len(wordpress):
                logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
                sys.exit(1)
            # Pair the n-th Canalblog URL with the n-th WordPress URL.
            for canal, wp_url in zip(canalblog, wordpress):
                exportWp.setUrl(_to_https_url(canal, logger))
                webpage = exportWp.getUrlPage()
                importWp.setUrl(wp_url)
                importWp.fromUrl(webpage)


def _run_export(args, logger):
    """Handle the ``export`` sub-command for every comma-separated --url value."""
    exportWp = WPExport("", logger, args.parser, args.directory)
    for canal in args.url.split(","):
        exportWp.setUrl(_to_https_url(canal, logger))
        if not args.js:
            exportWp.downloadJs()
        if not args.css:
            exportWp.downloadCss()
        if not args.html or not args.img:
            # Tell each worker how many workers really exist instead of a
            # fixed 3 (assumes getUrlPage uses it to split the work).
            workers = [
                multiprocessing.Process(
                    name="Process-{0}".format(index + 1),
                    target=download,
                    args=(index, args.parallel, exportWp, args.html, args.img),
                )
                for index in range(args.parallel)
            ]
            # Start everything before joining so the workers overlap.
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
    sys.exit(0)


def main():
    """Parse the command line, set up logging and dispatch the sub-command."""
    args = _build_parser().parse_args()
    logger = _configure_logging(args)
    if args.command == "import":
        _run_import(args, logger)
    elif args.command == "export":
        _run_export(args, logger)


if __name__ == '__main__':
    main()