def change(index, number, args, logger):
    """Worker entry point: rewrite asset URLs for one thread's share of an export.

    Parameters:
        index:  0-based position of this worker in the thread pool (the pool
                submits ``range(0, int(args.parallel))``).
        number: total number of workers (``args.parallel``; may be a string).
        args:   parsed command-line namespace; only ``args.directory`` is read.
        logger: logger handed to the ``WPChange`` instance.
    """
    changeWp = WPChange(logger=logger, index_name=index, number_thread=number)
    # WPChange.fromFile slices the file list with a 1-based thread index
    # (first = chunk * number_thread - chunk).  Without forwarding the index
    # and the pool size, every worker falls back to the defaults (1, 1) and
    # rewrites the *whole* directory concurrently with the others.
    changeWp.fromDirectory(args.directory, int(index) + 1, int(number))

    del changeWp
class WPChange:
    """Rewrite absolute asset URLs inside exported HTML files to local paths.

    For every processed file the following references are rewritten when they
    point to an external host (i.e. the URL has a network location):

    * ``<img src>``                                  -> ``/img/<host>/<path>``
    * ``<script type="text/javascript" src>``        -> ``/dists/js/<host>/<path>``
    * ``<link rel="stylesheet" href>``               -> ``/dists/css/<host>/<path>``
    * ``<a target="_blank" href>`` to an image file  -> ``/img/<host>/<path>``

    The work can be split between several threads: each instance processes
    only the slice of the file list selected by ``number_thread`` (1-based)
    out of ``max_thread``.
    """

    # File extensions for which an <a target="_blank"> link is treated as an image.
    _IMG_EXTENSIONS = ("png", "svg", "gif", "jpg", "jpeg")

    # Constructor
    def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser"):
        """Store the thread label, logger and BeautifulSoup parser name.

        Parameters:
            index_name:    value used to build the ``Thread-<index_name>`` label
                           that prefixes every log line.
            number_thread: stored as-is; not used by the methods of this class.
            logger:        logger to use; a module logger is used when omitted
                           so that the default arguments do not crash on the
                           first log call.
            parser:        parser name passed to ``BeautifulSoup``.
        """
        self._name = "Thread-{0}".format(index_name)
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._number_thread = number_thread
        self._parser = parser

    # Destructor
    def __del__(self):
        # getattr: __del__ also runs on instances whose __init__ never completed.
        print("{0} : Import finished".format(getattr(self, "_name", "Thread-?")))

    # Public method

    ## from file

    def fromFile(self, files=None, number_thread=1, max_thread=1):
        """Process this thread's slice of ``files``.

        Parameters:
            files:         list of file paths; a single path given as a string
                           is accepted too (it is wrapped in a list instead of
                           being iterated character by character).
            number_thread: 1-based index of the calling thread.
            max_thread:    total number of threads sharing ``files``.

        Paths that do not exist are skipped silently.
        """
        if files is None:
            files = []
        elif isinstance(files, str):
            files = [files]

        max_thread = max(1, int(max_thread))
        number_thread = int(number_thread)
        if not 1 <= number_thread <= max_thread:
            # An out-of-range index would otherwise select files through
            # negative indexing or run past the end of the list.
            self._logger.error("{0} : Invalid thread index {1} (expected 1..{2})".format(self._name, number_thread, max_thread))
            return

        firstRange, lastRange = self._threadRange(len(files), number_thread, max_thread)
        self._logger.debug("{0} : index : {1}".format(self._name, number_thread))
        self._logger.debug("{0} : first range : {1}".format(self._name, firstRange))
        self._logger.debug("{0} : last range : {1}".format(self._name, lastRange))

        for i in range(firstRange, lastRange):
            if os.path.exists(files[i]):
                self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i + 1, lastRange, files[i]))
                self._change(files[i])

    ## From directory

    def fromDirectory(self, directory="", number_thread=1, max_thread=1):
        """Process the files found in the sub-directories of ``<directory>/archives``.

        Files located directly in ``archives`` itself are not collected, only
        those in its (recursive) sub-directories.  The file list is sorted so
        that every thread computes the same partition.
        """
        directory = "{0}/archives".format(directory)
        if not os.path.isdir(directory):
            # os.listdir would raise inside the worker thread, where the
            # exception is easily lost; report it like the "empty" case.
            self._logger.error("{0} : No files for {1}".format(self._name, directory))
            return

        directories = self._getDirectories([], directory)
        if directories:
            files = sorted(self._getFiles(directories))
            self.fromFile(files, number_thread, max_thread)
        else:
            self._logger.error("{0} : No files for {1}".format(self._name, directory))

    # Private method

    ## Compute the slice of files handled by one thread

    @staticmethod
    def _threadRange(total, number_thread=1, max_thread=1):
        """Return ``(first, last)`` (last exclusive) for a 1-based thread index.

        Each thread gets ``total // max_thread`` items; the last thread also
        takes the remainder so that no file is left unprocessed.
        """
        chunk = total // max_thread
        first = chunk * (number_thread - 1)
        last = total if number_thread == max_thread else first + chunk
        return first, last

    ## Build the local path of an external URL

    @staticmethod
    def _localPath(prefix, url):
        """Return ``<prefix>/<host>/<path>`` for an absolute URL, else ``None``.

        ``None`` is returned for missing values and for URLs without a network
        location (relative or already-local links).  The leading slash of the
        URL path is stripped to avoid a double slash in the result.
        """
        if not isinstance(url, str) or not url:
            return None
        parsed = urlparse(url)
        if not parsed.netloc:
            return None
        return "{0}/{1}/{2}".format(prefix, parsed.netloc, parsed.path.lstrip("/"))

    ## Replace one URL in the raw content

    def _rewrite(self, content, url, prefix, label):
        """Replace every occurrence of ``url`` in ``content`` by its local path.

        ``content`` is returned unchanged when ``url`` is not an external URL.
        """
        target = self._localPath(prefix, url)
        if target is None:
            return content
        self._logger.info("{0} : {1} {2} {3}".format(self._name, label, url, target))
        return content.replace(url, target)

    ## Get all files

    def _getFiles(self, item):
        """Return the regular files found directly inside each directory of ``item``."""
        files = []
        for folder in item:
            for entry in os.listdir(folder):
                path = "{0}/{1}".format(folder, entry)
                if os.path.isfile(path):
                    files.append(path)
        return files

    ## Get directories

    def _getDirectories(self, subdirectory, item):
        """Append every directory below ``item`` (recursively) to ``subdirectory``.

        The list passed in is extended in place and returned; ``item`` itself
        is not added.
        """
        for entry in os.listdir(item):
            path = "{0}/{1}".format(item, entry)
            if os.path.isdir(path):
                subdirectory.append(path)
                self._getDirectories(subdirectory, path)
        return subdirectory

    ## Change path img file

    def _change(self, file):
        """Rewrite the external asset URLs of ``file`` and write it back in place.

        Read/parse errors and write errors are logged, never raised.
        """
        try:
            with open(file, 'r') as f:
                content = f.read()
            soup = BeautifulSoup(content, self._parser)

            for tag in soup.find_all("img"):
                content = self._rewrite(content, tag.get("src"), "/img", "Change source image")

            for tag in soup.find_all("script", {"type": "text/javascript"}):
                content = self._rewrite(content, tag.get("src"), "/dists/js", "Change source js")

            for tag in soup.find_all("link", {"rel": "stylesheet"}):
                content = self._rewrite(content, tag.get("href"), "/dists/css", "Change source css")

            for tag in soup.find_all("a", {"target": "_blank"}):
                href = tag.get("href")
                # Only links whose path ends with an image extension are rewritten.
                if isinstance(href, str) and urlparse(href).path.split(".")[-1] in self._IMG_EXTENSIONS:
                    content = self._rewrite(content, href, "/img", "Change a img")
        except Exception as ex:
            self._logger.error("{0} : Error for read file {1} : {2}".format(self._name, file, ex))
            return

        try:
            with open(file, "w") as f:
                self._logger.info("{0} : File write : {1}".format(self._name, file))
                f.write(content)
        except Exception as ex:
            self._logger.error("{0} : Error for write file {1} : {2}".format(self._name, file, ex))