Merge pull request 'wpchange' (#18) from wpchange into master

Reviewed-on: #18
This commit is contained in:
v4l3n71n 2023-06-12 22:48:57 +00:00
commit f01a69a1e7
2 changed files with 157 additions and 1 deletions

View File

@ -9,6 +9,13 @@ import argparse, logging, threading
from lib.WPImport import WPimport from lib.WPImport import WPimport
from lib.WPExport import WPExport from lib.WPExport import WPExport
from lib.WPRemove import WPRemove from lib.WPRemove import WPRemove
from lib.WPChange import WPChange
def change(index, number, args, logger):
changeWp = WPChange(logger=logger, index_name=index, number_thread=number)
changeWp.fromDirectory(args.directory)
del changeWp
def remove(index, number, args, basic, logger, ssl_wordpress): def remove(index, number, args, basic, logger, ssl_wordpress):
removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress, index_name=index, number_thread=number) removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress, index_name=index, number_thread=number)
@ -172,7 +179,15 @@ if __name__ == '__main__':
export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true") export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true") export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")
change_parser = subparsers.add_parser("change")
change_parser.add_argument("--directory",
default="",
help="Directory")
change_parser.add_argument("--file",
default="",
help="File")
args = parser.parse_args() args = parser.parse_args()
@ -293,4 +308,17 @@ if __name__ == '__main__':
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
except Exception as err: except Exception as err:
logger.error("Thread error for remove : {0}".format(err)) logger.error("Thread error for remove : {0}".format(err))
exit(0)
if args.command == "change":
if len(args.directory) > 0:
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(change, i, args.parallel, args, logger) for i in range(0, int(args.parallel)) ]
except Exception as err:
logger.error("Thread error for remove : {0}".format(err))
if len(args.file) > 0:
changeWp = WPChange(logger=logger)
for filei in args.file.split(","):
changeWp.fromFile(filei)
exit(0) exit(0)

128
lib/WPChange.py Normal file
View File

@ -0,0 +1,128 @@
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
class WPChange:
# Constructor
def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser"):
self._name = "Thread-{0}".format(index_name)
self._logger = logger
self._number_thread = number_thread
self._parser = parser
# Destructor
def __del__(self):
print("{0} : Import finished".format(self._name))
# Public method
## from file
def fromFile(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i]))
self._change(files[i])
## From directory
def fromDirectory(self, directory="", number_thread=1, max_thread=1):
directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0:
files = self._getFiles(directories)
self.fromFile(files, number_thread, max_thread)
else:
self._logger.error("{0} : No files for {1}".format(self._name, directory))
# Private method
## Get all files
def _getFiles(self, item):
files = []
for i in item:
for j in os.listdir(i):
if os.path.isfile("{0}/{1}".format(i, j)):
files.append("{0}/{1}".format(i, j))
return files
## Get directories
def _getDirectories(self, subdirectory, item):
sub = subdirectory
for i in os.listdir(item):
if os.path.isdir("{0}/{1}".format(item, i)):
sub.append("{0}/{1}".format(item, i))
subdirectory = self._getDirectories(sub, "{0}/{1}".format(item, i))
return subdirectory
## Change path img file
def _change(self, file):
ext_img = ["png", "svg", "gif", "jpg", "jpeg"]
try:
with open(file, 'r') as f:
content = f.read()
soup = BeautifulSoup(content, self._parser)
img = soup.find_all("img")
for i in img:
src = i.get("src")
o = urlparse(src)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source image {1} /img/{2}/{3}".format(self._name, src, o.netloc, o.path))
content = content.replace(src, "/img/{0}/{1}".format(o.netloc, o.path))
script = soup.find_all("script", {"type": "text/javascript"})
for i in script:
src = i.get("src")
if src is not None:
o = urlparse(src)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source js {1} /dists/js/{2}/{3}".format(self._name, src, o.netloc, o.path))
content = content.replace(src, "/dists/js/{0}/{1}".format(o.netloc, o.path))
link = soup.find_all("link", {"rel": "stylesheet"})
for i in link:
href = i.get("href")
if href is not None:
o = urlparse(href)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source css {1} /dists/css/{2}/{3}".format(self._name, href, o.netloc, o.path))
content = content.replace(href, "/dists/css/{0}/{1}".format(o.netloc, o.path))
a = soup.find_all("a", {"target": "_blank"})
for i in a:
href = i.get("href")
if href is not None:
o = urlparse(href)
if len(o.netloc) > 0:
ext = o.path.split(".")[len(o.path.split("."))-1]
if ext in ext_img:
self._logger.info("{0} : Change a img {1} /img/{2}/{3}".format(self._name, href, o.netloc, o.path))
content = content.replace(href, "/img/{0}/{1}".format(o.netloc, o.path))
try:
with open(file, "w") as f:
self._logger.info("{0} : File write : {1}".format(self._name, file))
f.write(content)
except Exception as ex:
self._logger.error("{0} : Error for write file {1} : {2}".format(self._name, file, ex))
except Exception as ex:
self._logger.error("{0} : Error for read file {1} : {2}".format(self._name, file, ex))