This commit is contained in:
Valentin CZERYBA 2023-05-01 21:58:47 +02:00
parent b359521001
commit 1f6bd96a8e
4 changed files with 19 additions and 4 deletions

5
.gitignore vendored
View File

@ -1,5 +1,4 @@
backup/ backup*/
backup1/ wp-navigation
backup2/
web_scrap.log web_scrap.log
__pycache__/ __pycache__/

View File

@ -19,6 +19,7 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img)
if args.img is False: if args.img is False:
exportWp.downloadImg(webpage) exportWp.downloadImg(webpage)
del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial): def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial):
canalblog = canalblog.split(",") canalblog = canalblog.split(",")
@ -36,10 +37,11 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
exit(1) exit(1)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser)
webpage = exportWp.getUrlPage(name_thread, max_thread) webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp
for j in wordpress: for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser)
importWp.fromUrl(webpage) importWp.fromUrl(webpage)
del importWp
else: else:
if len(canalblog) != len(wordpress): if len(canalblog) != len(wordpress):
logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name)) logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name))
@ -54,8 +56,10 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
exit(1) exit(1)
exportWp = WPExport(name=name, url=url, logger=logger, parser=parser) exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
webpage = exportWp.getUrlPage(name_thread, max_thread) webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser)
importWp.fromUrl(webpage) importWp.fromUrl(webpage)
del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial): def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
@ -67,6 +71,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser)
for j in directory: for j in directory:
importWp.fromDirectory(j, name_thread, max_thread) importWp.fromDirectory(j, name_thread, max_thread)
del importWp
else: else:
if len(directory) != len(wordpress): if len(directory) != len(wordpress):
@ -75,6 +80,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres
for i in range(0, len(wordpress)-1): for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser)
importWp.fromDirectory(directory[i]) importWp.fromDirectory(directory[i])
del importWp
@ -187,6 +193,7 @@ if __name__ == '__main__':
if args.css is False: if args.css is False:
exportWp.downloadCss() exportWp.downloadCss()
del exportWp
if args.html is False or args.img is False: if args.html is False or args.img is False:
try: try:

View File

@ -21,6 +21,10 @@ class WPExport:
self._request.mount('http://', HTTPAdapter(max_retries=retries)) self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
# Finalizer hook: emits a single info-level log line built from this
# instance's name (self._name) and source URL (self._url).
# NOTE(review): this runs whenever the object is finalized, so the
# "Export finished" wording is logged on any teardown path — confirm
# that this is the intended meaning.
# NOTE(review): assumes _logger, _name and _url were all assigned by
# __init__ before finalization can happen — TODO confirm.
self._logger.info("{0} : Export finished for {1}".format(self._name, self._url))
# Public method # Public method
# Set name # Set name

View File

@ -23,6 +23,11 @@ class WPimport:
self._request.mount('http://', HTTPAdapter(max_retries=retries)) self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
# Finalizer hook: emits a single info-level log line built from this
# instance's name (self._name) and target WordPress value
# (self._wordpress).
# NOTE(review): this runs whenever the object is finalized, so the
# "Import finished" wording is logged on any teardown path — confirm
# that this is the intended meaning.
# NOTE(review): assumes _logger, _name and _wordpress were all assigned
# by __init__ before finalization can happen — TODO confirm.
self._logger.info("{0} : Import finished for {1}".format(self._name, self._wordpress))
# Public method # Public method
def setUrl(self, wordpress): def setUrl(self, wordpress):