Compare commits

...

3 Commits

Author SHA1 Message Date
a67ff868f3 fix json read file 2023-06-26 23:52:03 +02:00
8e0abc40bd check files tmp 2023-06-26 23:09:54 +02:00
9149a6c5cb rollback webpage 2023-06-26 22:44:42 +02:00
3 changed files with 51 additions and 51 deletions

View File

@ -54,7 +54,6 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img,
del exportWp del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp): def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp):
canalblog = canalblog.split(",") canalblog = canalblog.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
@ -107,23 +106,23 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image, tmp, revert): def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image):
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",") directory = directory.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
if serial is False: if serial is False:
for i in wordpress: for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, revert=revert) importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
for j in directory: for j in directory:
importWp.fromDirectory(j, name_thread, max_thread) importWp.fromDirectory(j, name_thread, max_thread)
del importWp del importWp
else: else:
if len(directory) != len(wordpress): if len(directory) != len(wordpress):
logger.error("{0} : Error : Number directory is differant than wordpress".format(name)) logger.error("{0} : Error : Number directory is different than wordpress".format(name))
exit(1) exit(1)
for i in range(0, len(wordpress)-1): for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, revert=revert) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
importWp.fromDirectory(directory[i]) importWp.fromDirectory(directory[i])
del importWp del importWp
@ -252,7 +251,7 @@ if __name__ == '__main__':
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED) wait(wait_for, return_when=ALL_COMPLETED)
wait_for = [ wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image, TMP, revert) ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
except Exception as err: except Exception as err:
@ -262,6 +261,13 @@ if __name__ == '__main__':
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED) wait(wait_for, return_when=ALL_COMPLETED)
if args.revert is True:
files_tmp = glob.glob("{0}/*.json".format(tmp))
if len(files_tmp) > 0:
if len(files_tmp) != args.parallel:
for file_r in files_tmp:
os.remove(file_r)
wait_for = [ wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, TMP) ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, TMP)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))

View File

@ -1,7 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
import requests, os, argparse, logging import requests, os, argparse, logging, json
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry from requests.packages.urllib3.util.retry import Retry

View File

@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport: class WPimport:
# Constructor # Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog", revert=False): def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog"):
self._name = name self._name = name
self._basic = basic self._basic = basic
self._wordpress = wordpress self._wordpress = wordpress
@ -29,7 +29,6 @@ class WPimport:
self._no_update = no_update self._no_update = no_update
self._no_image = no_image self._no_image = no_image
self._tmp = tmp self._tmp = tmp
self._revert = revert
# Destructor # Destructor
def __del__(self): def __del__(self):
@ -42,8 +41,9 @@ class WPimport:
def fromUrl(self, first, second): def fromUrl(self, first, second):
try: try:
content_file = open("{0}/{1}.json".format(self._name, self._tmp)) with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
webpage_content = json.loads(content_file) webpage_content = json.loads(file.read())
self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage_content)))
webpage = webpage_content[first][second] webpage = webpage_content[first][second]
for i in range(0, len(webpage)): for i in range(0, len(webpage)):
try: try:
@ -57,6 +57,7 @@ class WPimport:
else: else:
self._addOrUpdateFeaturedMedia(soup) self._addOrUpdateFeaturedMedia(soup)
del webpage_content[first][second][i] del webpage_content[first][second][i]
webpage_content = json.dumps(webpage_content)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content) open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content)
else: else:
self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, webpage[i], r.status_code)) self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, webpage[i], r.status_code))
@ -69,28 +70,21 @@ class WPimport:
exit(1) exit(1)
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, webpage[i], err)) self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, webpage[i], err))
except Exception as ex: except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def fromDirectory(self, directory="", number_thread=1, max_thread=1): def fromDirectory(self, directory="", number_thread=1, max_thread=1):
if self._revert:
self._directory = directory self._directory = directory
directory = "{0}/archives".format(directory) directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory)) directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0: if len(directories) > 0:
files = self._getFiles(directories) files = self._getFiles(directories)
self.fromFile(files, number_thread, max_thread) self.fromFile(files=files, number_thread=number_thread, max_thread=max_thread)
else: else:
self._logger.error("{0} : No files for {1}".format(self._name, directory)) self._logger.error("{0} : No files for {1}".format(self._name, directory))
else:
try:
files = open("{0}/{1}.json".format(self._name, self._tmp))
self.fromFile(files, number_thread, max_thread)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))