From ab3720fbbc438440722aa2986a1a2c98fc417820 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 29 Apr 2023 22:26:47 +0200 Subject: [PATCH] fix directory in thread --- import_export_canalblog.py | 4 ++-- lib/WPImport.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/import_export_canalblog.py b/import_export_canalblog.py index 12e98fc..2d3271a 100644 --- a/import_export_canalblog.py +++ b/import_export_canalblog.py @@ -64,7 +64,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres for i in wordpress: importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser) for j in directory: - importWp.fromDirectory(j) + importWp.fromDirectory(j, name_thread, max_thread) else: if len(directory) != len(wordpress): @@ -152,7 +152,7 @@ if __name__ == '__main__': try: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: wait_for = [ - ex.submit(importDi, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial) + ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial) for i in range(0, int(args.parallel)) ] except Exception as err: diff --git a/lib/WPImport.py b/lib/WPImport.py index 42c3b5c..ec8df45 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -51,14 +51,22 @@ class WPimport: def fromDirectory(self, directory="", number_thread=1, max_thread=1): directory = "{0}/archives".format(directory) directories = self._getDirectories([], "{0}".format(directory)) - files = self._getFiles(directories) - self.fromFile(files) + if len(directories) > 0: + files = self._getFiles(directories) + self.fromFile(files, number_thread, max_thread) + else: + self._logger.error("{0} : No files for {1}".format(self._name, directory)) def fromFile(self, files=[], number_thread=1, max_thread=1): divFiles = int(len(files) / max_thread) - currentRangeFiles = int(divFiles * number_thread) + currentRangeFiles = int(divFiles * (number_thread+1)) firstRange = int(currentRangeFiles - divFiles) + self._logger.debug("{0} : index : {1}".format(self._name,number_thread)) + + self._logger.debug("{0} : first range : {1}".format(self._name,firstRange)) + self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles)) + for i in range(firstRange, currentRangeFiles): if os.path.exists(files[i]): self._logger.info("{0} : File is being processed : {1}".format(self._name, files[i])) @@ -66,6 +74,7 @@ class WPimport: content = f.read() soup = BeautifulSoup(content, self._parser) articlebody = soup.find_all("div", class_="articlebody") + self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody))) if len(articlebody) > 0: self._addOrUpdatePost(soup) else: