From 193b0e6ef72e36ef0908189917be529e86b774e6 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Tue, 27 Jun 2023 14:37:45 +0200 Subject: [PATCH] add tmp files wip --- lib/WPImport.py | 60 +++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/lib/WPImport.py b/lib/WPImport.py index 432d50d..bad58cb 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -74,22 +74,45 @@ class WPimport: self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) - def fromDirectory(self, directory="", number_thread=1, max_thread=1): + def fromDirectory(self, directory="", number_thread=1, max_thread=1, revert=False): self._directory = directory directory = "{0}/archives".format(directory) directories = self._getDirectories([], "{0}".format(directory)) if len(directories) > 0: files = self._getFiles(directories) - self.fromFile(files=files, number_thread=number_thread, max_thread=max_thread) + if args.revert is False: + self._tmpFiles(files=files, number_thread=number_thread, max_thread=max_thread) + self.fromFile() else: self._logger.error("{0} : No files for {1}".format(self._name, directory)) - + + def fromFile(self, files=[]): + try: + with open("{0}/{1}.json".format(self._tmp, self._name)) as file: + files = json.loads(file.read()) + self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage))) + for i in range(0, len(files)): + if os.path.exists(files[i]): + self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i])) + with open(files[i], 'r') as f: + content = f.read() + self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content))) + soup = BeautifulSoup(content, self._parser) + articlebody = soup.find_all("div", class_="articlebody") + self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody))) + if len(articlebody) > 0: + self._addOrUpdatePost(soup) + else: + self._addOrUpdateFeaturedMedia(soup) + except Exception as ex: + self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) + + + # Private method - - - def fromFile(self, files=[], number_thread=1, max_thread=1): + def _tmpFiles(self, number_thread=1, max_thread=1): divFiles = int(len(files) / max_thread) currentRangeFiles = int(divFiles * (number_thread+1)) firstRange = int(currentRangeFiles - divFiles) @@ -97,23 +120,16 @@ class WPimport: self._logger.debug("{0} : first range : {1}".format(self._name,firstRange)) self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles)) - + webpage = [] for i in range(firstRange, currentRangeFiles): - if os.path.exists(files[i]): - self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i])) - with open(files[i], 'r') as f: - content = f.read() - self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content))) - soup = BeautifulSoup(content, self._parser) - articlebody = soup.find_all("div", class_="articlebody") - self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody))) - if len(articlebody) > 0: - self._addOrUpdatePost(soup) - else: - self._addOrUpdateFeaturedMedia(soup) - - - # Private method + webpage.append(files[i]) + + try: + string_webpage = json.dumps(webpage) + open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage) + except Exception as ex: + self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex)) + ## replace caracter