def fromUrl(self, first, second):
    """Import every URL stored under ``[first][second]`` in the JSON state file.

    The state file lives at ``<self._tmp>/<self._name>.json`` and is expected
    to hold a two-level mapping whose leaf ``[first][second]`` is a list of
    URLs. Each URL is fetched with ``self._request``; a page answering 200 is
    imported as a post when it contains a ``div.articlebody`` element and as
    featured media otherwise. After every successful import the URL is
    removed from the list and the state file is rewritten, so the file
    always reflects the URLs that are still pending.

    Args:
        first: Top-level key of the state mapping.
        second: Second-level key selecting the list of URLs.

    Returns:
        None. Failures are reported through ``self._logger``.

    Raises:
        SystemExit: Via ``exit(1)`` on a connection or I/O error.
    """
    # Function-scope import: the file's import block is not visible from
    # here, so the dependency is brought into scope locally.
    import json

    # Same "<tmp>/<name>.json" layout that is used when the state is written.
    state_path = "{0}/{1}.json".format(self._tmp, self._name)

    try:
        with open(state_path) as content_file:
            # json.load reads from a file object; json.loads needs a string.
            webpage_content = json.load(content_file)
        webpage = webpage_content[first][second]
    except Exception as ex:
        self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
        return

    total = len(webpage)
    # Walk a snapshot: `webpage` itself shrinks as URLs are imported, and
    # deleting by index from the list being indexed would skip entries and
    # eventually raise IndexError.
    for i, url in enumerate(list(webpage)):
        try:
            r = self._request.get(url)
            if r.status_code == 200:
                self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, total, url))
                soup = BeautifulSoup(r.content, self._parser)
                articlebody = soup.find_all("div", class_="articlebody")
                if articlebody:
                    self._addOrUpdatePost(soup)
                else:
                    self._addOrUpdateFeaturedMedia(soup)
                # `webpage` is the list held inside `webpage_content`, so
                # removing here updates the structure that gets persisted.
                webpage.remove(url)
                with open(state_path, "wt") as state_file:
                    json.dump(webpage_content, state_file)
            else:
                self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, url, r.status_code))
                self._logger.debug("{0} : {1}".format(self._name, r.content))
        except ConnectionError as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
            exit(1)
        except IOError as err:
            self._logger.error("{0} : Connection error for IO url {1} : {2}".format(self._name, url, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))