fix json read file

This commit is contained in:
Valentin CZERYBA 2023-06-26 23:52:03 +02:00
parent 8e0abc40bd
commit a67ff868f3
2 changed files with 29 additions and 29 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
import requests, os, argparse, logging import requests, os, argparse, logging, json
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry from requests.packages.urllib3.util.retry import Retry

View File

@ -41,35 +41,35 @@ class WPimport:
def fromUrl(self, first, second):
    """Import every page listed under ``[first][second]`` of the JSON work file.

    The work file is ``<self._tmp>/<self._name>.json``.  The value found at
    ``[first][second]`` is treated as a list of URLs.  Each URL is fetched
    with ``self._request``; on an HTTP 200 answer the page is parsed and
    handed to ``self._addOrUpdatePost`` when it contains at least one
    ``<div class="articlebody">``, otherwise to
    ``self._addOrUpdateFeaturedMedia``.  After each successful import the
    URL is removed from the list and the work file is rewritten, so the
    file always holds the URLs that still have to be processed.

    Args:
        first: Top-level key in the JSON document.
        second: Key inside ``webpage_content[first]`` whose value is the
            list of URLs to import.

    Errors are logged rather than raised.  A ``ConnectionError`` or
    ``IOError`` while handling a URL terminates the process through
    ``exit(1)``; any other per-URL exception is logged and the loop goes on
    with the next URL.  A failure to read or decode the work file, or a
    missing key, is logged once and the method returns.
    """
    path = "{0}/{1}.json".format(self._tmp, self._name)
    try:
        # Read and close the work file before the (slow) network loop so the
        # same path can safely be reopened for writing below.
        with open(path) as file:
            webpage_content = json.loads(file.read())
        self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage_content)))
        webpage = webpage_content[first][second]
        # Iterate over a snapshot: ``webpage`` is the very list stored inside
        # ``webpage_content`` and it shrinks as URLs are imported.  Deleting
        # by index from the list being indexed would skip entries.
        pending = list(webpage)
        total = len(pending)
        for i, url in enumerate(pending):
            try:
                r = self._request.get(url)
                if r.status_code == 200:
                    self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, total, url))
                    soup = BeautifulSoup(r.content, self._parser)
                    articlebody = soup.find_all("div", class_="articlebody")
                    if len(articlebody) > 0:
                        self._addOrUpdatePost(soup)
                    else:
                        self._addOrUpdateFeaturedMedia(soup)
                    # Drop the imported URL by value and persist the
                    # remaining work.  The serialised text goes into its own
                    # local so ``webpage_content`` stays a parsed document
                    # for the following iterations.
                    webpage.remove(url)
                    serialized = json.dumps(webpage_content)
                    with open(path, "wt") as file:
                        file.write(serialized)
                else:
                    self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, url, r.status_code))
                    self._logger.debug("{0} : {1}".format(self._name, r.content))
            # NOTE(review): unless the module imports another ConnectionError
            # elsewhere, this is the builtin one -- confirm that this is the
            # exception the HTTP session is expected to raise.
            except ConnectionError as err:
                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
                exit(1)
            except IOError as err:
                self._logger.error("{0} : Connection error for IO url {1} : {2}".format(self._name, url, err))
                exit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
    except Exception as ex:
        self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))