Fix reading of the JSON file

This commit is contained in:
Valentin CZERYBA 2023-06-26 23:52:03 +02:00
parent 8e0abc40bd
commit a67ff868f3
2 changed files with 29 additions and 29 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, argparse, logging
import requests, os, argparse, logging, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

View File

@ -41,35 +41,35 @@ class WPimport:
def fromUrl(self, first, second):
    """Import every page URL stored under ``[first][second]`` of the temp JSON file.

    The file ``<self._tmp>/<self._name>.json`` is parsed and the list found at
    ``webpage_content[first][second]`` is treated as the pages still to import.
    Each URL is fetched with ``self._request``; on HTTP 200 the page is parsed
    and handed to ``self._addOrUpdatePost`` when it contains a
    ``div.articlebody`` element, otherwise to ``self._addOrUpdateFeaturedMedia``.
    After each successful import the URL is removed from the list and the
    JSON file is rewritten, so the file always holds the pages that remain.

    Args:
        first: First-level key into the parsed JSON document.
        second: Second-level key; the value found there is the list of URLs.

    Returns:
        None. Failures to read or parse the JSON file are logged, not raised.

    Note:
        A ``ConnectionError`` or ``IOError`` raised while handling a page is
        logged and terminates the process through ``exit(1)``.
    """
    path = "{0}/{1}.json".format(self._tmp, self._name)
    try:
        with open(path) as file:
            webpage_content = json.loads(file.read())
        self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage_content)))
        pending = webpage_content[first][second]
        # Walk a snapshot: `pending` itself shrinks as pages are imported, and
        # deleting from a list while indexing into it skips entries.
        urls = list(pending)
    except Exception as ex:
        self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
        return

    total = len(urls)
    for i, url in enumerate(urls):
        try:
            r = self._request.get(url)
            if r.status_code == 200:
                self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, total, url))
                soup = BeautifulSoup(r.content, self._parser)
                articlebody = soup.find_all("div", class_="articlebody")
                if len(articlebody) > 0:
                    self._addOrUpdatePost(soup)
                else:
                    self._addOrUpdateFeaturedMedia(soup)
                # Remove by value so earlier removals cannot shift the target.
                pending.remove(url)
                # Keep the parsed document as a dict; only the serialised copy
                # is a string, so the next iteration can still index into it.
                with open(path, "wt") as file:
                    file.write(json.dumps(webpage_content))
            else:
                self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, url, r.status_code))
                self._logger.debug("{0} : {1}".format(self._name, r.content))
        except ConnectionError as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
            exit(1)
        except IOError as err:
            self._logger.error("{0} : Connection error for IO url {1} : {2}".format(self._name, url, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))