Add no-image parameter (skip image import when set)

This commit is contained in:
2023-06-03 09:07:33 +02:00
parent 2f1c081823
commit e48b262d7e
2 changed files with 53 additions and 49 deletions

View File

@@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport:
# Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False):
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False):
self._name = name
self._basic = basic
self._wordpress = wordpress
@@ -26,6 +26,7 @@ class WPimport:
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._no_create = no_create
self._no_update = no_update
self._no_image = no_image
# Destructor
def __del__(self):
@@ -436,48 +437,49 @@ class WPimport:
img_a = articlebody[0].find_all("a", {"target": "_blank"})
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
list_img = []
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_img)
if self._no_image is False:
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_img)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
comment_post = self._getComment(comment)