From e4eb1b6b68c7329da0d6df03f0c9b05b3e5b073a Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 5 Aug 2023 23:22:44 +0200 Subject: [PATCH] add image to media wordpress --- lib/WPImport.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/lib/WPImport.py b/lib/WPImport.py index 70843f7..3041514 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -120,6 +120,67 @@ class WPimport: albumtitle = albumbody.find("h2").get_text() self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle)) albumdesc = albumbody.find("div", class_="albumdesc").find("p") + img_a = albumbody.find_all("a", {"target": "_blank"}) + + if self._no_image is False: + for i in img_a: + new_img = {} + img = i.find_all("img") + self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) + if len(img) > 0: + href_a = i.get("href") + href_img = img[0].get("src") + href_a_o = urlparse(href_a) + href_img_o = urlparse(href_img) + new_img["old_src"]=href_img + new_img["old_href"]=href_a + try: + if len(href_img_o.netloc) > 0: + img_ok = False + page_img = self._request.get(href_img) + + if page_img.status_code == 404: + href_img = href_a + try: + page_img = self._request.get(href_a) + if page_img.status_code == 200: + img_ok = True + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + + else: + if os.path.exists("{0}/..{1}".format(self._directory, href_img)): + page_img = open("{0}/..{1}".format(self._directory, href_img), "r") + img_ok = True + else: + if os.path.exists("{0}/..{1}".format(self._directory, href_a)): + page_img = open("{0}/..{1}".format(self._directory, href_a), "r") + img_ok = True + self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) + if img_ok is True: + media=self._addOrUpdateMedia(href_img, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + if href_img != href_a: + media=self._addOrUpdateMedia(href_a, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + if page_img.status_code not in [200, 404]: + self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page_img.content)) + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + def _fromFileTmp(self): try: