From 28b513e1b234a9120dacb1de618e553abf2d8d5a Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 5 Aug 2023 12:03:17 +0200 Subject: [PATCH 01/12] add private method album --- lib/WPImport.py | 15 +++++++++++++-- lib/WPMenu.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/WPImport.py b/lib/WPImport.py index 9f0767a..60d6980 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -55,7 +55,11 @@ class WPimport: if len(articlebody) > 0: self._addOrUpdatePost(soup) else: - self._addOrUpdateFeaturedMedia(soup) + albumbody = soup.find_all("div", class_="albumbody") + if len(albumbody) > 0: + self._addOrUpdateAlbum(soup) + else: + self._addOrUpdateFeaturedMedia(soup) del webpage_content[first][second][i] webpage_content = json.dumps(webpage_content) open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content) @@ -100,12 +104,19 @@ class WPimport: if len(articlebody) > 0: self._addOrUpdatePost(soup) else: - self._addOrUpdateFeaturedMedia(soup) + albumbody = soup.find_all("div", class_="albumbody") + if len(albumbody) > 0: + self._addOrUpdateAlbum(soup) + else: + self._addOrUpdateFeaturedMedia(soup) # Private method + def _addOrUpdateAlbum(self, soup): + self._logger.info("{0} : Add/Update Album".format(self._name)) + def _fromFileTmp(self): try: with open("{0}/{1}.json".format(self._tmp, self._name)) as file: diff --git a/lib/WPMenu.py b/lib/WPMenu.py index bcb34a2..1bddf6e 100644 --- a/lib/WPMenu.py +++ b/lib/WPMenu.py @@ -86,8 +86,8 @@ class WPMenu: idMenu = {"id":0, "type":"", "link":""} soup = BeautifulSoup(content, self._parser) articletitle = soup.find("h2", class_="articletitle").get_text() - if len(articletitle) > 0: - articletitle = soup.find("h2").get_text() + if len(articletitle) == 0: + articletitle = soup.find("div", class_="albumbody").find("h2").get_text() exist = False for index in range(1,10): if exist is False: From 9ed5ffe3993da7ca72dadf4d3524e689ee336fb7 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 5 Aug 2023 12:28:33 +0200 Subject: [PATCH 02/12] add variable --- lib/WPImport.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/WPImport.py b/lib/WPImport.py index 60d6980..70843f7 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -116,6 +116,10 @@ class WPimport: def _addOrUpdateAlbum(self, soup): self._logger.info("{0} : Add/Update Album".format(self._name)) + albumbody = soup.find("div", class_="albumbody") + albumtitle = albumbody.find("h2").get_text() + self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle)) + albumdesc = albumbody.find("div", class_="albumdesc").find("p") def _fromFileTmp(self): try: From e4eb1b6b68c7329da0d6df03f0c9b05b3e5b073a Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 5 Aug 2023 23:22:44 +0200 Subject: [PATCH 03/12] add image to media wordpress --- lib/WPImport.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/lib/WPImport.py b/lib/WPImport.py index 70843f7..3041514 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -120,6 +120,67 @@ class WPimport: albumtitle = albumbody.find("h2").get_text() self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle)) albumdesc = albumbody.find("div", class_="albumdesc").find("p") + img_a = albumbody.find_all("a", {"target": "_blank"}) + + if self._no_image is False: + for i in img_a: + new_img = {} + img = i.find_all("img") + self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) + if len(img) > 0: + href_a = i.get("href") + href_img = img[0].get("src") + href_a_o = urlparse(href_a) + href_img_o = urlparse(href_img) + new_img["old_src"]=href_img + new_img["old_href"]=href_a + try: + if len(href_img_o.netloc) > 0: + img_ok = False + page_img = self._request.get(href_img) + + if page_img.status_code == 404: + href_img = href_a + try: + page_img = self._request.get(href_a) + if page_img.status_code == 200: + img_ok = True + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + + else: + if os.path.exists("{0}/..{1}".format(self._directory, href_img)): + page_img = open("{0}/..{1}".format(self._directory, href_img), "r") + img_ok = True + else: + if os.path.exists("{0}/..{1}".format(self._directory, href_a)): + page_img = open("{0}/..{1}".format(self._directory, href_a), "r") + img_ok = True + self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) + if img_ok is True: + media=self._addOrUpdateMedia(href_img, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + if href_img != href_a: + media=self._addOrUpdateMedia(href_a, page_img) + new_img["id"]=media["id"] + new_img["new_src"]=media["rendered"] + list_img.append(new_img) + if page_img.status_code not in [200, 404]: + self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) + self._logger.debug("{0} : {1}".format(self._name, page_img.content)) + except ConnectionError as err: + self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) + exit(1) + except Exception as err: + self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) + exit(1) + def _fromFileTmp(self): try: From 5959ab5b2ee7aefddca1b159a1862ddd896fcf34 Mon Sep 17 00:00:00 2001 From: Valentin CZERYBA Date: Sat, 12 Aug 2023 00:06:09 +0200 Subject: [PATCH 04/12] add galery wip --- .gitignore | 1 + lib/WPImport.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index f82fde7..c986d66 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ backup*/ wp-navigation *.log __pycache__/ +wp-gallery \ No newline at end of file diff --git a/lib/WPImport.py b/lib/WPImport.py index 3041514..16200e4 100644 --- a/lib/WPImport.py +++ b/lib/WPImport.py @@ -180,6 +180,14 @@ class WPimport: except Exception as err: self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) exit(1) + content_html = "

{0}

\n\n".format(albumdesc) + + content_html = content_html + "