From 82f9acd658f1c78bd1436851a2928cf362334eac Mon Sep 17 00:00:00 2001
From: Valentin CZERYBA
Date: Sat, 15 Jul 2023 13:55:01 +0200
Subject: [PATCH] get title from post

---
 lib/WPMenu.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/lib/WPMenu.py b/lib/WPMenu.py
index ef7eb6a..ba9ef99 100644
--- a/lib/WPMenu.py
+++ b/lib/WPMenu.py
@@ -74,9 +74,79 @@ class WPMenu:
         except Exception as err:
             self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))
 
+    def _replaceCaracter(self, title_rendered):
+        """Return *title_rendered* with typographic punctuation as plain ASCII.
+
+        HTML entities are decoded first, then curly quotes, dashes, ellipses
+        and French guillemets are replaced, so that a title rendered by
+        WordPress can be compared with the title scraped from the source page.
+        """
+        # Function-scope import: this helper is the only user of the module.
+        import html
+
+        # An ordered tuple rather than a dict: a dict literal silently drops
+        # duplicate keys, and the last rule can match text produced by the
+        # guillemet rules. Explicit escapes keep the plain-space and
+        # no-break-space variants distinguishable.
+        replacements = (
+            ("\u2019", "'"),
+            ("\u2013", "-"),
+            ("\u2026", "..."),
+            ("\u00ab ", '"'),
+            (" \u00bb", '"'),
+            ("\u00ab\u00a0", '"'),
+            ("\u00a0\u00bb", '"'),
+            ('"\u2018', "'"),
+        )
+        title_rendered = html.unescape(title_rendered)
+        for old, new in replacements:
+            title_rendered = title_rendered.replace(old, new)
+        return title_rendered
+
     def _getIdfromTitlePost(self, content):
+        """Return the id of the WordPress post whose title matches the article.
+
+        The title is the text of the first ``<h2 class="articletitle">`` found
+        in *content*; it is looked up in the ``posts`` collection of the
+        WordPress REST API. Returns 0 when the heading is missing or no post
+        title matches.
+        """
         idMenu = 0
-
+        soup = BeautifulSoup(content, self._parser)
+        heading = soup.find("h2", class_="articletitle")
+        if heading is None:
+            return idMenu
+        # Search and compare on the heading text, not on the bs4 Tag object.
+        articletitle = heading.get_text().strip()
+        url = "{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol_wordpress)
+        # WordPress pagination is 1-based: page=0 is rejected with HTTP 400.
+        for index in range(1, 11):
+            params = {"search": articletitle, "per_page": "100", "page": index}
+            try:
+                page = self._request_wordpress.get(url, auth=self._basic, params=params)
+                if page.status_code == 200:
+                    result = page.json()
+                    self._logger.info("{0} : Get content post : {1}".format(self._name, len(result)))
+                    if not result:
+                        break
+                    for post in result:
+                        title_rendered = post["title"]["rendered"]
+                        if len(articletitle) != len(title_rendered):
+                            title_rendered = self._replaceCaracter(title_rendered)
+                        if articletitle == title_rendered:
+                            return post["id"]
+                elif page.status_code == 400:
+                    # Asked for a page past the last one: nothing left to scan.
+                    break
+                else:
+                    self._logger.error("{0} : Post didn't get due status code : {1}".format(self._name, page.status_code))
+                    self._logger.debug("{0} : {1}".format(self._name, page.content))
+            except ConnectionError as err:
+                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
+                exit(1)
+            except Exception as err:
+                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
+
         return idMenu
 
     def _getIdFromPost(self, href):
@@ -118,6 +188,7 @@ class WPMenu:
             self._logger.info("{0} link {1} title {2}".format(self._name, link, title))
             if link == "index.html":
                 idMenu = self._getId(title)
+
         return idMenu
 
     def _getId(self, title):
@@ -139,7 +210,8 @@ class WPMenu:
                             self._logger.info("{0} : comparaison ok : {1} {2}".format(self._name, j["id"], i))
                             idMenu = j["id"]
                             exist = True
-
+                elif page.status_code == 400:
+                    break
                 else:
                     self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
                     self._logger.debug("{0} : {1}".format(self._name, page.content))