#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests
import os
import logging
import re
import json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# requests raises its own ConnectionError, which does NOT derive from the
# builtin ConnectionError. Both are caught so that the "fatal" handlers below
# really run when the network is unreachable.
_CONNECTION_ERRORS = (ConnectionError, requests.exceptions.ConnectionError)


class WPMenu:
    """Read the navigation menu of a Canalblog page and mirror it in WordPress.

    The menu is parsed from the ``<ul id="listsmooth">`` element, every entry
    is matched against WordPress categories, tags or posts through the REST
    API (``/wp-json/wp/v2/...``), and a WordPress menu named
    ``"Menu <wordpress host>"`` is looked up or created.
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="", logger=None, parser="html.parser", ssl_canalblog=True, ssl_wordpress=True):
        """Store the configuration and prepare one retrying HTTP session per site.

        :param name: label prefixed to every log message
        :param basic: value passed as ``auth=`` to every WordPress request
        :param canalblog: Canalblog host, used to recognise the "home" link
        :param wordpress: WordPress host used to build the REST URLs
        :param logger: logger to use; defaults to ``logging.getLogger(__name__)``
        :param parser: parser name handed to BeautifulSoup
        :param ssl_canalblog: ``False`` to talk to Canalblog over plain http
        :param ssl_wordpress: ``False`` to talk to WordPress over plain http
        """
        self._name = name
        self._basic = basic
        self._canalblog = canalblog
        self._wordpress = wordpress
        # Falling back to a module logger avoids an AttributeError on the
        # first log call when the caller keeps the default logger=None.
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._parser = parser
        self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
        self._protocol_wordpress = "https"
        self._protocol_canalblog = "https"
        self._directory = "backup"

        if ssl_wordpress is False:
            self._protocol_wordpress = "http"
        if ssl_canalblog is False:
            self._protocol_canalblog = "http"

        self._request_canalblog = requests.Session()
        self._request_wordpress = requests.Session()

        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)

        self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
        self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        print("{0} : Import finished for {1}".format(self._name, self._wordpress))

    # Public method

    ## From file

    def fromFile(self, files):
        """Build the menu from a local HTML file.

        :param files: path of the HTML file; an error is logged if it is missing
        """
        if os.path.exists(files):
            with open(files, 'r') as f:
                self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
                content = f.read()
            # The file is fully read, so it can be closed before the (slow)
            # network-bound processing starts.
            self._menu(content)
        else:
            self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))

    ## Get from URL

    def fromUrl(self, canalblog):
        """Download a Canalblog page and build the menu from it.

        :param canalblog: host or URL of the page; its scheme is replaced by
            the configured Canalblog protocol
        """
        self._canalblog = canalblog
        try:
            o = urlparse(canalblog)
            o = o._replace(scheme=self._protocol_canalblog)
            # A scheme-less input parses with an empty netloc, which makes
            # geturl() produce "scheme:///host"; collapse the extra slash.
            i = o.geturl().replace(":///", "://")
            page = self._request_canalblog.get(i)
            if page.status_code == 200:
                self._logger.info("{0} : Page web is being processed : {1}".format(self._name, i))
                self._menu(page.content)
            else:
                self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        except _CONNECTION_ERRORS as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))

    def addItemMenu(self, menu, idMenu):
        """Create (or update) every entry of ``menu`` inside a WordPress menu.

        :param menu: list of top-level entries; each may hold a ``"children"`` list
        :param idMenu: id of the WordPress menu that receives the items
        """
        self._logger.info("{0} : add item to menu : {1}".format(self._name, idMenu))
        for i in menu:
            # Top-level entries have no parent item, hence 0; the id returned
            # for the entry becomes the parent of its children.
            parent = self._createItemMenu(idMenu, i, 0)
            for j in i.get("children", []):
                self._createItemMenu(idMenu, j, parent)

    # Private method

    def _wordpressUrl(self, endpoint):
        """Return the WordPress REST URL of ``endpoint`` (e.g. ``"posts"``)."""
        return "{1}://{0}/wp-json/wp/v2/{2}".format(self._wordpress, self._protocol_wordpress, endpoint)

    ## replace caracter

    def _replaceCaracter(self, title_rendered):
        """Return ``title_rendered`` with typographic characters replaced by plain ones."""
        # NOTE(review): several keys look duplicated here; they may differ only
        # by non-breaking spaces that are not visible - verify against the
        # original file before cleaning this table up.
        list_replace = {'’': "'", '–': '-', '…': '...', '« ': '"', ' »': '"', '« ': '"', ' »': '"', '’': "'", '"‘': "'"}
        for old, new in list_replace.items():
            title_rendered = title_rendered.replace(old, new)
        return title_rendered

    def _getIdfromTitlePost(self, content):
        """Find the WordPress post whose title matches the article in ``content``.

        The title is read from ``<h2 class="articletitle">`` and searched in
        the ``posts`` endpoint, at most 9 pages of 100 results.

        :param content: HTML of a Canalblog article (string, bytes or open file)
        :return: ``{"id", "type", "link"}``; ``id`` is 0 when nothing matched
        """
        idMenu = {"id":0, "type":"", "link":""}
        soup = BeautifulSoup(content, self._parser)
        heading = soup.find("h2", class_="articletitle")
        if heading is None:
            # Not an article page: nothing to search for.
            self._logger.error("{0} : No article title found in content".format(self._name))
            return idMenu
        articletitle = heading.get_text()
        url = self._wordpressUrl("posts")
        exist = False
        for index in range(1,10):
            params = {"search":articletitle, "per_page":100, "page":index}
            try:
                self._logger.debug("{0} : Get Url for post : {1} {2}".format(self._name, url, params))
                page = self._request_wordpress.get(url, auth=self._basic, params=params)
                if page.status_code == 200:
                    result = page.json()
                    self._logger.info("{0} : Get content post : {1}".format(self._name, len(result)))
                    if len(result) > 0:
                        for i in result:
                            title_rendered = i["title"]["rendered"]
                            # Only normalise when the lengths differ, i.e. when
                            # the two titles cannot already be equal.
                            if len(articletitle) != len(title_rendered):
                                title_rendered = self._replaceCaracter(title_rendered)
                            self._logger.debug("{0} : comparaison debug {1} {2}".format(self._name, articletitle, title_rendered))
                            if articletitle == title_rendered:
                                self._logger.debug("{0} : get post id : {1}".format(self._name, i))
                                idMenu = {"id":i["id"], "type":"post", "link": i["link"]}
                                exist = True
                    else:
                        self._logger.debug("{0} : {2} {1}".format(self._name, result, len(result)))
                        break
                elif page.status_code == 400:
                    # WordPress answers 400 when the page number is past the end.
                    self._logger.debug("{0} : {2} {1}".format(self._name, page.content, page.status_code))
                    break
                else:
                    self._logger.error("{0} : Post didn't get due status code : {1}".format(self._name, page.status_code))
                    self._logger.debug("{0} : {1}".format(self._name, page.content))
            except _CONNECTION_ERRORS as err:
                self._logger.error("{0} : Connection error for get content : {1}".format(self._name, err))
                exit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get content : {1} ".format(self._name, err))
            if exist:
                break
        return idMenu

    def _getIdFromPost(self, href):
        """Resolve a menu link that points to an article.

        Absolute links are downloaded from Canalblog; links without a host are
        read from the local backup directory.

        :param href: link taken from the menu
        :return: ``{"id", "type", "link"}``; ``id`` is 0 when nothing matched
        """
        idMenu = {"id":0, "type":"", "link":""}
        o = urlparse(href)
        if len(o.netloc) > 0:
            try:
                page = self._request_canalblog.get(href)
                if page.status_code == 200:
                    self._logger.info("{0} : Get content : {1}".format(self._name, href))
                    idMenu = self._getIdfromTitlePost(page.content)
                else:
                    self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, href))
                    self._logger.debug("{0} : {1}".format(self._name, page.content))
            except _CONNECTION_ERRORS as err:
                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, href, err))
                exit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, href, err))
        else:
            path = "{0}/..{1}".format(self._directory, o.path)
            if os.path.exists(path):
                try:
                    # Read inside a context manager so the handle is always closed.
                    with open(path, "r") as f:
                        content = f.read()
                    idMenu = self._getIdfromTitlePost(content)
                except Exception as err:
                    self._logger.error("{0} : Exception error for get file content {1} : {2}".format(self._name, href, err))
        return idMenu

    def _getIdFromReverse(self, title, href):
        """Resolve an entry from the last two segments of its link.

        ``.../<name>/index.html`` is looked up as category/tag ``<name>``;
        any other link is treated as an article.

        :param title: entry title (only used for logging)
        :param href: entry link; ``"#"``, empty or ``None`` resolves to nothing
        :return: ``{"id", "type", "link"}``; ``id`` is 0 when nothing matched
        """
        self._logger.info("{0} : get title {1} from href {2}".format(self._name, title, href))
        idMenu = {"id":0, "type":"", "link":""}
        if not href or href == "#":
            return idMenu
        segments = href.split("/")
        link = segments[-1]
        # A link without any "/" has no parent segment to use as a title.
        title = segments[-2] if len(segments) > 1 else ""
        self._logger.info("{0} link {1} title {2}".format(self._name, link, title))
        if link == "index.html":
            if title:
                idMenu = self._getId(title)
        else:
            idMenu = self._getIdFromPost(href)
        return idMenu

    def _getId(self, title):
        """Search WordPress categories, then tags, for ``title``.

        An item matches when its ``name`` or ``slug`` equals ``title``. At most
        9 pages of 100 results are read per taxonomy.

        :return: ``{"id", "type", "link"}``; ``id`` is 0 when nothing matched
        """
        # NOTE(review): the search does not stop at the first hit, so the last
        # matching item wins (a tag overrides a category). Kept as is.
        idMenu = {"id": 0, "type":"", "link":""}
        for i in ["categories", "tags"]:
            typeId = "tag" if i == "tags" else "category"
            url = self._wordpressUrl(i)
            for index in range(1,10):
                try:
                    params = {"search":title, "per_page":"100", "page":index}
                    self._logger.info("{0} Get menu {1} {2} {3}".format(self._name, url, index, title))
                    page = self._request_wordpress.get(url, auth=self._basic, params=params)
                    if page.status_code == 200:
                        result = page.json()
                        if len(result) > 0:
                            for j in result:
                                self._logger.info("{0} info : {1} {2} {3}".format(self._name, j["name"], j["slug"], title))
                                if j["name"] == title or j["slug"] == title:
                                    self._logger.info("{0} : comparaison ok : {1} {2}".format(self._name, j["id"], i))
                                    idMenu = {"id": j["id"], "type": typeId, "link": j["link"]}
                        else:
                            break
                    elif page.status_code == 400:
                        break
                    else:
                        self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
                        self._logger.debug("{0} : {1}".format(self._name, page.content))
                except _CONNECTION_ERRORS as err:
                    self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
                    exit(1)
                except Exception as err:
                    self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
        return idMenu

    def _menu(self, content):
        """Parse the Canalblog menu, resolve every entry and hand it to ``_createMenu``.

        :param content: HTML of a page containing ``<ul id="listsmooth">``
        """
        soup = BeautifulSoup(content, self._parser)
        ul = soup.find("ul", id="listsmooth")
        if ul is None:
            self._logger.error("{0} : No menu found in content".format(self._name))
            return

        menu = list()
        children = list()
        home = "{0}://{1}/".format(self._protocol_canalblog, self._canalblog)

        for anchor in ul.find_all("li"):
            a = anchor.find("a")
            if a is None:
                continue
            parent = a.get_text().replace(" \xa0", "")
            href = a.get("href")
            if href == home:
                parent = "home"
            itemMenu = {"id":"", "type":"", "title": parent, "link":"", "href":href, "children":list()}
            if href == "#":
                # "#" marks a drop-down: its real links are in the nested list.
                sub = anchor.find("ul")
                li = sub.find_all("li") if sub is not None else []
                for child in li:
                    a = child.find("a")
                    if a is None:
                        continue
                    self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, a.get_text()))
                    children.append({"title": a.get_text(), "parent": parent, "href":a.get("href"), "link":""})
            menu.append(itemMenu)

        for i, child in enumerate(children):
            self._logger.info("{0} : Child {1} {2}".format(self._name, child, i))
            # find_all("li") also returned the nested items, so each child is
            # listed as a top-level entry too. Walk backwards so that deleting
            # never shifts an entry that is still to be checked.
            for j in range(len(menu) - 1, -1, -1):
                if menu[j]["title"] == child["title"]:
                    self._logger.info("{0} : Parent {1} {2}".format(self._name, menu[j], j))
                    del menu[j]

            for entry in menu:
                self._logger.info("{0} : Children for : {1}".format(self._name, entry["title"]))
                if entry["title"] == child["parent"]:
                    entry["children"].append({"id":"", "type":"", "title":child["title"], "parent": child["parent"], "link":"", "href":child["href"]})

        for i in range(0, len(menu)):
            self._logger.info("{0} : Menu {1} {2}".format(self._name, menu[i]["title"], len(menu[i]["children"])))
            if menu[i]["title"] == "home":
                continue
            subitems = menu[i]["children"]
            for j in range(0, len(subitems)):
                # First try the title as a category/tag, then fall back to the link.
                idMenu = self._getId(subitems[j]["title"])
                if idMenu["id"] == 0:
                    self._logger.debug("{0} : content children {1}".format(self._name, subitems[j]))
                    idMenu = self._getIdFromReverse(subitems[j]["title"], subitems[j]["href"])
                if idMenu["id"] != 0:
                    subitems[j] = {"id":idMenu["id"], "type": idMenu["type"], "link": idMenu["link"], "title": subitems[j]["title"], "parent": subitems[j]["parent"]}
            idMenu = self._getId(menu[i]["title"])
            self._logger.debug("{0} : content parent {1}".format(self._name, menu[i]))
            self._logger.debug("{0} : content idMenu {1}".format(self._name, idMenu))
            if idMenu["id"] == 0:
                idMenu = self._getIdFromReverse(menu[i]["title"], menu[i]["href"])
            if idMenu["id"] != 0:
                menu[i] = {"id":idMenu["id"], "type": idMenu["type"], "title":menu[i]["title"], "link":idMenu["link"], "children": menu[i]["children"]}

        self._createMenu(menu)

    def _createItemMenu(self, idMenu, itemMenu, parent):
        """Create or update one item of a WordPress menu.

        An existing item with the same title in the menu is updated, otherwise
        a new one is created.

        :param idMenu: id of the WordPress menu
        :param itemMenu: entry with at least ``"title"``, ``"type"`` and ``"id"``
        :param parent: id of the parent menu item
        :return: id of the menu item, or 0 when it could not be determined
        """
        idItemMenu = 0
        base_url = self._wordpressUrl("menu-items")
        self._logger.info("{0} : Create item menu from API Wordpress : {1}".format(self._name, self._wordpress))
        try:
            params = {"search": itemMenu["title"], "menus": idMenu}
            page = self._request_wordpress.get(base_url, auth=self._basic, params=params)
            if page.status_code == 200:
                result = page.json()
                for i in result:
                    if i["title"]["rendered"] == itemMenu["title"]:
                        idItemMenu = i["id"]
                self._logger.info("{0} : Length of result for item menus : {1}".format(self._name, len(result)))
                url = base_url
                if idItemMenu != 0:
                    # Posting to the item's own URL updates it instead of creating a copy.
                    url = "{0}/{1}".format(base_url, idItemMenu)

                try:
                    objectt = itemMenu["type"]
                    if objectt == "tag":
                        # WordPress names the tag taxonomy "post_tag".
                        objectt = "post_tag"
                    data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "menus":idMenu, "object":objectt, "object_id":itemMenu["id"]}
                    page = self._request_wordpress.post(url, auth=self._basic, headers=self._headers_json, data=json.dumps(data))
                    if page.status_code == 201:
                        result = page.json()
                        idItemMenu = result["id"]
                        self._logger.info("{0} : create item menu : {1}".format(self._name, itemMenu["title"]))
                    elif page.status_code == 200:
                        # An update returns the single item, not a list of items.
                        result = page.json()
                        if result["title"]["rendered"] == itemMenu["title"]:
                            idItemMenu = result["id"]
                        self._logger.info("{0} : update item menu : {1}".format(self._name, itemMenu["title"]))
                    else:
                        self._logger.error("{0} : Create menu items didn't get due status code : {1}".format(self._name, page.status_code))
                        self._logger.debug("{0} : {1}".format(self._name, page.content))
                except _CONNECTION_ERRORS as err:
                    self._logger.error("{0} : Connection error for create item menu {1} : {2}".format(self._name, base_url, err))
                    exit(1)
                except Exception as err:
                    self._logger.error("{0} : Exception error for get item menu {1} : {2}".format(self._name, base_url, err))
            else:
                self._logger.error("{0} : Get menu items didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        except _CONNECTION_ERRORS as err:
            self._logger.error("{0} : Connection error for get item menus {1} : {2}".format(self._name, base_url, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get item menus {1} : {2}".format(self._name, base_url, err))
        return idItemMenu

    def _createMenu(self, menu):
        """Look up the WordPress menu ``"Menu <wordpress host>"`` and create it if missing.

        :param menu: resolved menu entries
        """
        # NOTE(review): ``menu`` is not used and the resolved menu id is not
        # passed to addItemMenu() - presumably that call is still to be wired
        # in; confirm before relying on items being created.
        title = "Menu {0}".format(self._wordpress)
        url = self._wordpressUrl("menus")
        self._logger.info("{0} : Create menu from API Wordpress : {1}".format(self._name, title))
        try:
            params = {"search": title}
            page = self._request_wordpress.get(url, auth=self._basic, params=params)
            if page.status_code == 200:
                result = page.json()
                self._logger.info("{0} : Get content menus : {1}".format(self._name, len(result)))
                idMenu = 0
                if len(result) == 0:
                    self._logger.info("{0} : Create menu : {1}".format(self._name, title))
                    data = {"name": title}
                    try:
                        page = self._request_wordpress.post(url, auth=self._basic, headers=self._headers_json, data=json.dumps(data))
                        if page.status_code == 201:
                            result = page.json()
                            self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
                            if len(result) > 0:
                                idMenu = result["id"]
                        else:
                            self._logger.error("{0} : Post menu didn't get due status code : {1}".format(self._name, page.status_code))
                            self._logger.debug("{0} : {1}".format(self._name, page.content))
                    except _CONNECTION_ERRORS as err:
                        self._logger.error("{0} : Connection error for create menu {1} : {2}".format(self._name, url, err))
                        exit(1)
                    except Exception as err:
                        self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, url, err))
                else:
                    self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
                    for i in result:
                        self._logger.debug("{0} : List menus : {1}".format(self._name, i))
                        if i["name"] == title:
                            idMenu = i["id"]
                self._logger.info("{0} : Get ID menus : {1}".format(self._name, idMenu))
            else:
                self._logger.error("{0} : Get menu didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        except _CONNECTION_ERRORS as err:
            self._logger.error("{0} : Connection error for get menu {1} : {2}".format(self._name, url, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, url, err))