#!/usr/bin/python3
import json
import logging
import os
import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


class WPMenu:
    """Parse the navigation menu of a Canalblog page.

    The menu is read from the ``<ul id="listsmooth">`` element of an HTML
    document (loaded from a file or fetched over HTTP) and turned into a list
    of menu entries with their children.  ``_getId`` can look up a title among
    the WordPress categories and tags through the WordPress REST API.
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="",
                 logger=None, parser="html.parser", ssl_canalblog=True,
                 ssl_wordpress=True):
        """Store the configuration and prepare the two HTTP sessions.

        Args:
            name: Label prefixed to every log message.
            basic: Value passed as ``auth=`` to the WordPress requests.
            canalblog: Canalblog host, used to recognise the "home" link.
            wordpress: WordPress host, used to build the REST API URLs.
            logger: Logger receiving the messages; when ``None`` a module
                logger is used so that logging calls never fail.
            parser: Parser name handed to BeautifulSoup.
            ssl_canalblog: ``False`` selects ``http`` instead of ``https``
                for Canalblog.
            ssl_wordpress: ``False`` selects ``http`` instead of ``https``
                for WordPress.
        """
        self._name = name
        self._basic = basic
        self._canalblog = canalblog
        self._wordpress = wordpress
        # The default logger=None used to crash on the first log call.
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._parser = parser
        self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
        self._protocol_wordpress = "https"
        self._protocol_canalblog = "https"
        self._directory = "backup"

        if ssl_wordpress is False:
            self._protocol_wordpress = "http"
        if ssl_canalblog is False:
            self._protocol_canalblog = "http"

        self._request_canalblog = requests.Session()
        self._request_wordpress = requests.Session()

        # Retry on connection/read failures and on the listed HTTP statuses.
        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504],
                        backoff_factor=2)

        self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
        self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        """Print a completion message when the instance is destroyed."""
        print("{0} : Import finished for {1}".format(self._name, self._wordpress))

    # Public method

    ## From file
    def fromFile(self, files):
        """Parse the menu from the HTML file at path ``files``.

        Logs an error (and does nothing else) when the path does not exist.
        """
        if os.path.exists(files):
            with open(files, 'r') as f:
                self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
                content = f.read()
                self._menu(content)
        else:
            self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))

    ## Get from URL
    def fromUrl(self, canalblog):
        """Fetch the page of ``canalblog`` and parse its menu.

        The scheme of the URL is forced to the configured Canalblog protocol.
        A connection failure is logged and terminates with exit status 1;
        any other exception is logged and swallowed.
        """
        self._canalblog = canalblog
        try:
            o = urlparse(canalblog)
            o = o._replace(scheme=self._protocol_canalblog)
            # A bare host parses as a path, which yields "scheme:///host".
            i = o.geturl().replace(":///", "://")
            page = self._request_canalblog.get(i)
            if page.status_code == 200:
                self._logger.info("{0} : Page web is being processed : {1}".format(self._name, i))
                self._menu(page.content)
            else:
                self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one; catch both so this branch is actually reachable.
        except (requests.exceptions.ConnectionError, ConnectionError) as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
            raise SystemExit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))

    def _getId(self, title, parent):
        """Search ``title`` among the WordPress categories, then the tags.

        Returns:
            A dict with keys ``id``, ``type``, ``title``, ``parent`` and
            ``children``.  ``id`` and ``type`` come from the first result of
            the first endpoint that returns a match; when nothing matches,
            every field is empty.

        A connection failure is logged and terminates with exit status 1;
        any other exception is logged and the next endpoint is tried.
        """
        menu = {"id":"", "type":"", "title": "", "parent":"", "children": []}
        for i in ["categories", "tags"]:
            url = "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress)
            try:
                params = {"search":title, "per_page":"100"}
                page = self._request_wordpress.get(url, auth=self._basic, params=params)
                if page.status_code == 200:
                    result = page.json()
                    if len(result) > 0:
                        menu = {"id":result[0]["id"], "type":i, "title": title, "parent":parent, "children": []}
                        # First match wins; do not query the remaining endpoints.
                        break
                else:
                    self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
                    self._logger.debug("{0} : {1}".format(self._name, page.content))
            except (requests.exceptions.ConnectionError, ConnectionError) as err:
                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
                raise SystemExit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
        return menu

    @staticmethod
    def _clean_title(text):
        """Remove the " \\xa0" sequence from a menu label."""
        return text.replace(" \xa0", "")

    def _menu(self, content):
        """Build the menu structure from the HTML ``content``.

        Every ``<li>`` under ``<ul id="listsmooth">`` becomes an entry; an
        entry whose link is ``#`` contributes the ``<li>`` items of its nested
        ``<ul>`` as children.  Entries that are also listed as children are
        removed from the top level and attached to their parent instead.
        The ``id`` and ``type`` fields are left empty here.

        Returns:
            The list of top-level entries (empty when the menu list is
            missing from the document).
        """
        soup = BeautifulSoup(content, self._parser)
        ul = soup.find("ul", id="listsmooth")
        if ul is None:
            self._logger.error("{0} : Menu list 'listsmooth' not found".format(self._name))
            return []

        home_url = "{0}://{1}/".format(self._protocol_canalblog, self._canalblog)
        menu = list()
        children = list()
        for anchor in ul.find_all("li"):
            link = anchor.find("a")
            if link is None:
                # Nothing to name or link this entry with.
                continue
            parent = self._clean_title(link.get_text())
            href = link.get("href")
            if href == home_url:
                parent = "home"
            itemMenu = {"id":"", "type":"", "title": parent, "children":list()}
            if href == "#":
                submenu = anchor.find("ul")
                nested = submenu.find_all("li") if submenu is not None else []
                for child in nested:
                    a = child.find("a")
                    if a is None:
                        continue
                    # Same normalisation as top-level titles, otherwise the
                    # de-duplication below cannot match them.
                    child_title = self._clean_title(a.get_text())
                    self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, child_title))
                    children.append({"title": child_title, "parent": parent})
            menu.append(itemMenu)

        for child in children:
            self._logger.info("{0} : Child {1}".format(self._name, child))
            # Nested <li> items were also collected as top-level entries;
            # rebuild the list instead of deleting while indexing into it.
            remaining = list()
            for item in menu:
                if item["title"] == child["title"]:
                    self._logger.info("{0} : Parent {1}".format(self._name, item))
                else:
                    remaining.append(item)
            menu = remaining
            for item in menu:
                if item["title"] == child["parent"]:
                    item["children"].append({"id":"", "title":child["title"], "parent": child["parent"]})

        for i in menu:
            self._logger.info("{0} : Menu {1} {2}".format(self._name, i["title"], len(i["children"])))
        return menu