137 lines
6.1 KiB
Python
137 lines
6.1 KiB
Python
#!/usr/bin/python3
|
|
|
|
from bs4 import BeautifulSoup
|
|
from urllib.parse import urlparse
|
|
import requests, os, logging, re, json
|
|
from requests.adapters import HTTPAdapter
|
|
from requests.packages.urllib3.util.retry import Retry
|
|
|
|
class WPMenu:
    """Read a Canalblog navigation menu and look up matching WordPress terms.

    The menu is taken from the ``<ul id="listsmooth">`` element of a Canalblog
    page, supplied either as a local file (:meth:`fromFile`) or fetched over
    HTTP (:meth:`fromUrl`). WordPress is queried through its REST API
    (``/wp-json/wp/v2/categories`` and ``/wp-json/wp/v2/tags``).
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="", logger=None, parser="html.parser", ssl_canalblog=True, ssl_wordpress=True):
        """Store the configuration and prepare one HTTP session per site.

        Args:
            name: Label prefixed to every log message.
            basic: Value passed as ``auth=`` on WordPress requests.
            canalblog: Canalblog host, used to recognise the "home" link.
            wordpress: WordPress host, used to build REST URLs.
            logger: Logger receiving progress and error messages. When
                omitted, a module logger is used so that logging calls never
                fail on ``None``.
            parser: Parser name handed to BeautifulSoup.
            ssl_canalblog: ``False`` switches Canalblog requests to ``http``.
            ssl_wordpress: ``False`` switches WordPress requests to ``http``.
        """
        self._name = name
        self._basic = basic
        self._canalblog = canalblog
        self._wordpress = wordpress
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._parser = parser
        self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
        self._protocol_wordpress = "https"
        self._protocol_canalblog = "https"
        self._directory = "backup"

        if ssl_wordpress is False:
            self._protocol_wordpress = "http"
        if ssl_canalblog is False:
            self._protocol_canalblog = "http"

        self._request_canalblog = requests.Session()
        self._request_wordpress = requests.Session()

        # Retry connection/read failures and the listed HTTP statuses with
        # an increasing delay between attempts.
        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)

        self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
        self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        """Print a completion message when the instance is collected."""
        # __del__ also runs when __init__ raised before the attributes were
        # assigned, hence getattr() instead of direct attribute access.
        print("{0} : Import finished for {1}".format(getattr(self, "_name", None), getattr(self, "_wordpress", None)))

    # Public method

    ## From file
    def fromFile(self, files):
        """Parse the menu out of a local HTML file.

        Args:
            files: Path of the HTML file to read.

        A missing file is logged as an error; nothing is raised for it.
        """
        if not os.path.exists(files):
            self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))
            return

        with open(files, 'r') as f:
            self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
            content = f.read()
        self._menu(content)

    ## Get from URL
    def fromUrl(self, canalblog):
        """Download a Canalblog page and parse its menu.

        Args:
            canalblog: Address of the blog. Its scheme is replaced by the
                protocol selected in the constructor.

        Raises:
            SystemExit: When the connection to the blog fails.
        """
        self._canalblog = canalblog
        try:
            o = urlparse(canalblog)._replace(scheme=self._protocol_canalblog)
            # A bare host is parsed as a path, which yields "scheme:///host".
            url = o.geturl().replace(":///", "://")
            page = self._request_canalblog.get(url)
            if page.status_code == 200:
                self._logger.info("{0} : Page web is being processed : {1}".format(self._name, url))
                self._menu(page.content)
            else:
                self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        # requests raises its own ConnectionError, which does not derive from
        # the builtin one; both must be named for this branch to be reachable.
        except (ConnectionError, requests.exceptions.ConnectionError) as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
            raise SystemExit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))

    def _getId(self, title):
        """Return the id of the first WordPress category or tag matching *title*.

        Categories are searched first, then tags; the first non-empty result
        wins.

        Args:
            title: Text sent as the ``search`` parameter.

        Returns:
            The ``id`` of the first hit, or ``0`` when nothing was found.

        Raises:
            SystemExit: When the connection to WordPress fails.
        """
        params = {"search":title, "per_page":"100"}
        for i in ["categories", "tags"]:
            url = "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress)
            try:
                page = self._request_wordpress.get(url, auth=self._basic, params=params)
                if page.status_code == 200:
                    result = page.json()
                    if len(result) > 0:
                        return result[0]["id"]
                else:
                    self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
                    self._logger.debug("{0} : {1}".format(self._name, page.content))
            # See fromUrl: the requests exception is distinct from the builtin.
            except (ConnectionError, requests.exceptions.ConnectionError) as err:
                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
                raise SystemExit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
        return 0

    @staticmethod
    def _cleanTitle(text):
        """Remove the space + non-breaking-space padding found in menu labels."""
        return text.replace(" \xa0", "")

    def _attachChildren(self, menu, children):
        """Fold sub-menu entries into their parent entries.

        ``find_all("li")`` is recursive, so every sub-menu item is also
        present in *menu* as if it were a top-level entry. For each child,
        the top-level entries carrying the child's title are dropped and the
        child is appended to the entry whose title equals its parent.

        Args:
            menu: List of ``{"id", "type", "title", "children"}`` dicts.
            children: List of ``{"title", "parent"}`` dicts.

        Returns:
            A new list of top-level entries with ``children`` filled in.
        """
        for index, child in enumerate(children):
            self._logger.info("{0} : Child {1} {2}".format(self._name, child, index))

            # Rebuild the list instead of deleting in place: deleting while
            # indexing skips the element that slides into the freed slot.
            kept = []
            for position, item in enumerate(menu):
                if item["title"] == child["title"]:
                    self._logger.info("{0} : Parent {1} {2}".format(self._name, item, position))
                else:
                    kept.append(item)
            menu = kept

            for item in menu:
                self._logger.info("{0} : Children for : {1}".format(self._name, item["title"]))
                if item["title"] == child["parent"]:
                    item["children"].append({"id":"", "title":child["title"], "parent": child["parent"]})
        return menu

    def _menu(self, content):
        """Build the menu tree from the HTML of a Canalblog page.

        Args:
            content: HTML markup (text or bytes) of the page.

        Returns:
            The list of top-level menu entries, each with its ``children``.
            An empty list is returned when the page has no
            ``<ul id="listsmooth">`` element.
        """
        soup = BeautifulSoup(content, self._parser)
        ul = soup.find("ul", id="listsmooth")
        if ul is None:
            self._logger.error("{0} : Menu list 'listsmooth' not found".format(self._name))
            return []

        home = "{0}://{1}/".format(self._protocol_canalblog, self._canalblog)
        menu = []
        children = []
        for anchor in ul.find_all("li"):
            link = anchor.find("a")
            if link is None:
                # An item without a link has neither a title nor a target.
                continue

            parent = self._cleanTitle(link.get_text())
            href = link.get("href")
            if href == home:
                parent = "home"

            if href == "#":
                # "#" marks an entry that only opens a sub-menu.
                submenu = anchor.find("ul")
                sub_items = submenu.find_all("li") if submenu is not None else []
                for child in sub_items:
                    a = child.find("a")
                    if a is None:
                        continue
                    # Same normalisation as the parent title, otherwise the
                    # comparison in _attachChildren cannot match this entry.
                    title = self._cleanTitle(a.get_text())
                    self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, title))
                    children.append({"title": title, "parent": parent})

            menu.append({"id":"", "type":"", "title": parent, "children":[]})

        menu = self._attachChildren(menu, children)

        for item in menu:
            self._logger.info("{0} : Menu {1} {2}".format(self._name, item["title"], len(item["children"])))
        return menu
|
|
|