#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


class WPMenu:
    """Fetch a Canalblog page and walk its ``<ul id="listsmooth">`` list.

    NOTE(review): judging by the class name, ``ul#listsmooth`` is presumably
    the blog's navigation menu that is meant to be re-created on the
    WordPress side; only the read/parse half is implemented here.
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="",
                 logger=None, parser="html.parser", ssl_canalblog=True,
                 ssl_wordpress=True):
        """Store the configuration and prepare a retrying HTTP session.

        Args:
            name: Label prefixed to every log/print message.
            basic: Stored as-is; not used by the code visible here.
            canalblog: Address of the Canalblog site (scheme optional).
            wordpress: Address of the WordPress site (only printed here).
            logger: Logger used for error/debug output. Falls back to the
                module logger when omitted so logging calls never hit None.
            parser: Parser name handed to BeautifulSoup.
            ssl_canalblog: Pass ``False`` to reach Canalblog over plain http.
            ssl_wordpress: Pass ``False`` to reach WordPress over plain http.
        """
        self._name = name
        self._basic = basic
        self._canalblog = canalblog
        self._wordpress = wordpress
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._parser = parser
        self._headers_json = {'Content-Type': 'application/json; charset=utf-8',
                              'Accept': 'application/json'}
        self._directory = "backup"

        # Only an explicit ``False`` downgrades to http (same rule as before).
        # The original tuple-unpacked the string "https" into both attributes,
        # which raised ValueError on every construction.
        self._protocol_wordpress = "http" if ssl_wordpress is False else "https"
        self._protocol_canalblog = "http" if ssl_canalblog is False else "https"

        self._request = requests.Session()
        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504],
                        backoff_factor=2)
        # The original mounted on the never-assigned ``self._protocol``.
        # Install the retry policy for each scheme this instance will use.
        for scheme in {self._protocol_wordpress, self._protocol_canalblog}:
            self._request.mount('{0}://'.format(scheme),
                                HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        print("{0} : Import finished for {1}".format(self._name, self._wordpress))

    # Public method

    ## Get from URL
    def fromUrl(self):
        """Download the Canalblog page and collect the nested list links.

        For every ``<li>`` nested inside another ``<li>`` of
        ``ul#listsmooth``, the first ``<a>`` element is collected.

        Returns:
            list: The anchor tags found (the original looked them up and
            discarded them). Empty when the page could not be fetched, the
            list is missing, or an unexpected error occurred.

        Raises:
            SystemExit: With code 1 when the HTTP connection fails.
        """
        anchors = []
        # Bound before the ``try`` so the handlers can always report a URL.
        url = self._canalblog
        try:
            parsed = urlparse(self._canalblog)._replace(scheme=self._protocol_canalblog)
            # A scheme-less address parses as a bare path, which geturl()
            # renders as "scheme:///host"; collapse the extra slash.
            url = parsed.geturl().replace(":///", "://")

            page = self._request.get(url)
            if page.status_code != 200:
                self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
                return anchors

            soup = BeautifulSoup(page.text, self._parser)
            menu = soup.find("ul", id="listsmooth")
            if menu is None:
                # Previously an IndexError on ``ul[0]``.
                self._logger.error("{0} : no ul#listsmooth found in {1}".format(self._name, url))
                return anchors

            for item in menu.find_all("li"):
                for sub_item in item.find_all("li"):
                    link = sub_item.find("a")
                    if link is not None:
                        anchors.append(link)
        except requests.exceptions.ConnectionError as err:
            # requests' ConnectionError is not a subclass of the builtin one,
            # so the bare name never matched connection failures.
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
            # Same effect as exit(1) without relying on the site builtin.
            raise SystemExit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
        return anchors