#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib . parse import urlparse
import requests , os , logging , re , json
from requests . adapters import HTTPAdapter
from requests . packages . urllib3 . util . retry import Retry
class WPMenu:
    """Read the navigation menu of a Canalblog page and describe it as a tree.

    The menu is taken from the ``<ul id="listsmooth">`` element of a page that
    is either read from a local file (``fromFile``) or downloaded
    (``fromUrl``).  Titles can be looked up among the categories and tags of a
    WordPress site through its REST API (``_getId``).
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="", logger=None, parser="html.parser", ssl_canalblog=True, ssl_wordpress=True):
        """Store the configuration and prepare one HTTP session per site.

        Args:
            name: Label prefixed to every log message.
            basic: Value passed as ``auth`` to the WordPress requests.
            canalblog: Canalblog address (stored as given).
            wordpress: Host of the WordPress site used to build REST URLs.
            logger: Object exposing ``info``, ``error`` and ``debug``.
            parser: Parser name handed to BeautifulSoup.
            ssl_canalblog: Pass ``False`` to talk to Canalblog over http.
            ssl_wordpress: Pass ``False`` to talk to WordPress over http.
        """
        self._name = name
        self._basic = basic
        self._canalblog = canalblog
        self._wordpress = wordpress
        self._logger = logger
        self._parser = parser
        self._headers_json = {
            'Content-Type': 'application/json; charset=utf-8',
            'Accept': 'application/json',
        }
        self._directory = "backup"

        # Only an explicit ``False`` downgrades a site to plain http.
        self._protocol_wordpress = "http" if ssl_wordpress is False else "https"
        self._protocol_canalblog = "http" if ssl_canalblog is False else "https"

        self._request_canalblog = requests.Session()
        self._request_wordpress = requests.Session()

        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)

        # Attach the retry policy to the scheme each session actually uses.
        self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
        self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        """Print a completion message when the instance is destroyed."""
        print("{0} : Import finished for {1}".format(self._name, self._wordpress))

    # Public method
    ## From file
    def fromFile(self, files):
        """Read the page stored at path ``files`` and process its menu.

        A missing file is reported through the logger; nothing is raised.
        """
        if not os.path.exists(files):
            self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))
            return
        with open(files, 'r') as f:
            self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
            content = f.read()
        self._menu(content)

    ## Get from URL
    def fromUrl(self, canalblog):
        """Download the page at ``canalblog`` and process its menu.

        The scheme of the address is replaced by the configured Canalblog
        protocol.  A connection failure terminates the program with status 1;
        any other error is logged and swallowed.
        """
        try:
            parsed = urlparse(canalblog)
            if not parsed.scheme and not parsed.netloc:
                # Bare "host/path" input: re-parse it as "//host/path" so the
                # host lands in netloc and the rebuilt URL does not depend on
                # how geturl() renders a URL without a network location.
                parsed = urlparse("//{0}".format(canalblog.lstrip("/")))
            parsed = parsed._replace(scheme=self._protocol_canalblog)
            url = parsed.geturl().replace(":///", "://")
            page = self._request_canalblog.get(url)
            if page.status_code == 200:
                self._logger.info("{0} : Page web is being processed : {1}".format(self._name, url))
                self._menu(page.content)
            else:
                self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
                self._logger.debug("{0} : {1}".format(self._name, page.content))
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one, so both have to be named for this branch to fire.
        except (ConnectionError, requests.exceptions.ConnectionError) as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
            raise SystemExit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))

    def _getId(self, title, parent):
        """Search WordPress categories, then tags, for ``title``.

        Returns:
            A dict with keys ``id``, ``type``, ``title``, ``parent`` and
            ``children``.  On the first endpoint returning at least one
            result, ``id`` is the id of the first result and ``type`` is the
            endpoint name; otherwise every field is empty.
        """
        menu = {"id": "", "type": "", "title": "", "parent": "", "children": []}
        params = {"search": title, "per_page": "100"}
        for kind in ["categories", "tags"]:
            url = "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, kind, self._protocol_wordpress)
            try:
                page = self._request_wordpress.get(url, auth=self._basic, params=params)
                if page.status_code == 200:
                    result = page.json()
                    if len(result) > 0:
                        # First match wins; later endpoints are not queried.
                        return {"id": result[0]["id"], "type": kind, "title": title, "parent": parent, "children": []}
                else:
                    self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, kind))
                    self._logger.debug("{0} : {1}".format(self._name, page.content))
            except (ConnectionError, requests.exceptions.ConnectionError) as err:
                self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
                raise SystemExit(1)
            except Exception as err:
                self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
        return menu

    def _parseMenu(self, ul):
        """Turn the ``<ul>`` element ``ul`` into a list of menu entries.

        Each entry is a dict with keys ``id``, ``type``, ``title``, ``parent``
        and ``children``.  An entry whose link points to ``#`` gets one child
        dict (``id``, ``type``, ``title``, ``parent``) per ``<li>`` of its
        nested ``<ul>``.
        """
        menu = []
        # NOTE(review): BeautifulSoup's find_all() searches recursively by
        # default, so nested child <li> elements are also visited here as
        # top-level entries; de-duplication is not implemented yet.
        for anchor in ul.find_all("li"):
            link = anchor.find("a")
            if link is None:
                # An <li> without a link carries no menu entry.
                continue
            parent = link.get_text().replace("\xa0", "")
            itemMenu = {"id": "", "type": "", "title": parent, "parent": parent, "children": []}
            if link.get("href") == "#":
                submenu = anchor.find("ul")
                children = submenu.find_all("li") if submenu is not None else []
                for child in children:
                    a = child.find("a")
                    if a is None:
                        continue
                    self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, a.get_text()))
                    # A fresh dict per child: sharing one dict would make
                    # every child show the values of the last one.
                    itemMenu["children"].append({"id": "", "type": "", "title": a.get_text(), "parent": parent})
            menu.append(itemMenu)
        return menu

    def _menu(self, content):
        """Parse ``content`` as HTML and log every entry of its menu."""
        soup = BeautifulSoup(content, self._parser)
        ul = soup.find("ul", id="listsmooth")
        if ul is None:
            self._logger.error("{0} : Menu list not found in page".format(self._name))
            return
        for item in self._parseMenu(ul):
            self._logger.info("{0} Menu : {1}".format(self._name, item))