87 Commits

Author SHA1 Message Date
dce2c2dfa5 Merge pull request 'fix webpage' (#25) from fix-export into master
Reviewed-on: #25
2023-09-05 20:10:07 +00:00
eaec1ba9d4 fix webpage 2023-09-05 22:06:14 +02:00
3059f785c2 Merge pull request 'album-plus' (#24) from album-plus into master
Reviewed-on: #24
2023-09-01 22:27:57 +00:00
279a9f2786 fix parameter author 2023-09-02 00:27:18 +02:00
963f83ae81 fix author 2023-09-02 00:26:50 +02:00
7b154e3a1d add author 2023-08-31 22:50:31 +02:00
e5109204aa get link with album 2023-08-30 23:45:16 +02:00
2279e4b0b6 search title album 50% 2023-08-30 22:39:59 +02:00
2e21040196 add private method get info album 2023-08-29 22:26:15 +02:00
b4d0fe8aa0 Merge pull request 'album' (#23) from album into master
Reviewed-on: #23
2023-08-25 21:47:47 +00:00
6401692d0d finish menu with album import 2023-08-25 23:46:43 +02:00
1fc9c48d2c fix add menu with album 2023-08-24 22:43:15 +02:00
d9c20cedcb add menu 2023-08-24 00:01:46 +02:00
9d41e57379 add method getAuthor 2023-08-23 23:35:35 +02:00
d88ae7ed44 add author 2023-08-23 22:48:38 +02:00
50bf31d334 HTTP Post for album 2023-08-22 00:21:14 +02:00
9b58b45ae8 replace string 2023-08-20 21:41:16 +02:00
418bea3778 add album test wip 2023-08-17 00:18:45 +02:00
5959ab5b2e add galery wip 2023-08-12 00:06:09 +02:00
e4eb1b6b68 add image to media wordpress 2023-08-05 23:22:44 +02:00
9ed5ffe399 add variable 2023-08-05 12:28:33 +02:00
28b513e1b2 add private method album 2023-08-05 12:03:17 +02:00
02f0c20bd0 Merge pull request 'menu-api' (#22) from menu-api into master
Reviewed-on: #22
2023-08-04 21:22:38 +00:00
1655217050 add album article 2023-08-04 23:21:45 +02:00
49c1552062 add wpmenu 2023-07-31 23:57:35 +02:00
7f800c8f7b add menu 100% work 2023-07-31 23:39:40 +02:00
5399b12133 menu wordpress 75% 2023-07-27 00:39:15 +02:00
b7493206a2 add private method addItemMenu 2023-07-26 19:22:43 +02:00
8de7485775 return value menu 2023-07-26 19:18:18 +02:00
eee14e08f1 add update menu item 2023-07-24 23:51:41 +02:00
cf2c1aa617 add item menu 2023-07-24 23:32:20 +02:00
e17cace820 remove navigation 2023-07-24 22:51:40 +02:00
941776a7c1 Create menu 2023-07-24 22:43:52 +02:00
72fbe0a364 add menu WIP debug 2023-07-22 20:23:26 +02:00
1f8ea70b40 define content for navigation 2023-07-20 00:04:11 +02:00
c6894648d1 add link 2023-07-18 22:09:20 +02:00
d5ddf1601b add post fine 2023-07-17 23:20:47 +02:00
aa8ac9cfcb first test id from post 2023-07-15 14:00:23 +02:00
82f9acd658 get title from post 2023-07-15 13:55:01 +02:00
7593b68b6c getting post title 2023-07-15 11:51:15 +02:00
5fe4b1f786 add href 2023-07-13 23:53:54 +02:00
0445054dc8 fix get id menu 2023-07-13 22:35:38 +02:00
be9ad9b934 swap loop 2023-07-13 00:25:14 +02:00
e6328135da get id 2023-07-13 00:09:18 +02:00
dded126926 change return getId 2023-07-11 21:42:50 +02:00
bca529f3c3 parent and children array 2023-07-11 21:39:41 +02:00
5059a15826 menu and submenu 2023-07-11 00:17:24 +02:00
20c4adb3cf menu and sub-menu wip 2023-07-09 21:42:09 +02:00
74fa87ea73 list menu wip 2023-07-09 19:11:05 +02:00
03f833a2c3 Menu and sub-menu 2023-07-09 15:27:20 +02:00
9acb620f93 add condition if 2023-07-09 12:29:13 +02:00
c6ccf98b1b working menu 2023-07-06 00:42:47 +02:00
95f5203727 list menu with id 2023-07-04 00:26:27 +02:00
9bbf769b40 test WIP 2023-07-01 22:16:35 +02:00
f0b4ba5e27 add url and file 2023-07-01 21:56:10 +02:00
47f504beb5 list tag 2023-07-01 00:11:43 +02:00
4b6b06aade add public method from URL 2023-06-30 23:52:56 +02:00
d10867a983 add class WPMenu 2023-06-30 23:28:54 +02:00
6fba5f009a Merge pull request 'directory-file' (#21) from directory-file into master
Reviewed-on: #21
2023-06-28 21:30:20 +00:00
699cecad4f change with tmp files 2023-06-28 23:28:24 +02:00
9f87f38347 fix file tmp for directory 2023-06-28 23:03:27 +02:00
55d62cebfb separate files method 2023-06-27 14:48:48 +02:00
193b0e6ef7 add tmp files wip 2023-06-27 14:37:45 +02:00
b88917127d Merge pull request 'webpage-file' (#20) from webpage-file into master
Reviewed-on: #20
2023-06-26 22:28:26 +00:00
781d8959c4 fix tmp directory parameter 2023-06-27 00:25:23 +02:00
a67ff868f3 fix json read file 2023-06-26 23:52:03 +02:00
8e0abc40bd check files tmp 2023-06-26 23:09:54 +02:00
9149a6c5cb rollback webpage 2023-06-26 22:44:42 +02:00
d1b6e8048a add revert files json 2023-06-25 21:16:05 +02:00
0eab1d885b add open file tmp 2023-06-20 21:38:39 +02:00
35ff22d463 change parameter for webpage 2023-06-20 00:17:38 +02:00
7dace5bdb7 add file tmp 2023-06-19 23:58:59 +02:00
703cc8922a Merge pull request 'diff-img' (#19) from diff-img into master
Reviewed-on: #19
2023-06-16 22:08:50 +00:00
ff3ee301fb diff img path done 2023-06-15 00:10:44 +02:00
04da5bc5f6 diff path network 2023-06-13 22:00:51 +02:00
f01a69a1e7 Merge pull request 'wpchange' (#18) from wpchange into master
Reviewed-on: #18
2023-06-12 22:48:57 +00:00
da4db0277a add img a change 2023-06-13 00:46:18 +02:00
7228911e68 add js and css 2023-06-13 00:38:34 +02:00
9e7e1b27fd change WIP test 2023-06-11 20:24:22 +02:00
16368c13bb add WPChange 2023-06-10 01:58:08 +02:00
c631909cb6 WPchange wip 2023-06-06 00:22:16 +02:00
3e76892676 add wpchange 2023-06-05 23:46:57 +02:00
3e75f05340 Merge pull request 'add-parameter' (#17) from add-parameter into master
Reviewed-on: #17
2023-06-05 20:58:51 +00:00
e48b262d7e add parameter no-image 2023-06-03 09:07:33 +02:00
2f1c081823 add parameter 2023-06-01 15:28:48 +02:00
4bd6f5c038 Merge pull request 'add wait' (#16) from wait_remove into master
Reviewed-on: #16
2023-05-29 21:36:38 +00:00
d3a03e1cb3 add wait 2023-05-29 23:36:11 +02:00
6 changed files with 1124 additions and 171 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@ backup*/
wp-navigation wp-navigation
*.log *.log
__pycache__/ __pycache__/
wp-gallery

View File

@@ -3,12 +3,32 @@ from requests.auth import HTTPBasicAuth
from getpass import getpass from getpass import getpass
from urllib.parse import urlparse from urllib.parse import urlparse
from concurrent import futures from concurrent import futures
from concurrent.futures import as_completed, wait from concurrent.futures import as_completed, wait, ALL_COMPLETED
import argparse, logging, threading import argparse, logging, threading, os, glob
from lib.WPImport import WPimport from lib.WPImport import WPimport
from lib.WPExport import WPExport from lib.WPExport import WPExport
from lib.WPRemove import WPRemove from lib.WPRemove import WPRemove
from lib.WPChange import WPChange
from lib.WPMenu import WPMenu
def errorRevert(logger, revert, tmp):
if revert is True:
files_tmp = glob.glob("{0}/*.json".format(tmp))
if len(files_tmp) == 0:
logger.error("Error revert, because files not found")
exit(1)
if len(files_tmp) != int(args.parallel):
for file_r in files_tmp:
os.remove(file_r)
logger.error("Error revert, because number files tmp is incompatible with parallel number")
exit(1)
def change(index, number, args, logger, tmp, revert):
changeWp = WPChange(logger=logger, index_name=index, number_thread=number, tmp=tmp)
changeWp.fromDirectory(args.directory, revert)
del changeWp
def remove(index, number, args, basic, logger, ssl_wordpress): def remove(index, number, args, basic, logger, ssl_wordpress):
removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress, index_name=index, number_thread=number) removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress, index_name=index, number_thread=number)
@@ -33,21 +53,21 @@ def remove(index, number, args, basic, logger, ssl_wordpress):
del removeWp del removeWp
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog): def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp):
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread) if revert is False:
exportWp.getUrlPage(name_thread, max_thread)
for i in ["article", "page"]: for i in ["article", "page"]:
for j in ["publications", "principal"]: for j in ["publications", "principal"]:
if html is False: if html is False:
exportWp.downloadHTML(webpage[j][i]) exportWp.downloadHTML(j, i)
if img is False: if img is False:
exportWp.downloadImg(webpage[j][i]) exportWp.downloadImg(j, i)
del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp, author):
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog):
canalblog = canalblog.split(",") canalblog = canalblog.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
@@ -63,14 +83,15 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
except Exception as err: except Exception as err:
logger.error("{0} : parsing error : {1}".format(name, err)) logger.error("{0} : parsing error : {1}".format(name, err))
exit(1) exit(1)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog, tmp=tmp)
webpage = exportWp.getUrlPage(name_thread, max_thread) if not revert:
exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
for j in wordpress: for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, author=author)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
importWp.fromUrl(webpage[l][k]) importWp.fromUrl(l, k)
del importWp del importWp
else: else:
@@ -86,9 +107,10 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
logger.error("parsing error : {0}".format(err)) logger.error("parsing error : {0}".format(err))
exit(1) exit(1)
exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog) exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread) if not revert:
exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, author=author)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
@@ -97,24 +119,24 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress): def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image, revert, author):
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",") directory = directory.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
if serial is False: if serial is False:
for i in wordpress: for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, author=author)
for j in directory: for j in directory:
importWp.fromDirectory(j, name_thread, max_thread) importWp.fromDirectory(j, name_thread, max_thread, revert)
del importWp del importWp
else: else:
if len(directory) != len(wordpress): if len(directory) != len(wordpress):
logger.error("{0} : Error : Number directory is differant than wordpress".format(name)) logger.error("{0} : Error : Number directory is different than wordpress".format(name))
exit(1) exit(1)
for i in range(0, len(wordpress)-1): for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, author=author)
importWp.fromDirectory(directory[i]) importWp.fromDirectory(directory[i], name_thread, max_thread, revert)
del importWp del importWp
@@ -127,6 +149,8 @@ if __name__ == '__main__':
parser.add_argument("--parser", help="Parser content", default="html.parser") parser.add_argument("--parser", help="Parser content", default="html.parser")
parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1) parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1)
parser.add_argument("--no-ssl", help="No ssl for canalblog and/or wordpress (example wordpress,canalblog)", dest="ssl", default="") parser.add_argument("--no-ssl", help="No ssl for canalblog and/or wordpress (example wordpress,canalblog)", dest="ssl", default="")
parser.add_argument("--revert", help="Restart a work from stopping work", action="store_true")
parser.add_argument("--tmp", help="directory tmp", default="/tmp/import_export_canablog")
subparsers = parser.add_subparsers(dest="command") subparsers = parser.add_subparsers(dest="command")
@@ -143,6 +167,11 @@ if __name__ == '__main__':
import_parser.add_argument("--remove-categories", help="Remove all categories", dest="categories", action="store_true") import_parser.add_argument("--remove-categories", help="Remove all categories", dest="categories", action="store_true")
import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true") import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true")
import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true") import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true")
import_parser.add_argument("--no-create", help="No create post", dest="create", default="store_false", action="store_true")
import_parser.add_argument("--no-update", help="No update post", dest="update", default="store_false", action="store_true")
import_parser.add_argument("--no-image", help="No image add or update", dest="image", default="store_false", action="store_true")
import_parser.add_argument("--author", dest="author", help="Define author", default="")
remove_parser = subparsers.add_parser("remove") remove_parser = subparsers.add_parser("remove")
@@ -168,7 +197,20 @@ if __name__ == '__main__':
export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true") export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true") export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")
change_parser = subparsers.add_parser("change")
change_parser.add_argument("--directory",
default="",
help="Directory")
change_parser.add_argument("--file",
default="",
help="File")
menu_parser = subparsers.add_parser("menu")
menu_parser.add_argument("--user", help="wordpress user", required=True)
menu_parser.add_argument("--password", help="password wordpress's user", default="")
menu_parser.add_argument("--file", help="HTML file", default="")
menu_parser.add_argument("--canalblog", help="URL Canalblog", default="")
menu_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
args = parser.parse_args() args = parser.parse_args()
@@ -204,7 +246,9 @@ if __name__ == '__main__':
fileHandler.setFormatter(formatter) fileHandler.setFormatter(formatter)
logger.addHandler(fileHandler) logger.addHandler(fileHandler)
if args.command == "import" or args.command == "remove": os.makedirs(args.tmp, exist_ok=True)
if args.command == "import" or args.command == "remove" or args.command == "menu":
password = args.password password = args.password
if len(args.password) == 0: if len(args.password) == 0:
password = getpass() password = getpass()
@@ -215,29 +259,44 @@ if __name__ == '__main__':
basic = HTTPBasicAuth(args.user, password) basic = HTTPBasicAuth(args.user, password)
if args.command == "import": if args.command == "import":
wordpress = args.wordpress.split(",") wordpress = args.wordpress.split(",")
importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser, ssl_wordpress=ssl_wordpress, author=args.author, ssl_canalblog=ssl_canalblog)
if len(args.file) > 0: if len(args.file) > 0:
for i in wordpress: for i in wordpress:
importWp.setUrl(i) importWp.setUrl(i)
importWp.fromFile(files=args.file.split(",")) importWp.fromFile(files=args.file.split(","))
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromFile("{0}".format(args.file.split(",")[0]))
if len(args.directory) > 0: if len(args.directory) > 0:
remove(args, basic, logger, ssl_wordpress)
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
errorRevert(logger, args.revert, args.tmp)
wait_for = [ wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress) ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image, args.revert, args.author)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
wait(wait_for, return_when=ALL_COMPLETED)
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromFile("{0}/index.html".format(args.directory))
except Exception as err: except Exception as err:
logger.error("Threading error : {0}".format(err)) logger.error("Threading error : {0}".format(err))
if len(args.canalblog) > 0: if len(args.canalblog) > 0:
remove(args, basic, logger, ssl_wordpress)
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
errorRevert(logger, args.revert, args.tmp)
wait_for = [ wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog) ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, args.tmp, args.author)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
wait(wait_for, return_when=ALL_COMPLETED)
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromUrl(args.canalblog)
except Exception as err: except Exception as err:
logger.error("Threading error : {0}".format(err)) logger.error("Threading error : {0}".format(err))
exit(0) exit(0)
@@ -271,7 +330,7 @@ if __name__ == '__main__':
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ wait_for = [
ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog) ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog, args.revert, args.tmp)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
except Exception as err: except Exception as err:
@@ -286,3 +345,25 @@ if __name__ == '__main__':
except Exception as err: except Exception as err:
logger.error("Thread error for remove : {0}".format(err)) logger.error("Thread error for remove : {0}".format(err))
exit(0) exit(0)
if args.command == "change":
if len(args.directory) > 0:
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
errorRevert(logger, args.revert, args.tmp)
wait_for = [ ex.submit(change, i, args.parallel, args, logger, args.tmp, args.revert) for i in range(0, int(args.parallel)) ]
except Exception as err:
logger.error("Thread error for remove : {0}".format(err))
if len(args.file) > 0:
changeWp = WPChange(logger=logger)
for filei in args.file.split(","):
changeWp.fromFile(filei)
exit(0)
if args.command == "menu":
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
if len(args.file) > 0:
menuWp.fromFile(args.file)
if len(args.canalblog) > 0:
menuWp.fromUrl(args.canalblog)
exit(0)

173
lib/WPChange.py Normal file
View File

@@ -0,0 +1,173 @@
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
class WPChange:
# Constructor
def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser", tmp="/tmp/import_export_canablog"):
self._name = "Thread-{0}".format(index_name)
self._logger = logger
self._number_thread = number_thread
self._parser = parser
self._tmp = tmp
self._index_name = index_name
# Destructor
def __del__(self):
print("{0} : Import finished".format(self._name))
# Public method
## from file
def fromFile(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i]))
self._change(files[i])
## From directory
def fromDirectory(self, directory="", revert=False):
self._directory = directory
directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0:
files = self._getFiles(directories)
if revert is False:
self._tmpFiles(files=files, number_thread=self._index_name, max_thread=self._number_thread)
self._fromFileTmp()
else:
self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files=[]):
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
self._change(files[i])
# Private method
def _fromFileTmp(self):
try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
files = json.loads(file.read())
self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
self._change(files[i])
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def _tmpFiles(self, files=[], number_thread=1, max_thread=1):
print()
divFiles = int(len(files) / int(max_thread))
currentRangeFiles = int(divFiles * (int(number_thread)+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
webpage = []
for i in range(firstRange, currentRangeFiles):
webpage.append(files[i])
try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
## Get all files
def _getFiles(self, item):
files = []
for i in item:
for j in os.listdir(i):
if os.path.isfile("{0}/{1}".format(i, j)):
files.append("{0}/{1}".format(i, j))
return files
## Get directories
def _getDirectories(self, subdirectory, item):
sub = subdirectory
for i in os.listdir(item):
if os.path.isdir("{0}/{1}".format(item, i)):
sub.append("{0}/{1}".format(item, i))
subdirectory = self._getDirectories(sub, "{0}/{1}".format(item, i))
return subdirectory
## Change path img file
def _change(self, file):
ext_img = ["png", "svg", "gif", "jpg", "jpeg"]
try:
with open(file, 'r') as f:
content = f.read()
soup = BeautifulSoup(content, self._parser)
img = soup.find_all("img")
for i in img:
src = i.get("src")
o = urlparse(src)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source image {1} /img/{2}/{3}".format(self._name, src, o.netloc, o.path))
content = content.replace(src, "/img/{0}/{1}".format(o.netloc, o.path))
script = soup.find_all("script", {"type": "text/javascript"})
for i in script:
src = i.get("src")
if src is not None:
o = urlparse(src)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source js {1} /dists/js/{2}/{3}".format(self._name, src, o.netloc, o.path))
content = content.replace(src, "/dists/js/{0}/{1}".format(o.netloc, o.path))
link = soup.find_all("link", {"rel": "stylesheet"})
for i in link:
href = i.get("href")
if href is not None:
o = urlparse(href)
if len(o.netloc) > 0:
self._logger.info("{0} : Change source css {1} /dists/css/{2}/{3}".format(self._name, href, o.netloc, o.path))
content = content.replace(href, "/dists/css/{0}/{1}".format(o.netloc, o.path))
a = soup.find_all("a", {"target": "_blank"})
for i in a:
href = i.get("href")
if href is not None:
o = urlparse(href)
if len(o.netloc) > 0:
ext = o.path.split(".")[len(o.path.split("."))-1]
if ext in ext_img:
self._logger.info("{0} : Change a img {1} /img/{2}/{3}".format(self._name, href, o.netloc, o.path))
content = content.replace(href, "/img/{0}/{1}".format(o.netloc, o.path))
try:
with open(file, "w") as f:
self._logger.info("{0} : File write : {1}".format(self._name, file))
f.write(content)
except Exception as ex:
self._logger.error("{0} : Error for write file {1} : {2}".format(self._name, file, ex))
except Exception as ex:
self._logger.error("{0} : Error for read file {1} : {2}".format(self._name, file, ex))

View File

@@ -1,12 +1,12 @@
#!/usr/bin/python3 #!/usr/bin/python3
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
import requests, os, argparse, logging import requests, os, argparse, logging, json
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry from requests.packages.urllib3.util.retry import Retry
class WPExport: class WPExport:
def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True): def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True, tmp="/tmp/import_export_canablog"):
self._url = url self._url = url
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
@@ -22,7 +22,7 @@ class WPExport:
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries)) self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._tmp = tmp
# Destructor # Destructor
def __del__(self): def __del__(self):
@@ -57,15 +57,27 @@ class WPExport:
# Download HTML # Download HTML
def downloadHTML(self, webpage): def downloadHTML(self, first, second):
self._downloadPage(webpage, self._dir) try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
webpage = json.loads(file.read())
self._downloadPage(webpage[first][second], self._dir)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Download Image # Download Image
def downloadImg(self, webpage): def downloadImg(self, first, second):
page_src = self._getImg(webpage) try:
o = urlparse(self._url) with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img")) webpage = json.loads(file.read())
page_src = self._getImg(webpage[first][second])
o = urlparse(self._url)
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img"))
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Get URL # Get URL
@@ -161,7 +173,14 @@ class WPExport:
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error : {1}".format(self._name, err)) self._logger.error("{0} : Exception error : {1}".format(self._name, err))
exit(1) exit(1)
return webpage try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))

View File

@@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport: class WPimport:
# Constructor # Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True): def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog", author="", ssl_canalblog=True):
self._name = name self._name = name
self._basic = basic self._basic = basic
self._wordpress = wordpress self._wordpress = wordpress
@@ -16,15 +16,20 @@ class WPimport:
self._parser = parser self._parser = parser
self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'} self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._protocol = "https" self._protocol = "https"
self._directory = "backup"
if ssl_wordpress is False: if ssl_wordpress is False:
self._protocol = "http" self._protocol = "http"
self._request = requests.Session() self._request = requests.Session()
self._ssl_canalblog = ssl_canalblog
retries = Retry(connect=10, read=10, redirect=5, retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries)) self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._no_create = no_create
self._no_update = no_update
self._no_image = no_image
self._tmp = tmp
self._author = author
# Destructor # Destructor
def __del__(self): def __del__(self):
@@ -35,50 +40,62 @@ class WPimport:
def setUrl(self, wordpress): def setUrl(self, wordpress):
self._wordpress = wordpress self._wordpress = wordpress
def fromUrl(self, webpage): def fromUrl(self, first, second):
for i in range(0, len(webpage)): try:
try: with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
r = self._request.get(webpage[i]) webpage_content = json.loads(file.read())
if r.status_code == 200: self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage_content)))
self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i])) webpage = webpage_content[first][second]
soup = BeautifulSoup(r.content, self._parser) for i in range(0, len(webpage)):
articlebody = soup.find_all("div", class_="articlebody") try:
if len(articlebody) > 0: r = self._request.get(webpage[i])
self._addOrUpdatePost(soup) if r.status_code == 200:
else: self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
self._addOrUpdateFeaturedMedia(soup) soup = BeautifulSoup(r.content, self._parser)
else: articlebody = soup.find_all("div", class_="articlebody")
self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, webpage[i], r.status_code)) if len(articlebody) > 0:
self._logger.debug("{0} : {1}".format(self._name, r.content)) self._addOrUpdatePost(soup)
except ConnectionError as err: else:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err)) albumbody = soup.find_all("div", class_="albumbody")
exit(1) if len(albumbody) > 0:
except Exception as err: self._addOrUpdateAlbum(soup)
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, webpage[i], err)) else:
self._addOrUpdateFeaturedMedia(soup)
del webpage_content[first][second][i]
webpage_content = json.dumps(webpage_content)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content)
else:
self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, webpage[i], r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err))
exit(1)
except IOError as err:
self._logger.error("{0} : Connection error for IO url {1} : {2}".format(self._name, webpage[i], err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, webpage[i], err))
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def fromDirectory(self, directory="", number_thread=1, max_thread=1): def fromDirectory(self, directory="", number_thread=1, max_thread=1, revert=False):
self._directory = directory
directory = "{0}/archives".format(directory) directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory)) directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0: if len(directories) > 0:
files = self._getFiles(directories) files = self._getFiles(directories)
self.fromFile(files, number_thread, max_thread) if revert is False:
self._tmpFiles(files=files, number_thread=number_thread, max_thread=max_thread)
self._fromFileTmp()
else: else:
self._logger.error("{0} : No files for {1}".format(self._name, directory)) self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files=[], number_thread=1, max_thread=1): def fromFile(self, files=[]):
divFiles = int(len(files) / max_thread) for i in range(0, len(files)):
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
if os.path.exists(files[i]): if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i])) self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
with open(files[i], 'r') as f: with open(files[i], 'r') as f:
content = f.read() content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content))) self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
@@ -88,11 +105,278 @@ class WPimport:
if len(articlebody) > 0: if len(articlebody) > 0:
self._addOrUpdatePost(soup) self._addOrUpdatePost(soup)
else: else:
self._addOrUpdateFeaturedMedia(soup) albumbody = soup.find_all("div", class_="albumbody")
if len(albumbody) > 0:
self._addOrUpdateAlbum(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
# Private method # Private method
def _getAuthor(self, author):
params = {"search":author, "per_page":100}
author = 0
try:
self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
if page.status_code == 200:
self._logger.info("{0} : Get author : {1}".format(self._name, author))
result = page.json()
for a in result:
author = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
return author
def _getInfoAlbum(self, link):
if self._ssl_canalblog:
link = link.replace("http", "https")
self._logger.info("{0} : Info album : {1}".format(self._name, link))
link_o = urlparse(link)
if len(link_o.netloc) > 0:
self._logger.info("{0} : get album info from web : {1}".format(self._name, link_o))
try:
response = self._request.get(link)
if response.status_code == 200:
self._logger.info("{0} : get content info from web : {1}".format(self._name, link))
page_img = response.content
except ConnectionError as err:
self._logger.error("{0} : Connection error for get album info : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get album info : {1}".format(self._name, err))
exit(1)
else:
self._logger.info("{0} : get album info from file : {1}".format(self._name, link_o))
if os.path.exists("{0}/..{1}".format(self._directory, link_o)):
page_img = open("{0}/..{1}".format(self._directory, link_o), "r")
soup = BeautifulSoup(page_img, self._parser)
paragraph = soup.find("div", class_="albumbody").find("p")
self._logger.info("{0} get paragraph : {1}".format(self._name, paragraph))
split_paragraph = str(paragraph).split("<br>")
self._logger.info("{0} length paragraph splitted : {1}".format(self._name, len(split_paragraph)))
if len(split_paragraph) == 1:
split_paragraph = str(paragraph).split("<br/>")
self._logger.info("{0} get paragraph splitted : {1}".format(self._name, split_paragraph))
author = split_paragraph[1].split(":")[1].replace(" ", "").lower()
return author
def _addOrUpdateAlbum(self, soup):
self._logger.info("{0} : Add/Update Album".format(self._name))
albumbody = soup.find("div", class_="albumbody")
albumtitle = albumbody.find("h2").get_text()
self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle))
albumdesc = albumbody.find("div", class_="albumdesc").find("p")
img_a = albumbody.find_all("img")
list_img = []
page_is_exist = False
if self._no_image is False:
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img_a)))
for i in img_a:
new_img = {}
href_img = i.get("src")
href_img_o = urlparse(href_img)
try:
if len(href_img_o.netloc) > 0:
img_ok = False
href_img = href_img.replace("_q", "_o")
page_img = self._request.get(href_img)
if page_img.status_code == 200:
img_ok = True
else:
if os.path.exists("{0}/..{1}".format(self._directory, href_img)):
page_img = open("{0}/..{1}".format(self._directory, href_img), "r")
img_ok = True
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if img_ok is True:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} content img : {1}".format(self._name, list_img))
content_html = ""
if len(list_img) > 0:
content_html = "<!-- wp:paragraph -->\n{0}\n<!-- /wp:paragraph -->\n\n".format(albumdesc)
for i in range(0, len(list_img)):
content_html = content_html + "<!-- wp:image {\"id\":id-image,\"sizeSlug\":\"large\",\"linkDestination\":\"none\"} --><figure class=\"wp-block-image size-large\"><img src=\"src-image\" alt=\"\" class=\"wp-image-id-image\"/></figure><!-- /wp:image -->\n\n".replace("id-image", str(list_img[i]["id"])).replace("src-image", list_img[i]["new_src"])
self._logger.info("{0} : content html : {1}".format(self._name, content_html))
if len(content_html) > 0:
data = {"title":albumtitle, "content":content_html, "status":"publish"}
if len(self._author) > 0:
author = self._getAuthor(self._author)
else:
link_a = albumbody.find_all("a")
for i in link_a:
if re.search(r"/albums/", i.get("href", "/")):
href_a = i.get("href", "/")
break
author = self._getInfoAlbum(href_a)
self._logger.info("{0} : author : {1}".format(self._name, author))
author = self._getAuthor(author)
data = {"title":albumtitle, "content":content_html, "status":"publish"}
if author != 0:
data = {"title":albumtitle, "content":content_html, "status":"publish", "author":author}
self._logger.debug("{0} : data for album page : {1}".format(self._name, data))
for index in range(1,10):
params = {"search": albumtitle, "per_page":100, "page": index}
try:
self._logger.info("{0} : Search post with index {2} : {1}".format(self._name, albumtitle, index))
page = self._request.get("{1}://{0}/wp-json/wp/v2/pages".format(self._wordpress, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json()
if len(result) == 0:
break
self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result:
title_rendered = i["title"]["rendered"]
self._logger.info("{0} : Search title pages for |{2}| : |{1}|".format(self._name, title_rendered, albumtitle))
if len(title_rendered) != len(albumtitle):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : Search title pages for |{2}| : |{1}|".format(self._name, title_rendered, albumtitle))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(albumtitle), len(title_rendered)))
if title_rendered == albumtitle:
if self._no_update is False:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, albumtitle))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/pages/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
if page.status_code == 200:
self._logger.info("{0} : Page deleted : {1}".format(self._name, albumtitle))
else:
self._logger.error("{0} : Page not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted page : {1}".format(self._name, err))
else:
self._logger.debug("{0} : Data for page to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, albumtitle))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/pages/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : page updated : {1}".format(self._name, albumtitle))
else:
self._logger.error("{0} : page not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update page : {1}".format(self._name, err))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : Connection for update page error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for search page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for search page : {1}".format(self._name, err))
if page_is_exist is False and self._no_create is False:
try:
self._logger.info("{0} : Creating page : {1}".format(self._name, data["title"]))
page = self._request.post("{1}://{0}/wp-json/wp/v2/pages".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
result = page.json()
self._logger.info("{0} : page added : {1}".format(self._name, result["title"]["raw"]))
else:
self._logger.error("{0} : page not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create page : {1}".format(self._name, err))
def _fromFileTmp(self):
try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
files = json.loads(file.read())
self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
with open(files[i], 'r') as f:
content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
soup = BeautifulSoup(content, self._parser)
articlebody = soup.find_all("div", class_="articlebody")
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
if len(articlebody) > 0:
self._addOrUpdatePost(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def _tmpFiles(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
webpage = []
for i in range(firstRange, currentRangeFiles):
webpage.append(files[i])
try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
## replace caracter ## replace caracter
def _replaceCaracter(self, title_rendered): def _replaceCaracter(self, title_rendered):
@@ -435,48 +719,64 @@ class WPimport:
img_a = articlebody[0].find_all("a", {"target": "_blank"}) img_a = articlebody[0].find_all("a", {"target": "_blank"})
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a))) self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
list_img = [] list_img = []
for i in img_a: if self._no_image is False:
new_img = {} for i in img_a:
img = i.find_all("img") new_img = {}
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img))) img = i.find_all("img")
if len(img) > 0: self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
href_a = i.get("href") if len(img) > 0:
href_img = img[0].get("src") href_a = i.get("href")
new_img["old_src"]=href_img href_img = img[0].get("src")
new_img["old_href"]=href_a href_a_o = urlparse(href_a)
try: href_img_o = urlparse(href_img)
page_img = self._request.get(href_img) new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
if len(href_img_o.netloc) > 0:
img_ok = False
page_img = self._request.get(href_img)
if page_img.status_code == 404: if page_img.status_code == 404:
href_img = href_a href_img = href_a
try: try:
page_img = self._request.get(href_a) page_img = self._request.get(href_a)
except ConnectionError as err: if page_img.status_code == 200:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) img_ok = True
exit(1) except ConnectionError as err:
except Exception as err: self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) exit(1)
exit(1) except Exception as err:
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code)) self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
if page_img.status_code == 200: exit(1)
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"] else:
new_img["new_src"]=media["rendered"] if os.path.exists("{0}/..{1}".format(self._directory, href_img)):
list_img.append(new_img) page_img = open("{0}/..{1}".format(self._directory, href_img), "r")
if href_img != href_a: img_ok = True
media=self._addOrUpdateMedia(href_a, page_img) else:
if os.path.exists("{0}/..{1}".format(self._directory, href_a)):
page_img = open("{0}/..{1}".format(self._directory, href_a), "r")
img_ok = True
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if img_ok is True:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"] new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"] new_img["new_src"]=media["rendered"]
list_img.append(new_img) list_img.append(new_img)
if page_img.status_code not in [200, 404]: if href_img != href_a:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code)) media=self._addOrUpdateMedia(href_a, page_img)
self._logger.debug("{0} : {1}".format(self._name, page_img.content)) new_img["id"]=media["id"]
except ConnectionError as err: new_img["new_src"]=media["rendered"]
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err)) list_img.append(new_img)
exit(1) if page_img.status_code not in [200, 404]:
except Exception as err: self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err)) self._logger.debug("{0} : {1}".format(self._name, page_img.content))
exit(1) except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img))) self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
comment_post = self._getComment(comment) comment_post = self._getComment(comment)
@@ -552,6 +852,8 @@ class WPimport:
title = articletitle[0].text title = articletitle[0].text
author = articleacreator[0].text.lower() author = articleacreator[0].text.lower()
if len(self._author) > 0:
author = self._author
body = articlebody[0].find_all("p") body = articlebody[0].find_all("p")
bodyhtml = "<p>" bodyhtml = "<p>"
for i in body: for i in body:
@@ -573,25 +875,7 @@ class WPimport:
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data)) self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data))
params = {"search":author, "per_page":100} data["author"] = self._getAuthor(author)
try:
self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
if page.status_code == 200:
self._logger.info("{0} : Get author id : {1}".format(self._name, result))
result = page.json()
for a in result:
data["author"] = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
page_is_exist = False page_is_exist = False
for index in range(1,10): for index in range(1,10):
@@ -615,45 +899,46 @@ class WPimport:
self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title)) self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered))) self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title: if title_rendered == title:
page_is_exist = True if self._no_update is False:
post_id = i["id"] page_is_exist = True
count = count + 1 post_id = i["id"]
if count > 1: count = count + 1
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title)) if count > 1:
try: self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
params = {"force":1} try:
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params) params = {"force":1}
if page.status_code == 200: page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.info("{0} : Post deleted : {1}".format(self._name, title)) if page.status_code == 200:
else: self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) else:
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
except ConnectionError as err: self._logger.debug("{0} : {1}".format(self._name, page.content))
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err)) except ConnectionError as err:
exit(1) self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
except Exception as err: exit(1)
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err)) except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
else: else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i)) self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try: try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title)) self._logger.info("{0} : Post updated : {1}".format(self._name, title))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"]) self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"]) self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else: else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err)) self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 400: if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
@@ -667,7 +952,7 @@ class WPimport:
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err)) self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err))
if page_is_exist is False: if page_is_exist is False and self._no_create is False:
try: try:
self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"])) self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"]))
page = self._request.post("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) page = self._request.post("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))

394
lib/WPMenu.py Normal file
View File

@@ -0,0 +1,394 @@
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPMenu:
# Constructor
def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="", logger=None, parser="html.parser", ssl_canalblog=True, ssl_wordpress=True):
self._name = name
self._basic = basic
self._canalblog = canalblog
self._wordpress = wordpress
self._logger = logger
self._parser = parser
self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._protocol_wordpress = "https"
self._protocol_canalblog = "https"
self._directory = "backup"
if ssl_wordpress is False:
self._protocol_wordpress = "http"
if ssl_canalblog is False:
self._protocol_canalblog = "http"
self._request_canalblog = requests.Session()
self._request_wordpress = requests.Session()
retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
print("{0} : Import finished for {1}".format(self._name, self._wordpress))
# Public method
## From file
def fromFile(self, files):
if os.path.exists(files):
with open(files, 'r') as f:
self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
content = f.read()
self._menu(content)
else:
self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))
## Get from URL
def fromUrl(self, canalblog):
self._canalblog = canalblog
try:
o = urlparse(canalblog)
o = o._replace(scheme=self._protocol_canalblog)
i = o.geturl().replace(":///", "://")
page = self._request_canalblog.get(i)
if page.status_code == 200:
self._logger.info("{0} : Page web is being processed : {1}".format(self._name, i))
self._menu(page.content)
else:
self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))
## replace caracter
def _replaceCaracter(self, title_rendered):
list_replace = {'&rsquo;': "'", '&#8211;': '-', '&#8230;': '...', '« ': '"', ' »': '"', '« ': '"', ' »': '"', '&#8217;': "'", '"&lsquo;': "'"}
for old, new in list_replace.items():
title_rendered = title_rendered.replace(old, new)
return title_rendered
def _getIdfromTitlePost(self, content):
idMenu = {"id":0, "type":"", "link":""}
soup = BeautifulSoup(content, self._parser)
articletitle = soup.find_all("h2", class_="articletitle")
if len(articletitle) > 0:
articletitle = articletitle[0].get_text()
search = "posts"
post_type = "post"
if len(articletitle) == 0:
articletitle = soup.find_all("div", class_="albumbody")
if len(articletitle) > 0:
articletitle = articletitle[0].find("h2").get_text()
search = "pages"
post_type = "page"
exist = False
for index in range(1,10):
if exist is False:
params = {"search":articletitle, "per_page":100, "page":index}
try:
self._logger.debug("{0} : Get Url for {3} : {1} {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/{2}".format(self._wordpress, self._protocol_wordpress, search), params, search))
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/{2}".format(self._wordpress, self._protocol_wordpress, search), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Get content {2} : {1}".format(self._name, len(result), search))
if len(result) > 0:
for i in result:
title_rendered = i["title"]["rendered"]
if len(articletitle) != len(title_rendered):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : comparaison debug {1} {2}".format(self._name, articletitle, title_rendered))
if articletitle == title_rendered:
self._logger.debug("{0} : get {2} id : {1}".format(self._name, i, search))
idMenu = {"id":i["id"], "type":post_type, "link": i["link"]}
exist = True
else:
self._logger.debug("{0} : {2} {1}".format(self._name, result, len(result)))
break
elif page.status_code == 400:
self._logger.debug("{0} : {2} {1}".format(self._name, page.content, page.status_code))
break
else:
self._logger.error("{0} : Post didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get content : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get content : {1} ".format(self._name, err))
return idMenu
def _getIdFromPost(self, href):
idMenu = {"id":0, "type":"", "link":""}
o = urlparse(href)
if len(o.netloc) > 0:
try:
page = self._request_canalblog.get(href)
if page.status_code == 200:
self._logger.info("{0} : Get content : {1}".format(self._name, href))
idMenu = self._getIdfromTitlePost(page.content)
else:
self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, href))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, href, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, href, err))
else:
if os.path.exists("{0}/..{1}".format(self._directory, o.path)):
try:
content = open("{0}/..{1}".format(self._directory, o.path), "r")
idMenu = self._getIdfromTitlePost(content)
except Exception as err:
self._logger.error("{0} : Exception error for get file content {1} : {2}".format(self._name, href, err))
return idMenu
def _getIdFromReverse(self, title, href):
self._logger.info("{0} : get title {1} from href {2}".format(self._name, title, href))
idMenu = {"id":0, "type":"", "link":""}
if href != "#":
title = href[::-1]
second_title = title.split("/")[2]
second_title = second_title[::-1]
link = title.split("/")[0]
link = link[::-1]
title = title.split("/")[1]
title = title[::-1]
self._logger.info("{0} link {1} title {2}".format(self._name, link, title))
if link == "index.html":
if second_title == "albums":
idMenu = self._getIdFromPost(href)
else:
idMenu = self._getId(title)
else:
idMenu = self._getIdFromPost(href)
return idMenu
def _getId(self, title):
idMenu = {"id": 0, "type":"", "link":""}
exist = False
if exist is False:
for i in ["categories", "tags"]:
typeId = "category"
if i == "tags":
typeId = "tag"
for index in range(1,10):
try:
params = {"search":title, "per_page":"100", "page":index}
self._logger.info("{0} Get menu {1} {2} {3}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), index, title))
page = self._request_wordpress.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
if len(result) > 0:
for j in result:
self._logger.info("{0} info : {1} {2} {3}".format(self._name, j["name"], j["slug"], title))
if j["name"] == title or j["slug"] == title:
self._logger.info("{0} : comparaison ok : {1} {2}".format(self._name, j["id"], i))
idMenu = {"id": j["id"], "type": typeId, "link": j["link"]}
exist = True
else:
break
elif page.status_code == 400:
break
else:
self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), err))
return idMenu
def _menu(self, content):
soup = BeautifulSoup(content, self._parser)
ul = soup.find("ul", id="listsmooth")
menu = list()
children = list()
for anchor in ul.find_all("li"):
parent = anchor.find("a").get_text().replace(" \xa0", "")
href = anchor.find("a").get("href")
if href == "{0}://{1}/".format(self._protocol_canalblog, self._canalblog):
parent = "home"
itemMenu = {"id":"", "type":"", "title": parent, "link":"", "href":href, "children":list()}
if href == "#":
li = anchor.find("ul").find_all("li")
for child in li:
a = child.find("a")
href = a.get("href")
self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, a.get_text()))
children.append({"title": a.get_text(), "parent": parent, "href":href, "link":""})
menu.append(itemMenu)
for i in range(0, len(children)):
self._logger.info("{0} : Child {1} {2}".format(self._name, children[i], i))
for j in range(0, len(menu)):
if j < len(menu):
if menu[j]["title"] == children[i]["title"]:
self._logger.info("{0} : Parent {1} {2}".format(self._name, menu[j], j))
del menu[j]
for j in range(0, len(menu)):
self._logger.info("{0} : Children for : {1}".format(self._name, menu[j]["title"]))
if menu[j]["title"] == children[i]["parent"]:
menu[j]["children"].append({"id":"", "type":"", "title":children[i]["title"], "parent": children[i]["parent"], "link":"", "href":children[i]["href"]})
for i in range(0, len(menu)):
self._logger.info("{0} : Menu {1} {2}".format(self._name, menu[i]["title"], len(menu[i]["children"])))
if menu[i]["title"] != "home":
for j in range(0, len(menu[i]["children"])):
idMenu = self._getId(menu[i]["children"][j]["title"])
if idMenu["id"] == 0:
self._logger.debug("{0} : content children {1}".format(self._name, menu[i]["children"][j]))
idMenu = self._getIdFromReverse(menu[i]["children"][j]["title"], menu[i]["children"][j]["href"])
if idMenu["id"] != 0:
menu[i]["children"][j] = {"id":idMenu["id"], "type": idMenu["type"], "link": idMenu["link"], "title": menu[i]["children"][j]["title"], "parent": menu[i]["children"][j]["parent"]}
idMenu = self._getId(menu[i]["title"])
self._logger.debug("{0} : content parent {1}".format(self._name, menu[i]))
self._logger.debug("{0} : content idMenu {1}".format(self._name, idMenu))
if idMenu["id"] == 0:
idMenu = self._getIdFromReverse(menu[i]["title"], menu[i]["href"])
if idMenu["id"] != 0:
menu[i] = {"id":idMenu["id"], "type": idMenu["type"], "title":menu[i]["title"], "link":idMenu["link"], "children": menu[i]["children"]}
self._createMenu(menu)
def _createItemMenu(self, idMenu, itemMenu, parent):
idItemMenu = 0
self._logger.info("{0} : Create item menu from API Wordpress : {1}".format(self._name, self._wordpress))
try:
params = {"search": itemMenu["title"], "menus": idMenu}
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
for i in result:
if self._replaceCaracter(i["title"]["rendered"]) == itemMenu["title"]:
idItemMenu = int(i["id"])
self._logger.info("{0} : Length of result for item menus : {1}".format(self._name, len(result)))
url = "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress)
if idItemMenu != 0:
url = "{1}://{0}/wp-json/wp/v2/menu-items/{2}".format(self._wordpress, self._protocol_wordpress, idItemMenu)
try:
objectt = itemMenu["type"]
if objectt == "tag":
objectt = "post_tag"
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "menus":idMenu, "url":"#"}
if itemMenu["title"] == "home":
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "menus":idMenu, "url":"{0}://{1}".format(self._protocol_wordpress, self._wordpress)}
if type(itemMenu["id"]) is str:
if len(itemMenu["id"]) > 0:
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "url": itemMenu["link"], "menus":idMenu, "object":objectt, "object_id":int(itemMenu["id"])}
elif type(itemMenu["id"]) is int:
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "url": itemMenu["link"], "menus":idMenu, "object":objectt, "object_id":itemMenu["id"]}
self._logger.debug("{0} : data for create/update : {1}".format(self._name, data))
page = self._request_wordpress.post(url, auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code in [201, 200]:
result = page.json()
idItemMenu = int(result["id"])
self._logger.info("{0} : create/update item menu : {1}".format(self._name, itemMenu["title"]))
else:
self._logger.error("{0} : Create menu items for {2} didn't get due status code : {1}".format(self._name, page.status_code, itemMenu["title"]))
self._logger.debug("{0} : {1} {2}".format(self._name, page.content, itemMenu))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create item menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create item menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
else:
self._logger.error("{0} : Get menu items didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get item menus {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get item menus {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
return idItemMenu
def _createMenu(self, menu):
title = "Menu {0}".format(self._wordpress)
self._logger.info("{0} : Create menu from API Wordpress : {1}".format(self._name, title))
try:
params = {"search": title}
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Get content menus : {1}".format(self._name, len(result)))
idMenu = 0
if len(result) == 0:
self._logger.info("{0} : Create menu : {1}".format(self._name, title))
data = {"name": title}
try:
page = self._request_wordpress.post("{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
result = page.json()
self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
if len(result) > 0:
idMenu = result["id"]
else:
self._logger.error("{0} : Post menu didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
else:
self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
for i in result:
self._logger.debug("{0} : List menus : {1}".format(self._name, i))
if i["name"] == title:
idMenu = i["id"]
self._logger.info("{0} : Get ID menus : {1}".format(self._name, idMenu))
self._addItemMenu(menu, idMenu)
else:
self._logger.error("{0} : Get menu didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
def _addItemMenu(self, menu, idMenu):
self._logger.info("{0} : add item to menu : {1}".format(self._name, idMenu))
parent = 0
for i in menu:
parent = 0
self._logger.debug("{0} : debug create item menu : {1}".format(self._name, i))
parent = self._createItemMenu(idMenu, i, parent)
for j in i["children"]:
self._createItemMenu(idMenu, j, parent)