76 Commits
3.0.0 ... 4.0.4

Author SHA1 Message Date
f8e8239f40 Merge pull request 'fix-featured-image' (#27) from fix-featured-image into master
Reviewed-on: #27
2023-09-18 21:03:17 +00:00
19f96abb2d add no-menu and fix method linkFeaturedMedia 2023-09-18 22:58:36 +02:00
a936857eb9 add method private link featured media 2023-09-18 22:44:52 +02:00
9ab484da8d Merge pull request 'fix-export' (#26) from fix-export into master
Reviewed-on: #26
2023-09-15 21:43:35 +00:00
c275f10fb0 test ok for featured media 2023-09-15 23:38:35 +02:00
fa1854052b fix if 2023-09-15 23:14:24 +02:00
0e5afe60a7 add image featured media 2023-09-15 23:13:08 +02:00
d2cfb949f8 add image for featured image 2023-09-15 23:11:25 +02:00
bce1643135 fix replace 2023-09-14 21:27:09 +02:00
0c94783852 fix album author 2023-09-14 21:23:27 +02:00
6f78a48e57 fix index 2023-09-11 23:20:48 +02:00
dbab60c7dc fix import album 2023-09-11 23:20:22 +02:00
41e192f903 fix bug import 2023-09-11 22:40:41 +02:00
dce2c2dfa5 Merge pull request 'fix webpage' (#25) from fix-export into master
Reviewed-on: #25
2023-09-05 20:10:07 +00:00
eaec1ba9d4 fix webpage 2023-09-05 22:06:14 +02:00
3059f785c2 Merge pull request 'album-plus' (#24) from album-plus into master
Reviewed-on: #24
2023-09-01 22:27:57 +00:00
279a9f2786 fix parameter author 2023-09-02 00:27:18 +02:00
963f83ae81 fix author 2023-09-02 00:26:50 +02:00
7b154e3a1d add author 2023-08-31 22:50:31 +02:00
e5109204aa get link with album 2023-08-30 23:45:16 +02:00
2279e4b0b6 search title album 50% 2023-08-30 22:39:59 +02:00
2e21040196 add private method get info album 2023-08-29 22:26:15 +02:00
b4d0fe8aa0 Merge pull request 'album' (#23) from album into master
Reviewed-on: #23
2023-08-25 21:47:47 +00:00
6401692d0d finish menu with album import 2023-08-25 23:46:43 +02:00
1fc9c48d2c fix add menu with album 2023-08-24 22:43:15 +02:00
d9c20cedcb add menu 2023-08-24 00:01:46 +02:00
9d41e57379 add method getAuthor 2023-08-23 23:35:35 +02:00
d88ae7ed44 add author 2023-08-23 22:48:38 +02:00
50bf31d334 HTTP Post for album 2023-08-22 00:21:14 +02:00
9b58b45ae8 replace string 2023-08-20 21:41:16 +02:00
418bea3778 add album test wip 2023-08-17 00:18:45 +02:00
5959ab5b2e add galery wip 2023-08-12 00:06:09 +02:00
e4eb1b6b68 add image to media wordpress 2023-08-05 23:22:44 +02:00
9ed5ffe399 add variable 2023-08-05 12:28:33 +02:00
28b513e1b2 add private method album 2023-08-05 12:03:17 +02:00
02f0c20bd0 Merge pull request 'menu-api' (#22) from menu-api into master
Reviewed-on: #22
2023-08-04 21:22:38 +00:00
1655217050 add album article 2023-08-04 23:21:45 +02:00
49c1552062 add wpmenu 2023-07-31 23:57:35 +02:00
7f800c8f7b add menu 100% work 2023-07-31 23:39:40 +02:00
5399b12133 menu wordpress 75% 2023-07-27 00:39:15 +02:00
b7493206a2 add private method addItemMenu 2023-07-26 19:22:43 +02:00
8de7485775 return value menu 2023-07-26 19:18:18 +02:00
eee14e08f1 add update menu item 2023-07-24 23:51:41 +02:00
cf2c1aa617 add item menu 2023-07-24 23:32:20 +02:00
e17cace820 remove navigation 2023-07-24 22:51:40 +02:00
941776a7c1 Create menu 2023-07-24 22:43:52 +02:00
72fbe0a364 add menu WIP debug 2023-07-22 20:23:26 +02:00
1f8ea70b40 define content for navigation 2023-07-20 00:04:11 +02:00
c6894648d1 add link 2023-07-18 22:09:20 +02:00
d5ddf1601b add post fine 2023-07-17 23:20:47 +02:00
aa8ac9cfcb first test id from post 2023-07-15 14:00:23 +02:00
82f9acd658 get title from post 2023-07-15 13:55:01 +02:00
7593b68b6c getting post title 2023-07-15 11:51:15 +02:00
5fe4b1f786 add href 2023-07-13 23:53:54 +02:00
0445054dc8 fix get id menu 2023-07-13 22:35:38 +02:00
be9ad9b934 swap loop 2023-07-13 00:25:14 +02:00
e6328135da get id 2023-07-13 00:09:18 +02:00
dded126926 change return getId 2023-07-11 21:42:50 +02:00
bca529f3c3 parent and children array 2023-07-11 21:39:41 +02:00
5059a15826 menu and submenu 2023-07-11 00:17:24 +02:00
20c4adb3cf menu and sub-menu wip 2023-07-09 21:42:09 +02:00
74fa87ea73 list menu wip 2023-07-09 19:11:05 +02:00
03f833a2c3 Menu and sub-menu 2023-07-09 15:27:20 +02:00
9acb620f93 add condition if 2023-07-09 12:29:13 +02:00
c6ccf98b1b working menu 2023-07-06 00:42:47 +02:00
95f5203727 list menu with id 2023-07-04 00:26:27 +02:00
9bbf769b40 test WIP 2023-07-01 22:16:35 +02:00
f0b4ba5e27 add url and file 2023-07-01 21:56:10 +02:00
47f504beb5 list tag 2023-07-01 00:11:43 +02:00
4b6b06aade add public method from URL 2023-06-30 23:52:56 +02:00
d10867a983 add class WPMenu 2023-06-30 23:28:54 +02:00
6fba5f009a Merge pull request 'directory-file' (#21) from directory-file into master
Reviewed-on: #21
2023-06-28 21:30:20 +00:00
699cecad4f change with tmp files 2023-06-28 23:28:24 +02:00
9f87f38347 fix file tmp for directory 2023-06-28 23:03:27 +02:00
55d62cebfb separate files method 2023-06-27 14:48:48 +02:00
193b0e6ef7 add tmp files wip 2023-06-27 14:37:45 +02:00
6 changed files with 883 additions and 125 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@ backup*/
wp-navigation wp-navigation
*.log *.log
__pycache__/ __pycache__/
wp-gallery

View File

@@ -10,10 +10,23 @@ from lib.WPImport import WPimport
from lib.WPExport import WPExport from lib.WPExport import WPExport
from lib.WPRemove import WPRemove from lib.WPRemove import WPRemove
from lib.WPChange import WPChange from lib.WPChange import WPChange
from lib.WPMenu import WPMenu
def change(index, number, args, logger): def errorRevert(logger, revert, tmp):
changeWp = WPChange(logger=logger, index_name=index, number_thread=number) if revert is True:
changeWp.fromDirectory(args.directory) files_tmp = glob.glob("{0}/*.json".format(tmp))
if len(files_tmp) == 0:
logger.error("Error revert, because files not found")
exit(1)
if len(files_tmp) != int(args.parallel):
for file_r in files_tmp:
os.remove(file_r)
logger.error("Error revert, because number files tmp is incompatible with parallel number")
exit(1)
def change(index, number, args, logger, tmp, revert):
changeWp = WPChange(logger=logger, index_name=index, number_thread=number, tmp=tmp)
changeWp.fromDirectory(args.directory, revert)
del changeWp del changeWp
@@ -42,7 +55,7 @@ def remove(index, number, args, basic, logger, ssl_wordpress):
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp): def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp):
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog)
if not revert: if revert is False:
exportWp.getUrlPage(name_thread, max_thread) exportWp.getUrlPage(name_thread, max_thread)
for i in ["article", "page"]: for i in ["article", "page"]:
for j in ["publications", "principal"]: for j in ["publications", "principal"]:
@@ -54,7 +67,7 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img,
del exportWp del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp): def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp, author):
canalblog = canalblog.split(",") canalblog = canalblog.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
@@ -75,7 +88,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
exportWp.getUrlPage(name_thread, max_thread) exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
for j in wordpress: for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp) importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, author=author)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
importWp.fromUrl(l, k) importWp.fromUrl(l, k)
@@ -97,7 +110,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
if not revert: if not revert:
exportWp.getUrlPage(name_thread, max_thread) exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp, author=author)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
@@ -106,15 +119,15 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image): def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image, revert, author):
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",") directory = directory.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
if serial is False: if serial is False:
for i in wordpress: for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, author=author)
for j in directory: for j in directory:
importWp.fromDirectory(j, name_thread, max_thread) importWp.fromDirectory(j, name_thread, max_thread, revert)
del importWp del importWp
else: else:
@@ -122,8 +135,8 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres
logger.error("{0} : Error : Number directory is different than wordpress".format(name)) logger.error("{0} : Error : Number directory is different than wordpress".format(name))
exit(1) exit(1)
for i in range(0, len(wordpress)-1): for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, author=author)
importWp.fromDirectory(directory[i]) importWp.fromDirectory(directory[i], name_thread, max_thread, revert)
del importWp del importWp
@@ -157,6 +170,9 @@ if __name__ == '__main__':
import_parser.add_argument("--no-create", help="No create post", dest="create", default="store_false", action="store_true") import_parser.add_argument("--no-create", help="No create post", dest="create", default="store_false", action="store_true")
import_parser.add_argument("--no-update", help="No update post", dest="update", default="store_false", action="store_true") import_parser.add_argument("--no-update", help="No update post", dest="update", default="store_false", action="store_true")
import_parser.add_argument("--no-image", help="No image add or update", dest="image", default="store_false", action="store_true") import_parser.add_argument("--no-image", help="No image add or update", dest="image", default="store_false", action="store_true")
import_parser.add_argument("--no-menu", help="No menu add or update", dest="menu", default="store_false", action="store_true")
import_parser.add_argument("--author", dest="author", help="Define author", default="")
@@ -190,8 +206,14 @@ if __name__ == '__main__':
change_parser.add_argument("--file", change_parser.add_argument("--file",
default="", default="",
help="File") help="File")
menu_parser = subparsers.add_parser("menu")
menu_parser.add_argument("--user", help="wordpress user", required=True)
menu_parser.add_argument("--password", help="password wordpress's user", default="")
menu_parser.add_argument("--file", help="HTML file", default="")
menu_parser.add_argument("--canalblog", help="URL Canalblog", default="")
menu_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
args = parser.parse_args() args = parser.parse_args()
logger = logging.getLogger('import export canalblog') logger = logging.getLogger('import export canalblog')
@@ -228,7 +250,7 @@ if __name__ == '__main__':
os.makedirs(args.tmp, exist_ok=True) os.makedirs(args.tmp, exist_ok=True)
if args.command == "import" or args.command == "remove": if args.command == "import" or args.command == "remove" or args.command == "menu":
password = args.password password = args.password
if len(args.password) == 0: if len(args.password) == 0:
password = getpass() password = getpass()
@@ -239,20 +261,28 @@ if __name__ == '__main__':
basic = HTTPBasicAuth(args.user, password) basic = HTTPBasicAuth(args.user, password)
if args.command == "import": if args.command == "import":
wordpress = args.wordpress.split(",") wordpress = args.wordpress.split(",")
importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser, ssl_wordpress=ssl_wordpress) importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser, ssl_wordpress=ssl_wordpress, author=args.author, ssl_canalblog=ssl_canalblog)
if len(args.file) > 0: if len(args.file) > 0:
for i in wordpress: for i in wordpress:
importWp.setUrl(i) importWp.setUrl(i)
importWp.fromFile(files=args.file.split(",")) importWp.fromFile(files=args.file.split(","))
if args.menu is False:
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromFile("{0}".format(args.file.split(",")[0]))
if len(args.directory) > 0: if len(args.directory) > 0:
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED) wait(wait_for, return_when=ALL_COMPLETED)
errorRevert(logger, args.revert, args.tmp)
wait_for = [ wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image) ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image, args.revert, args.author)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
if args.menu is False:
wait(wait_for, return_when=ALL_COMPLETED)
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromFile("{0}/index.html".format(args.directory))
except Exception as err: except Exception as err:
logger.error("Threading error : {0}".format(err)) logger.error("Threading error : {0}".format(err))
if len(args.canalblog) > 0: if len(args.canalblog) > 0:
@@ -260,19 +290,16 @@ if __name__ == '__main__':
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ] wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED) wait(wait_for, return_when=ALL_COMPLETED)
if args.revert is True: errorRevert(logger, args.revert, args.tmp)
files_tmp = glob.glob("{0}/*.json".format(args.tmp))
if len(files_tmp) == 0:
logger.error("Error revert, because files not found")
exit(1)
if len(files_tmp) != int(args.parallel):
for file_r in files_tmp:
os.remove(file_r)
wait_for = [ wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, args.tmp) ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, args.tmp, args.author)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
if args.menu is False:
wait(wait_for, return_when=ALL_COMPLETED)
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
menuWp.fromUrl(args.canalblog)
except Exception as err: except Exception as err:
@@ -328,11 +355,20 @@ if __name__ == '__main__':
if len(args.directory) > 0: if len(args.directory) > 0:
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(change, i, args.parallel, args, logger) for i in range(0, int(args.parallel)) ] errorRevert(logger, args.revert, args.tmp)
wait_for = [ ex.submit(change, i, args.parallel, args, logger, args.tmp, args.revert) for i in range(0, int(args.parallel)) ]
except Exception as err: except Exception as err:
logger.error("Thread error for remove : {0}".format(err)) logger.error("Thread error for remove : {0}".format(err))
if len(args.file) > 0: if len(args.file) > 0:
changeWp = WPChange(logger=logger) changeWp = WPChange(logger=logger)
for filei in args.file.split(","): for filei in args.file.split(","):
changeWp.fromFile(filei) changeWp.fromFile(filei)
exit(0)
if args.command == "menu":
menuWp = WPMenu(name="Thread-1", basic=basic, wordpress=args.wordpress, logger=logger, parser=args.parser, ssl_canalblog=ssl_canalblog, ssl_wordpress=ssl_wordpress)
if len(args.file) > 0:
menuWp.fromFile(args.file)
if len(args.canalblog) > 0:
menuWp.fromUrl(args.canalblog)
exit(0) exit(0)

View File

@@ -4,11 +4,13 @@ import requests, os, logging, re, json
class WPChange: class WPChange:
# Constructor # Constructor
def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser"): def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser", tmp="/tmp/import_export_canablog"):
self._name = "Thread-{0}".format(index_name) self._name = "Thread-{0}".format(index_name)
self._logger = logger self._logger = logger
self._number_thread = number_thread self._number_thread = number_thread
self._parser = parser self._parser = parser
self._tmp = tmp
self._index_name = index_name
# Destructor # Destructor
def __del__(self): def __del__(self):
@@ -37,19 +39,62 @@ class WPChange:
## From directory ## From directory
def fromDirectory(self, directory="", number_thread=1, max_thread=1):
def fromDirectory(self, directory="", revert=False):
self._directory = directory
directory = "{0}/archives".format(directory) directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory)) directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0: if len(directories) > 0:
files = self._getFiles(directories) files = self._getFiles(directories)
self.fromFile(files, number_thread, max_thread) if revert is False:
self._tmpFiles(files=files, number_thread=self._index_name, max_thread=self._number_thread)
self._fromFileTmp()
else: else:
self._logger.error("{0} : No files for {1}".format(self._name, directory)) self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files=[]):
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
self._change(files[i])
# Private method # Private method
def _fromFileTmp(self):
try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
files = json.loads(file.read())
self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
self._change(files[i])
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def _tmpFiles(self, files=[], number_thread=1, max_thread=1):
print()
divFiles = int(len(files) / int(max_thread))
currentRangeFiles = int(divFiles * (int(number_thread)+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
webpage = []
for i in range(firstRange, currentRangeFiles):
webpage.append(files[i])
try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
## Get all files ## Get all files
def _getFiles(self, item): def _getFiles(self, item):

View File

@@ -58,14 +58,26 @@ class WPExport:
# Download HTML # Download HTML
def downloadHTML(self, first, second): def downloadHTML(self, first, second):
self._downloadPage(webpage[first][second], self._dir) try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
webpage = json.loads(file.read())
self._downloadPage(webpage[first][second], self._dir)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Download Image # Download Image
def downloadImg(self, first, second): def downloadImg(self, first, second):
page_src = self._getImg(webpage[first][second]) try:
o = urlparse(self._url) with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img")) webpage = json.loads(file.read())
page_src = self._getImg(webpage[first][second])
o = urlparse(self._url)
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img"))
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
# Get URL # Get URL

View File

@@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport: class WPimport:
# Constructor # Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog"): def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog", author="", ssl_canalblog=True):
self._name = name self._name = name
self._basic = basic self._basic = basic
self._wordpress = wordpress self._wordpress = wordpress
@@ -20,7 +20,7 @@ class WPimport:
if ssl_wordpress is False: if ssl_wordpress is False:
self._protocol = "http" self._protocol = "http"
self._request = requests.Session() self._request = requests.Session()
self._ssl_canalblog = ssl_canalblog
retries = Retry(connect=10, read=10, redirect=5, retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
@@ -29,6 +29,7 @@ class WPimport:
self._no_update = no_update self._no_update = no_update
self._no_image = no_image self._no_image = no_image
self._tmp = tmp self._tmp = tmp
self._author = author
# Destructor # Destructor
def __del__(self): def __del__(self):
@@ -55,7 +56,11 @@ class WPimport:
if len(articlebody) > 0: if len(articlebody) > 0:
self._addOrUpdatePost(soup) self._addOrUpdatePost(soup)
else: else:
self._addOrUpdateFeaturedMedia(soup) albumbody = soup.find_all("div", class_="albumbody")
if len(albumbody) > 0:
self._addOrUpdateAlbum(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
del webpage_content[first][second][i] del webpage_content[first][second][i]
webpage_content = json.dumps(webpage_content) webpage_content = json.dumps(webpage_content)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content) open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(webpage_content)
@@ -74,33 +79,23 @@ class WPimport:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex)) self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def fromDirectory(self, directory="", number_thread=1, max_thread=1): def fromDirectory(self, directory="", number_thread=1, max_thread=1, revert=False):
self._directory = directory self._directory = directory
directory = "{0}/archives".format(directory) directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory)) directories = self._getDirectories([], "{0}".format(directory))
if len(directories) > 0: if len(directories) > 0:
files = self._getFiles(directories) files = self._getFiles(directories)
self.fromFile(files=files, number_thread=number_thread, max_thread=max_thread) if revert is False:
self._tmpFiles(files=files, number_thread=number_thread, max_thread=max_thread)
self._fromFileTmp()
else: else:
self._logger.error("{0} : No files for {1}".format(self._name, directory)) self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files=[]):
for i in range(0, len(files)):
def fromFile(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
if os.path.exists(files[i]): if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i])) self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
with open(files[i], 'r') as f: with open(files[i], 'r') as f:
content = f.read() content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content))) self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
@@ -110,11 +105,281 @@ class WPimport:
if len(articlebody) > 0: if len(articlebody) > 0:
self._addOrUpdatePost(soup) self._addOrUpdatePost(soup)
else: else:
self._addOrUpdateFeaturedMedia(soup) albumbody = soup.find_all("div", class_="albumbody")
if len(albumbody) > 0:
self._addOrUpdateAlbum(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
# Private method # Private method
def _getAuthor(self, author):
params = {"search":author, "per_page":100}
author = 0
try:
self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
if page.status_code == 200:
self._logger.info("{0} : Get author : {1}".format(self._name, author))
result = page.json()
for a in result:
author = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
return author
def _getInfoAlbum(self, link):
if self._ssl_canalblog:
link = link.replace("http", "https").replace("httpss", "https")
self._logger.info("{0} : Info album : {1}".format(self._name, link))
link_o = urlparse(link)
if len(link_o.netloc) > 0:
self._logger.info("{0} : get album info from web : {1}".format(self._name, link_o))
try:
response = self._request.get(link)
if response.status_code == 200:
self._logger.info("{0} : get content info from web : {1}".format(self._name, link))
page_img = response.content
except ConnectionError as err:
self._logger.error("{0} : Connection error for get album info : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get album info : {1}".format(self._name, err))
exit(1)
else:
self._logger.info("{0} : get album info from file : {1}".format(self._name, link_o))
if os.path.exists("{0}/..{1}".format(self._directory, link_o)):
page_img = open("{0}/..{1}".format(self._directory, link_o), "r")
soup = BeautifulSoup(page_img, self._parser)
paragraphs = soup.find("div", class_="albumbody").find_all("p")
for paragraph in paragraphs:
self._logger.info("{0} get paragraph : {1}".format(self._name, paragraph))
split_paragraph = str(paragraph).split("<br>")
self._logger.info("{0} length paragraph splitted : {1}".format(self._name, len(split_paragraph)))
if len(split_paragraph) == 1:
split_paragraph = str(paragraph).split("<br/>")
self._logger.info("{0} get paragraph splitted : {1}".format(self._name, split_paragraph))
if len(split_paragraph) > 1:
if len(split_paragraph[1].split(":")) > 1:
author = split_paragraph[1].split(":")[1].replace(" ", "").lower()
return author
def _addOrUpdateAlbum(self, soup):
self._logger.info("{0} : Add/Update Album".format(self._name))
albumbody = soup.find("div", class_="albumbody")
albumtitle = albumbody.find("h2").get_text()
self._logger.debug("{0} : Title of the album : {1}".format(self._name, albumtitle))
albumdesc = albumbody.find("div", class_="albumdesc").find("p")
img_a = albumbody.find_all("img")
list_img = []
page_is_exist = False
if self._no_image is False:
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img_a)))
for i in img_a:
new_img = {}
href_img = i.get("src")
href_img_o = urlparse(href_img)
try:
if len(href_img_o.netloc) > 0:
img_ok = False
href_img = href_img.replace("_q", "_o")
page_img = self._request.get(href_img)
if page_img.status_code == 200:
img_ok = True
else:
if os.path.exists("{0}/..{1}".format(self._directory, href_img)):
page_img = open("{0}/..{1}".format(self._directory, href_img), "r")
img_ok = True
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if img_ok is True:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} content img : {1}".format(self._name, list_img))
content_html = ""
if len(list_img) > 0:
content_html = "<!-- wp:paragraph -->\n{0}\n<!-- /wp:paragraph -->\n\n".format(albumdesc)
for i in range(0, len(list_img)):
content_html = content_html + "<!-- wp:image {\"id\":id-image,\"sizeSlug\":\"large\",\"linkDestination\":\"none\"} --><figure class=\"wp-block-image size-large\"><img src=\"src-image\" alt=\"\" class=\"wp-image-id-image\"/></figure><!-- /wp:image -->\n\n".replace("id-image", str(list_img[i]["id"])).replace("src-image", list_img[i]["new_src"])
self._logger.info("{0} : content html : {1}".format(self._name, content_html))
if len(content_html) > 0:
data = {"title":albumtitle, "content":content_html, "status":"publish"}
if len(self._author) > 0:
author = self._getAuthor(self._author)
else:
link_a = albumbody.find_all("a")
for i in link_a:
if re.search(r"/albums/", i.get("href", "/")):
href_a = i.get("href", "/")
break
author = self._getInfoAlbum(href_a)
self._logger.info("{0} : author : {1}".format(self._name, author))
author = self._getAuthor(author)
data = {"title":albumtitle, "content":content_html, "status":"publish"}
if author != 0:
data = {"title":albumtitle, "content":content_html, "status":"publish", "author":author}
self._logger.debug("{0} : data for album page : {1}".format(self._name, data))
for index in range(1,10):
params = {"search": albumtitle, "per_page":100, "page": index}
try:
self._logger.info("{0} : Search post with index {2} : {1}".format(self._name, albumtitle, index))
page = self._request.get("{1}://{0}/wp-json/wp/v2/pages".format(self._wordpress, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json()
if len(result) == 0:
break
self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result:
title_rendered = i["title"]["rendered"]
self._logger.info("{0} : Search title pages for |{2}| : |{1}|".format(self._name, title_rendered, albumtitle))
if len(title_rendered) != len(albumtitle):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : Search title pages for |{2}| : |{1}|".format(self._name, title_rendered, albumtitle))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(albumtitle), len(title_rendered)))
if title_rendered == albumtitle:
if self._no_update is False:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, albumtitle))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/pages/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
if page.status_code == 200:
self._logger.info("{0} : Page deleted : {1}".format(self._name, albumtitle))
else:
self._logger.error("{0} : Page not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted page : {1}".format(self._name, err))
else:
self._logger.debug("{0} : Data for page to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, albumtitle))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/pages/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : page updated : {1}".format(self._name, albumtitle))
else:
self._logger.error("{0} : page not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update page : {1}".format(self._name, err))
elif page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : Connection for update page error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for search page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for search page : {1}".format(self._name, err))
if page_is_exist is False and self._no_create is False:
try:
self._logger.info("{0} : Creating page : {1}".format(self._name, data["title"]))
page = self._request.post("{1}://{0}/wp-json/wp/v2/pages".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
result = page.json()
self._logger.info("{0} : page added : {1}".format(self._name, result["title"]["raw"]))
else:
self._logger.error("{0} : page not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create page : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create page : {1}".format(self._name, err))
def _fromFileTmp(self):
try:
with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
files = json.loads(file.read())
self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
with open(files[i], 'r') as f:
content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
soup = BeautifulSoup(content, self._parser)
articlebody = soup.find_all("div", class_="articlebody")
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
if len(articlebody) > 0:
self._addOrUpdatePost(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
except Exception as ex:
self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
def _tmpFiles(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
webpage = []
for i in range(firstRange, currentRangeFiles):
webpage.append(files[i])
try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
## replace caracter ## replace caracter
def _replaceCaracter(self, title_rendered): def _replaceCaracter(self, title_rendered):
@@ -152,6 +417,67 @@ class WPimport:
subdirectory = self._getDirectories(sub, "{0}/{1}".format(item, i)) subdirectory = self._getDirectories(sub, "{0}/{1}".format(item, i))
return subdirectory return subdirectory
def _linkFeaturedMedia(self, img_src, h2, result):
try:
page = self._request.get(img_src)
if page.status_code == 200:
name_img = img_src.replace("_q", "")
name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
if page.status_code == 200:
res = page.json()
if len(res) > 0:
id_media = res[0]["id"]
data = {"featured_media": id_media}
try:
r = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"], self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if r.status_code == 200:
self._logger.info("{0} : Add media featured : {1}".format(self._name, r.json()["title"]["raw"]))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for post media featured : {1}".format(self._name, err))
else:
self._logger.info("{0} : No media found for {1}".format(self._name, h2))
try:
page = self._request.get(img_src.replace("_q", ""))
if page.status_code == 200:
self._logger.info("{0} : Get image for media featured : {1}".format(self._name, img_src.replace("_q", "")))
media = self._addOrUpdateMedia(img_src.replace("_q", ""), page)
if media["id"] != "":
self._linkFeaturedMedia(img_src.replace("_q", ""), h2)
else:
self._logger.error("{0} : Connection error with status code for get image for featured media : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get to image for featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get to image media featured : {1}".format(self._name, err))
else:
self._logger.error("{0} : Connection error with status code for search featured media: {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error search featured media : {1}".format(self._name, err))
else:
self._logger.error("{0} : Connection error for get featured media with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get featured media : {1}".format(self._name, err))
## Add or update featured media ## Add or update featured media
def _addOrUpdateFeaturedMedia(self, soup): def _addOrUpdateFeaturedMedia(self, soup):
@@ -164,54 +490,11 @@ class WPimport:
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
if len(result) > 0: if len(result) > 0:
if h2 == result[0]["title"]: if h2 == self._replaceCaracter(result[0]["title"]):
img = i.find_all("img") img = i.find_all("img")
if len(img) > 0: if len(img) > 0:
img_src = img[0].get("src") img_src = img[0].get("src")
try: self._linkFeaturedMedia(img_src, h2, result)
page = self._request.get(img_src)
if page.status_code == 200:
name_img = img_src.replace("_q", "")
name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
if page.status_code == 200:
res = page.json()
if len(res) > 0:
id_media = res[0]["id"]
data = {"featured_media": id_media}
try:
r = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"], self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if r.status_code == 200:
self._logger.info("{0} : Add media featured : {1}".format(self._name, r.json()["title"]["raw"]))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for post media featured : {1}".format(self._name, err))
else:
self._logger.info("{0} : No media found for {1}".format(self._name, h2))
else:
self._logger.error("{0} : Connection error with status code for search featured media: {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error search featured media : {1}".format(self._name, err))
else:
self._logger.error("{0} : Connection error for get featured media with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get featured media : {1}".format(self._name, err))
else: else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
@@ -555,7 +838,7 @@ class WPimport:
listelement[i].append(k["id"]) listelement[i].append(k["id"])
else: else:
break break
if page.status_code == 400: elif page.status_code == 400:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code)) self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
break break
@@ -589,7 +872,9 @@ class WPimport:
self._logger.error("{0} : Exception error for post {1} : {2}".format(self._name, i, err)) self._logger.error("{0} : Exception error for post {1} : {2}".format(self._name, i, err))
title = articletitle[0].text title = articletitle[0].text
author = articleacreator[0].text.lower() author = articleacreator[0].text.lower()
if len(self._author) > 0:
author = self._author
body = articlebody[0].find_all("p") body = articlebody[0].find_all("p")
bodyhtml = "<p>" bodyhtml = "<p>"
for i in body: for i in body:
@@ -603,33 +888,18 @@ class WPimport:
bodyhtml = bodyhtml.replace(i["old_href"], o.path) bodyhtml = bodyhtml.replace(i["old_href"], o.path)
bodyhtml = bodyhtml.replace(i["old_src"], o.path) bodyhtml = bodyhtml.replace(i["old_src"], o.path)
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = ""
if len(dateheader) > 0:
time = dateheader[0].text.split(" ")
self._logger.debug("{0} : Title post : |{1}|".format(self._name, title)) self._logger.debug("{0} : Title post : |{1}|".format(self._name, title))
title = self._removeSpace(title) title = self._removeSpace(title)
self._logger.debug("{0} : Rendered Title post : |{1}|".format(self._name, title)) self._logger.debug("{0} : Rendered Title post : |{1}|".format(self._name, title))
data = {"title":title, "content":bodyhtml, "status":"publish", "tags": listelement["tags"], "categories": listelement["categories"]}
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} if len(dateheader) > 0:
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data)) self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data))
params = {"search":author, "per_page":100} data["author"] = self._getAuthor(author)
try:
self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
if page.status_code == 200:
self._logger.info("{0} : Get author id : {1}".format(self._name, result))
result = page.json()
for a in result:
data["author"] = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
page_is_exist = False page_is_exist = False
for index in range(1,10): for index in range(1,10):

394
lib/WPMenu.py Normal file
View File

@@ -0,0 +1,394 @@
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPMenu:
# Constructor
def __init__(self, name="Thread-0", basic=None, canalblog="", wordpress="", logger=None, parser="html.parser", ssl_canalblog=True, ssl_wordpress=True):
self._name = name
self._basic = basic
self._canalblog = canalblog
self._wordpress = wordpress
self._logger = logger
self._parser = parser
self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._protocol_wordpress = "https"
self._protocol_canalblog = "https"
self._directory = "backup"
if ssl_wordpress is False:
self._protocol_wordpress = "http"
if ssl_canalblog is False:
self._protocol_canalblog = "http"
self._request_canalblog = requests.Session()
self._request_wordpress = requests.Session()
retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request_canalblog.mount('{0}://'.format(self._protocol_canalblog), HTTPAdapter(max_retries=retries))
self._request_wordpress.mount('{0}://'.format(self._protocol_wordpress), HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
print("{0} : Import finished for {1}".format(self._name, self._wordpress))
# Public method
## From file
def fromFile(self, files):
if os.path.exists(files):
with open(files, 'r') as f:
self._logger.info("{0} : File is being processed : {1}".format(self._name, files))
content = f.read()
self._menu(content)
else:
self._logger.error("{0} : File isn't exist : {1}".format(self._name, files))
## Get from URL
def fromUrl(self, canalblog):
self._canalblog = canalblog
try:
o = urlparse(canalblog)
o = o._replace(scheme=self._protocol_canalblog)
i = o.geturl().replace(":///", "://")
page = self._request_canalblog.get(i)
if page.status_code == 200:
self._logger.info("{0} : Page web is being processed : {1}".format(self._name, i))
self._menu(page.content)
else:
self._logger.error("{0} : index didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, canalblog, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, canalblog, err))
## replace caracter
def _replaceCaracter(self, title_rendered):
list_replace = {'&rsquo;': "'", '&#8211;': '-', '&#8230;': '...', '« ': '"', ' »': '"', '« ': '"', ' »': '"', '&#8217;': "'", '"&lsquo;': "'"}
for old, new in list_replace.items():
title_rendered = title_rendered.replace(old, new)
return title_rendered
def _getIdfromTitlePost(self, content):
idMenu = {"id":0, "type":"", "link":""}
soup = BeautifulSoup(content, self._parser)
articletitle = soup.find_all("h2", class_="articletitle")
if len(articletitle) > 0:
articletitle = articletitle[0].get_text()
search = "posts"
post_type = "post"
if len(articletitle) == 0:
articletitle = soup.find_all("div", class_="albumbody")
if len(articletitle) > 0:
articletitle = articletitle[0].find("h2").get_text()
search = "pages"
post_type = "page"
exist = False
for index in range(1,10):
if exist is False:
params = {"search":articletitle, "per_page":100, "page":index}
try:
self._logger.debug("{0} : Get Url for {3} : {1} {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/{2}".format(self._wordpress, self._protocol_wordpress, search), params, search))
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/{2}".format(self._wordpress, self._protocol_wordpress, search), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Get content {2} : {1}".format(self._name, len(result), search))
if len(result) > 0:
for i in result:
title_rendered = i["title"]["rendered"]
if len(articletitle) != len(title_rendered):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : comparaison debug {1} {2}".format(self._name, articletitle, title_rendered))
if articletitle == title_rendered:
self._logger.debug("{0} : get {2} id : {1}".format(self._name, i, search))
idMenu = {"id":i["id"], "type":post_type, "link": i["link"]}
exist = True
else:
self._logger.debug("{0} : {2} {1}".format(self._name, result, len(result)))
break
elif page.status_code == 400:
self._logger.debug("{0} : {2} {1}".format(self._name, page.content, page.status_code))
break
else:
self._logger.error("{0} : Post didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get content : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get content : {1} ".format(self._name, err))
return idMenu
def _getIdFromPost(self, href):
idMenu = {"id":0, "type":"", "link":""}
o = urlparse(href)
if len(o.netloc) > 0:
try:
page = self._request_canalblog.get(href)
if page.status_code == 200:
self._logger.info("{0} : Get content : {1}".format(self._name, href))
idMenu = self._getIdfromTitlePost(page.content)
else:
self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, href))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, href, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, href, err))
else:
if os.path.exists("{0}/..{1}".format(self._directory, o.path)):
try:
content = open("{0}/..{1}".format(self._directory, o.path), "r")
idMenu = self._getIdfromTitlePost(content)
except Exception as err:
self._logger.error("{0} : Exception error for get file content {1} : {2}".format(self._name, href, err))
return idMenu
def _getIdFromReverse(self, title, href):
self._logger.info("{0} : get title {1} from href {2}".format(self._name, title, href))
idMenu = {"id":0, "type":"", "link":""}
if href != "#":
title = href[::-1]
second_title = title.split("/")[2]
second_title = second_title[::-1]
link = title.split("/")[0]
link = link[::-1]
title = title.split("/")[1]
title = title[::-1]
self._logger.info("{0} link {1} title {2}".format(self._name, link, title))
if link == "index.html":
if second_title == "albums":
idMenu = self._getIdFromPost(href)
else:
idMenu = self._getId(title)
else:
idMenu = self._getIdFromPost(href)
return idMenu
def _getId(self, title):
idMenu = {"id": 0, "type":"", "link":""}
exist = False
if exist is False:
for i in ["categories", "tags"]:
typeId = "category"
if i == "tags":
typeId = "tag"
for index in range(1,10):
try:
params = {"search":title, "per_page":"100", "page":index}
self._logger.info("{0} Get menu {1} {2} {3}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), index, title))
page = self._request_wordpress.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
if len(result) > 0:
for j in result:
self._logger.info("{0} info : {1} {2} {3}".format(self._name, j["name"], j["slug"], title))
if j["name"] == title or j["slug"] == title:
self._logger.info("{0} : comparaison ok : {1} {2}".format(self._name, j["id"], i))
idMenu = {"id": j["id"], "type": typeId, "link": j["link"]}
exist = True
else:
break
elif page.status_code == 400:
break
else:
self._logger.error("{0} : {2} didn't get due status code : {1}".format(self._name, page.status_code, i))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol_wordpress), err))
return idMenu
def _menu(self, content):
soup = BeautifulSoup(content, self._parser)
ul = soup.find("ul", id="listsmooth")
menu = list()
children = list()
for anchor in ul.find_all("li"):
parent = anchor.find("a").get_text().replace(" \xa0", "")
href = anchor.find("a").get("href")
if href == "{0}://{1}/".format(self._protocol_canalblog, self._canalblog):
parent = "home"
itemMenu = {"id":"", "type":"", "title": parent, "link":"", "href":href, "children":list()}
if href == "#":
li = anchor.find("ul").find_all("li")
for child in li:
a = child.find("a")
href = a.get("href")
self._logger.info("{0} Parent {1} : Child {2}".format(self._name, parent, a.get_text()))
children.append({"title": a.get_text(), "parent": parent, "href":href, "link":""})
menu.append(itemMenu)
for i in range(0, len(children)):
self._logger.info("{0} : Child {1} {2}".format(self._name, children[i], i))
for j in range(0, len(menu)):
if j < len(menu):
if menu[j]["title"] == children[i]["title"]:
self._logger.info("{0} : Parent {1} {2}".format(self._name, menu[j], j))
del menu[j]
for j in range(0, len(menu)):
self._logger.info("{0} : Children for : {1}".format(self._name, menu[j]["title"]))
if menu[j]["title"] == children[i]["parent"]:
menu[j]["children"].append({"id":"", "type":"", "title":children[i]["title"], "parent": children[i]["parent"], "link":"", "href":children[i]["href"]})
for i in range(0, len(menu)):
self._logger.info("{0} : Menu {1} {2}".format(self._name, menu[i]["title"], len(menu[i]["children"])))
if menu[i]["title"] != "home":
for j in range(0, len(menu[i]["children"])):
idMenu = self._getId(menu[i]["children"][j]["title"])
if idMenu["id"] == 0:
self._logger.debug("{0} : content children {1}".format(self._name, menu[i]["children"][j]))
idMenu = self._getIdFromReverse(menu[i]["children"][j]["title"], menu[i]["children"][j]["href"])
if idMenu["id"] != 0:
menu[i]["children"][j] = {"id":idMenu["id"], "type": idMenu["type"], "link": idMenu["link"], "title": menu[i]["children"][j]["title"], "parent": menu[i]["children"][j]["parent"]}
idMenu = self._getId(menu[i]["title"])
self._logger.debug("{0} : content parent {1}".format(self._name, menu[i]))
self._logger.debug("{0} : content idMenu {1}".format(self._name, idMenu))
if idMenu["id"] == 0:
idMenu = self._getIdFromReverse(menu[i]["title"], menu[i]["href"])
if idMenu["id"] != 0:
menu[i] = {"id":idMenu["id"], "type": idMenu["type"], "title":menu[i]["title"], "link":idMenu["link"], "children": menu[i]["children"]}
self._createMenu(menu)
def _createItemMenu(self, idMenu, itemMenu, parent):
idItemMenu = 0
self._logger.info("{0} : Create item menu from API Wordpress : {1}".format(self._name, self._wordpress))
try:
params = {"search": itemMenu["title"], "menus": idMenu}
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
for i in result:
if self._replaceCaracter(i["title"]["rendered"]) == itemMenu["title"]:
idItemMenu = int(i["id"])
self._logger.info("{0} : Length of result for item menus : {1}".format(self._name, len(result)))
url = "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress)
if idItemMenu != 0:
url = "{1}://{0}/wp-json/wp/v2/menu-items/{2}".format(self._wordpress, self._protocol_wordpress, idItemMenu)
try:
objectt = itemMenu["type"]
if objectt == "tag":
objectt = "post_tag"
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "menus":idMenu, "url":"#"}
if itemMenu["title"] == "home":
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "menus":idMenu, "url":"{0}://{1}".format(self._protocol_wordpress, self._wordpress)}
if type(itemMenu["id"]) is str:
if len(itemMenu["id"]) > 0:
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "url": itemMenu["link"], "menus":idMenu, "object":objectt, "object_id":int(itemMenu["id"])}
elif type(itemMenu["id"]) is int:
data = {"title": itemMenu["title"], "status": "publish", "parent":parent, "url": itemMenu["link"], "menus":idMenu, "object":objectt, "object_id":itemMenu["id"]}
self._logger.debug("{0} : data for create/update : {1}".format(self._name, data))
page = self._request_wordpress.post(url, auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code in [201, 200]:
result = page.json()
idItemMenu = int(result["id"])
self._logger.info("{0} : create/update item menu : {1}".format(self._name, itemMenu["title"]))
else:
self._logger.error("{0} : Create menu items for {2} didn't get due status code : {1}".format(self._name, page.status_code, itemMenu["title"]))
self._logger.debug("{0} : {1} {2}".format(self._name, page.content, itemMenu))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create item menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create item menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
else:
self._logger.error("{0} : Get menu items didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get item menus {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get item menus {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menu-items".format(self._wordpress, self._protocol_wordpress), err))
return idItemMenu
def _createMenu(self, menu):
title = "Menu {0}".format(self._wordpress)
self._logger.info("{0} : Create menu from API Wordpress : {1}".format(self._name, title))
try:
params = {"search": title}
page = self._request_wordpress.get("{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Get content menus : {1}".format(self._name, len(result)))
idMenu = 0
if len(result) == 0:
self._logger.info("{0} : Create menu : {1}".format(self._name, title))
data = {"name": title}
try:
page = self._request_wordpress.post("{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
result = page.json()
self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
if len(result) > 0:
idMenu = result["id"]
else:
self._logger.error("{0} : Post menu didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
else:
self._logger.debug("{0} : Get menus : {1}".format(self._name, result))
for i in result:
self._logger.debug("{0} : List menus : {1}".format(self._name, i))
if i["name"] == title:
idMenu = i["id"]
self._logger.info("{0} : Get ID menus : {1}".format(self._name, idMenu))
self._addItemMenu(menu, idMenu)
else:
self._logger.error("{0} : Get menu didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get menu {1} : {2}".format(self._name, "{1}://{0}/wp-json/wp/v2/menus".format(self._wordpress, self._protocol_wordpress), err))
def _addItemMenu(self, menu, idMenu):
self._logger.info("{0} : add item to menu : {1}".format(self._name, idMenu))
parent = 0
for i in menu:
parent = 0
self._logger.debug("{0} : debug create item menu : {1}".format(self._name, i))
parent = self._createItemMenu(idMenu, i, parent)
for j in i["children"]:
self._createItemMenu(idMenu, j, parent)