40 Commits
2.0.1 ... 3.1.0

Author SHA1 Message Date
6fba5f009a Merge pull request 'directory-file' (#21) from directory-file into master
Reviewed-on: #21
2023-06-28 21:30:20 +00:00
699cecad4f change with tmp files 2023-06-28 23:28:24 +02:00
9f87f38347 fix file tmp for directory 2023-06-28 23:03:27 +02:00
55d62cebfb separate files method 2023-06-27 14:48:48 +02:00
193b0e6ef7 add tmp files wip 2023-06-27 14:37:45 +02:00
b88917127d Merge pull request 'webpage-file' (#20) from webpage-file into master
Reviewed-on: #20
2023-06-26 22:28:26 +00:00
781d8959c4 fix tmp directory parameter 2023-06-27 00:25:23 +02:00
a67ff868f3 fix json read file 2023-06-26 23:52:03 +02:00
8e0abc40bd check files tmp 2023-06-26 23:09:54 +02:00
9149a6c5cb rollback webpage 2023-06-26 22:44:42 +02:00
d1b6e8048a add revert files json 2023-06-25 21:16:05 +02:00
0eab1d885b add open file tmp 2023-06-20 21:38:39 +02:00
35ff22d463 change parameter for webpage 2023-06-20 00:17:38 +02:00
7dace5bdb7 add file tmp 2023-06-19 23:58:59 +02:00
703cc8922a Merge pull request 'diff-img' (#19) from diff-img into master
Reviewed-on: #19
2023-06-16 22:08:50 +00:00
ff3ee301fb diff img path done 2023-06-15 00:10:44 +02:00
04da5bc5f6 diff path network 2023-06-13 22:00:51 +02:00
f01a69a1e7 Merge pull request 'wpchange' (#18) from wpchange into master
Reviewed-on: #18
2023-06-12 22:48:57 +00:00
da4db0277a add img a change 2023-06-13 00:46:18 +02:00
7228911e68 add js and css 2023-06-13 00:38:34 +02:00
9e7e1b27fd change WIP test 2023-06-11 20:24:22 +02:00
16368c13bb add WPChange 2023-06-10 01:58:08 +02:00
c631909cb6 WPchange wip 2023-06-06 00:22:16 +02:00
3e76892676 add wpchange 2023-06-05 23:46:57 +02:00
3e75f05340 Merge pull request 'add-parameter' (#17) from add-parameter into master
Reviewed-on: #17
2023-06-05 20:58:51 +00:00
e48b262d7e add parameter no-image 2023-06-03 09:07:33 +02:00
2f1c081823 add parameter 2023-06-01 15:28:48 +02:00
4bd6f5c038 Merge pull request 'add wait' (#16) from wait_remove into master
Reviewed-on: #16
2023-05-29 21:36:38 +00:00
d3a03e1cb3 add wait 2023-05-29 23:36:11 +02:00
f507efce60 Merge pull request 'replace-exception' (#15) from replace-exception into master
Reviewed-on: #15
2023-05-29 21:29:18 +00:00
75c9fa0ad3 fix if 2023-05-28 22:42:38 +02:00
110ccc4bb1 replace exception for wpexport 2023-05-28 22:42:04 +02:00
269a9e9ccd add replace exception import 2023-05-28 22:31:46 +02:00
4c0ec09d91 move exception 2023-05-28 22:07:43 +02:00
42cfb30583 Merge pull request 'remove-thread' (#14) from remove-thread into master
Reviewed-on: #14
2023-05-26 22:18:19 +00:00
c76b20e64a add remove multithread 2023-05-27 00:16:41 +02:00
aff69bfcbc add multithread for remove 2023-05-27 00:06:11 +02:00
fd426f150d add variable 2023-05-26 17:50:57 +02:00
e21721cac1 move exception 2023-05-26 17:44:28 +02:00
69504687ef add count 2023-05-26 16:38:19 +02:00
5 changed files with 933 additions and 517 deletions

View File

@@ -3,15 +3,34 @@ from requests.auth import HTTPBasicAuth
from getpass import getpass
from urllib.parse import urlparse
from concurrent import futures
from concurrent.futures import as_completed, wait
from concurrent.futures import as_completed, wait, ALL_COMPLETED
import argparse, logging, threading
import argparse, logging, threading, os, glob
from lib.WPImport import WPimport
from lib.WPExport import WPExport
from lib.WPRemove import WPRemove
from lib.WPChange import WPChange
def remove(args, basic, logger, ssl_wordpress):
removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress)
def errorRevert(logger, revert, tmp, parallel=None):
    """Validate the temporary JSON files before resuming a reverted run.

    Does nothing unless ``revert`` is exactly ``True``. Otherwise the
    ``*.json`` files found in ``tmp`` must exist and their count must equal
    the number of workers; on mismatch the files are deleted so a stale
    partition is not reused.

    Parameters:
        logger: logger used to report the failure reason.
        revert: value of the ``--revert`` flag.
        tmp: directory holding the per-thread ``*.json`` files.
        parallel: number of workers. When omitted, the module-level ``args``
            namespace created by the command-line entry point is used, which
            keeps the existing three-argument call sites working.

    Raises:
        SystemExit: with status 1 when no file is found or the count differs.
    """
    if revert is not True:
        return
    if parallel is None:
        # Legacy behaviour: the script binds ``args`` at module level under
        # ``if __name__ == '__main__'``; it is not available on import.
        parallel = args.parallel
    files_tmp = glob.glob("{0}/*.json".format(tmp))
    if len(files_tmp) == 0:
        logger.error("Error revert, because files not found")
        exit(1)
    if len(files_tmp) != int(parallel):
        # The saved partition was produced for another worker count: drop it.
        for file_r in files_tmp:
            os.remove(file_r)
        logger.error("Error revert, because number files tmp is incompatible with parallel number")
        exit(1)
def change(index, number, args, logger, tmp, revert):
    """Run one WPChange worker over ``args.directory``.

    ``index`` and ``number`` are forwarded as the worker's ``index_name``
    and ``number_thread``; ``tmp`` and ``revert`` are passed through
    unchanged.
    """
    worker = WPChange(
        logger=logger,
        index_name=index,
        number_thread=number,
        tmp=tmp,
    )
    worker.fromDirectory(args.directory, revert)
    # Drop the reference explicitly so the worker's destructor runs here.
    del worker
def remove(index, number, args, basic, logger, ssl_wordpress):
removeWp = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress, index_name=index, number_thread=number)
if args.remove == True:
for i in args.wordpress.split(","):
removeWp.setUrl(i)
@@ -33,21 +52,21 @@ def remove(args, basic, logger, ssl_wordpress):
del removeWp
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog):
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog, revert, tmp):
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread)
if not revert:
exportWp.getUrlPage(name_thread, max_thread)
for i in ["article", "page"]:
for j in ["publications", "principal"]:
if html is False:
exportWp.downloadHTML(webpage[j][i])
exportWp.downloadHTML(j, i)
if img is False:
exportWp.downloadImg(webpage[j][i])
exportWp.downloadImg(j, i)
del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog):
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image, revert, tmp):
canalblog = canalblog.split(",")
wordpress = wordpress.split(",")
name = "Thread-{0}".format(int(name_thread) + 1)
@@ -63,14 +82,15 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
except Exception as err:
logger.error("{0} : parsing error : {1}".format(name, err))
exit(1)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog, tmp=tmp)
if not revert:
exportWp.getUrlPage(name_thread, max_thread)
del exportWp
for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp)
for k in ["article", "page"]:
for l in ["publications", "principal"]:
importWp.fromUrl(webpage[l][k])
importWp.fromUrl(l, k)
del importWp
else:
@@ -86,9 +106,10 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
logger.error("parsing error : {0}".format(err))
exit(1)
exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread)
if not revert:
exportWp.getUrlPage(name_thread, max_thread)
del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image, tmp=tmp)
for k in ["article", "page"]:
for l in ["publications", "principal"]:
@@ -97,24 +118,24 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress):
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image, revert):
name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",")
wordpress = wordpress.split(",")
if serial is False:
for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
for j in directory:
importWp.fromDirectory(j, name_thread, max_thread)
importWp.fromDirectory(j, name_thread, max_thread, revert)
del importWp
else:
if len(directory) != len(wordpress):
logger.error("{0} : Error : Number directory is differant than wordpress".format(name))
logger.error("{0} : Error : Number directory is different than wordpress".format(name))
exit(1)
for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp.fromDirectory(directory[i])
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
importWp.fromDirectory(directory[i], name_thread, max_thread, revert)
del importWp
@@ -127,6 +148,8 @@ if __name__ == '__main__':
parser.add_argument("--parser", help="Parser content", default="html.parser")
parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1)
parser.add_argument("--no-ssl", help="No ssl for canalblog and/or wordpress (example wordpress,canalblog)", dest="ssl", default="")
parser.add_argument("--revert", help="Restart a work from stopping work", action="store_true")
parser.add_argument("--tmp", help="directory tmp", default="/tmp/import_export_canablog")
subparsers = parser.add_subparsers(dest="command")
@@ -143,6 +166,10 @@ if __name__ == '__main__':
import_parser.add_argument("--remove-categories", help="Remove all categories", dest="categories", action="store_true")
import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true")
import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true")
# "store_true" flags must default to the boolean False: the previous
# default="store_false" was a non-empty string, so the value was truthy even
# when the flag was not given on the command line.
import_parser.add_argument("--no-create", help="No create post", dest="create", default=False, action="store_true")
import_parser.add_argument("--no-update", help="No update post", dest="update", default=False, action="store_true")
import_parser.add_argument("--no-image", help="No image add or update", dest="image", default=False, action="store_true")
remove_parser = subparsers.add_parser("remove")
@@ -168,8 +195,15 @@ if __name__ == '__main__':
export_parser.add_argument("--no-img", help="No img", dest="img", action="store_true")
export_parser.add_argument("--no-html", help="No HTML", dest="html", action="store_true")
change_parser = subparsers.add_parser("change")
change_parser.add_argument("--directory",
default="",
help="Directory")
change_parser.add_argument("--file",
default="",
help="File")
args = parser.parse_args()
logger = logging.getLogger('import export canalblog')
@@ -204,6 +238,8 @@ if __name__ == '__main__':
fileHandler.setFormatter(formatter)
logger.addHandler(fileHandler)
os.makedirs(args.tmp, exist_ok=True)
if args.command == "import" or args.command == "remove":
password = args.password
if len(args.password) == 0:
@@ -221,23 +257,29 @@ if __name__ == '__main__':
importWp.setUrl(i)
importWp.fromFile(files=args.file.split(","))
if len(args.directory) > 0:
remove(args, basic, logger, ssl_wordpress)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
errorRevert(logger, args.revert, args.tmp)
wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress)
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image, args.revert)
for i in range(0, int(args.parallel))
]
except Exception as err:
logger.error("Threading error : {0}".format(err))
if len(args.canalblog) > 0:
remove(args, basic, logger, ssl_wordpress)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
errorRevert(logger, args.revert, args.tmp)
wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog)
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image, args.revert, args.tmp)
for i in range(0, int(args.parallel))
]
except Exception as err:
logger.error("Threading error : {0}".format(err))
exit(0)
@@ -271,7 +313,7 @@ if __name__ == '__main__':
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [
ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog)
ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog, args.revert, args.tmp)
for i in range(0, int(args.parallel))
]
except Exception as err:
@@ -280,5 +322,23 @@ if __name__ == '__main__':
if args.command == "remove":
remove(args, basic, logger, ssl_wordpress)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
except Exception as err:
logger.error("Thread error for remove : {0}".format(err))
exit(0)
if args.command == "change":
if len(args.directory) > 0:
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
errorRevert(logger, args.revert, args.tmp)
wait_for = [ ex.submit(change, i, args.parallel, args, logger, args.tmp, args.revert) for i in range(0, int(args.parallel)) ]
except Exception as err:
logger.error("Thread error for remove : {0}".format(err))
if len(args.file) > 0:
changeWp = WPChange(logger=logger)
for filei in args.file.split(","):
changeWp.fromFile(filei)
exit(0)

173
lib/WPChange.py Normal file
View File

@@ -0,0 +1,173 @@
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
class WPChange:
    """Rewrite remote asset URLs inside exported HTML files to local paths.

    For every processed file, ``img`` sources, ``script`` sources,
    stylesheet ``link`` targets and image links opened in a new tab are
    replaced when they carry a host name: images go under ``/img/<host>/``,
    scripts under ``/dists/js/<host>/`` and stylesheets under
    ``/dists/css/<host>/``.

    Several instances can share one directory tree: each one writes its
    slice of the file list to ``<tmp>/<name>.json`` and then processes that
    slice.
    """

    # Constructor
    def __init__(self, index_name=1, number_thread=1, logger=None, parser="html.parser", tmp="/tmp/import_export_canablog"):
        """Store the worker settings.

        Parameters:
            index_name: index of this worker, also used to build its name.
            number_thread: total number of workers sharing the work.
            logger: logger receiving progress and error messages.
            parser: BeautifulSoup parser name.
            tmp: directory holding the per-worker JSON file lists.
        """
        self._name = "Thread-{0}".format(index_name)
        self._logger = logger
        self._number_thread = number_thread
        self._parser = parser
        self._tmp = tmp
        self._index_name = index_name

    # Destructor
    def __del__(self):
        print("{0} : Import finished".format(self._name))

    # Public method

    ## From directory
    def fromDirectory(self, directory="", revert=False):
        """Process this worker's share of the files below ``<directory>/archives``.

        Parameters:
            directory: root of the export; only its ``archives`` subtree is
                scanned, and only files located in sub-directories of it.
            revert: when ``False`` the worker's slice is computed and saved
                first; otherwise the previously saved slice is reused.
        """
        self._directory = directory
        directory = "{0}/archives".format(directory)
        if not os.path.isdir(directory):
            # Listing a missing directory would raise inside the worker.
            self._logger.error("{0} : No files for {1}".format(self._name, directory))
            return
        directories = self._getDirectories([], "{0}".format(directory))
        if len(directories) > 0:
            # Every worker lists the tree on its own; sorting guarantees they
            # all slice the same ordering, so slices never overlap.
            files = sorted(self._getFiles(directories))
            if revert is False:
                self._tmpFiles(files=files, number_thread=self._index_name, max_thread=self._number_thread)
            self._fromFileTmp()
        else:
            self._logger.error("{0} : No files for {1}".format(self._name, directory))

    ## From file
    def fromFile(self, files=None):
        """Process the given files, skipping paths that do not exist.

        Parameters:
            files: list of paths, or a single path given as a string.

        An earlier definition of this method taking ``number_thread`` and
        ``max_thread`` was shadowed by this one and has been removed.
        """
        if files is None:
            files = []
        elif isinstance(files, str):
            # A bare string would otherwise be iterated character by character.
            files = [files]
        total = len(files)
        for position, path in enumerate(files, start=1):
            if os.path.exists(path):
                self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, position, total, path))
                self._change(path)

    # Private method

    def _fromFileTmp(self):
        """Process the files listed in this worker's JSON file."""
        try:
            with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
                files = json.loads(file.read())
            self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
            total = len(files)
            for position, path in enumerate(files, start=1):
                if os.path.exists(path):
                    self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, position, total, path))
                    self._change(path)
        except Exception as ex:
            self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))

    def _tmpFiles(self, files=None, number_thread=1, max_thread=1):
        """Save this worker's slice of ``files`` to ``<tmp>/<name>.json``.

        Parameters:
            files: complete list of files shared by all workers.
            number_thread: zero-based index of this worker.
            max_thread: total number of workers.

        The list is cut into ``max_thread`` equal slices; the last worker
        also receives the remainder so that no file is left out.
        """
        if files is None:
            files = []
        index = int(number_thread)
        workers = int(max_thread)
        divFiles = len(files) // workers
        firstRange = divFiles * index
        currentRangeFiles = firstRange + divFiles
        if index == workers - 1:
            # Integer division drops len(files) % workers entries otherwise.
            currentRangeFiles = len(files)
        self._logger.debug("{0} : index : {1}".format(self._name, number_thread))
        self._logger.debug("{0} : first range : {1}".format(self._name, firstRange))
        self._logger.debug("{0} : last range : {1}".format(self._name, currentRangeFiles))
        webpage = list(files[firstRange:currentRangeFiles])
        try:
            string_webpage = json.dumps(webpage)
            # Context manager so the handle is flushed and closed before
            # _fromFileTmp reads the file back.
            with open("{0}/{1}.json".format(self._tmp, self._name), "wt") as file:
                file.write(string_webpage)
        except Exception as ex:
            self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))

    ## Get all files
    def _getFiles(self, item):
        """Return the regular files located directly in each directory of ``item``."""
        files = []
        for directory in item:
            for entry in os.listdir(directory):
                path = "{0}/{1}".format(directory, entry)
                if os.path.isfile(path):
                    files.append(path)
        return files

    ## Get directories
    def _getDirectories(self, subdirectory, item):
        """Append every directory found below ``item`` to ``subdirectory``.

        The list is extended in place and returned; ``item`` itself is not
        added.
        """
        for entry in os.listdir(item):
            path = "{0}/{1}".format(item, entry)
            if os.path.isdir(path):
                subdirectory.append(path)
                self._getDirectories(subdirectory, path)
        return subdirectory

    ## Change path img file
    def _change(self, file):
        """Rewrite the remote asset URLs of ``file`` in place.

        Only URLs with a host part are replaced. Read, parse and write
        errors are logged and do not propagate.
        """
        ext_img = ["png", "svg", "gif", "jpg", "jpeg"]
        try:
            with open(file, 'r') as f:
                content = f.read()
            soup = BeautifulSoup(content, self._parser)

            # Images
            for tag in soup.find_all("img"):
                src = tag.get("src")
                if src is None:
                    continue
                o = urlparse(src)
                if len(o.netloc) > 0:
                    self._logger.info("{0} : Change source image {1} /img/{2}/{3}".format(self._name, src, o.netloc, o.path))
                    content = content.replace(src, "/img/{0}/{1}".format(o.netloc, o.path))

            # Scripts
            for tag in soup.find_all("script", {"type": "text/javascript"}):
                src = tag.get("src")
                if src is None:
                    continue
                o = urlparse(src)
                if len(o.netloc) > 0:
                    self._logger.info("{0} : Change source js {1} /dists/js/{2}/{3}".format(self._name, src, o.netloc, o.path))
                    content = content.replace(src, "/dists/js/{0}/{1}".format(o.netloc, o.path))

            # Stylesheets
            for tag in soup.find_all("link", {"rel": "stylesheet"}):
                href = tag.get("href")
                if href is None:
                    continue
                o = urlparse(href)
                if len(o.netloc) > 0:
                    self._logger.info("{0} : Change source css {1} /dists/css/{2}/{3}".format(self._name, href, o.netloc, o.path))
                    content = content.replace(href, "/dists/css/{0}/{1}".format(o.netloc, o.path))

            # Links opened in a new tab that point to an image file
            for tag in soup.find_all("a", {"target": "_blank"}):
                href = tag.get("href")
                if href is None:
                    continue
                o = urlparse(href)
                if len(o.netloc) > 0:
                    ext = o.path.split(".")[-1]
                    if ext in ext_img:
                        self._logger.info("{0} : Change a img {1} /img/{2}/{3}".format(self._name, href, o.netloc, o.path))
                        content = content.replace(href, "/img/{0}/{1}".format(o.netloc, o.path))

            try:
                with open(file, "w") as f:
                    self._logger.info("{0} : File write : {1}".format(self._name, file))
                    f.write(content)
            except Exception as ex:
                self._logger.error("{0} : Error for write file {1} : {2}".format(self._name, file, ex))
        except Exception as ex:
            self._logger.error("{0} : Error for read file {1} : {2}".format(self._name, file, ex))

View File

@@ -1,12 +1,12 @@
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, argparse, logging
import requests, os, argparse, logging, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPExport:
def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True):
def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True, tmp="/tmp/import_export_canablog"):
self._url = url
self._logger = logger
self._parser = parser
@@ -22,7 +22,7 @@ class WPExport:
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._tmp = tmp
# Destructor
def __del__(self):
@@ -57,13 +57,13 @@ class WPExport:
# Download HTML
def downloadHTML(self, webpage):
self._downloadPage(webpage, self._dir)
def downloadHTML(self, first, second):
self._downloadPage(webpage[first][second], self._dir)
# Download Image
def downloadImg(self, webpage):
page_src = self._getImg(webpage)
def downloadImg(self, first, second):
page_src = self._getImg(webpage[first][second])
o = urlparse(self._url)
self._downloadPage(page_src, "{0}/{1}/{2}".format(self._dir, o.path, "img"))
@@ -72,20 +72,23 @@ class WPExport:
def getUrlPage(self, index_thread, max_thread):
try:
page = self._request.get(self._url)
except Exception as err:
page_url = []
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
ul = soup.find_all("ul", id="listsmooth")
for anchor in ul[0].find_all("a"):
href = anchor.get('href', '/')
if href != "#":
page_url.append(href)
else:
self._logger.error("{0} : URL did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
page_url = []
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
ul = soup.find_all("ul", id="listsmooth")
for anchor in ul[0].find_all("a"):
href = anchor.get('href', '/')
if href != "#":
page_url.append(href)
else:
self._logger.error("{0} : URL did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Exception error : {1}".format(self._name, err))
webpage = {"principal": {"page":[], "article":[]}, "publications": {"page":[], "article":[]}}
for i in page_url:
@@ -97,64 +100,75 @@ class WPExport:
section = "principal"
try:
page = self._request.get(i)
except Exception as err:
if page.status_code == 200:
self._logger.info("{0} : page : {1}".format(self._name, i))
if i not in webpage[section]["page"]:
webpage[section]["page"].append(i)
soup = BeautifulSoup(page.text, self._parser)
class_div = soup.find_all("div", class_="pagingfirstline")
if len(class_div) > 0:
pagingfirstline = class_div[0].find_all("a")
if len(pagingfirstline) > 1:
lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
self._logger.debug("{0} : Last page {1}".format(self._name, lastpage))
element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
number_page = element_lastpage.split("-")[0].split("p")[1]
number_lastpage = int(number_page) / 10
setPageDivided = int(number_lastpage) / max_thread
if setPageDivided > int(setPageDivided):
setPageDivided = setPageDivided + 1
setPagePart = setPageDivided * (index_thread + 1)
firstPagePart = (setPagePart - setPageDivided)
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart)))
for j in range(int(firstPagePart),int(setPagePart)+1):
paging = j * 10
categorie = urlparse(i).path.split("/")
url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging)
if len(categorie) > 2:
url_paging = "{0}/archives/{1}/p{2}-10.html".format(self._url, categorie[2], paging)
self._logger.info("{0} : {1}".format(self._name, url_paging))
if url_paging not in webpage[section]["page"]:
webpage[section]["page"].append(url_paging)
page = self._request.get(url_paging)
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
h2 = soup.find_all("h2")
self._logger.debug("{0} : {1} H2 : {2}".format(self._name, url_paging, h2))
for title in h2:
self._logger.debug("{0} : {1} a : {2}".format(self._name, url_paging, title.find_all("a")))
href = title.find_all("a")[0].get("href", "/")
if href not in webpage[section]["article"]:
try:
o = urlparse(href)
o = o._replace(scheme="https").geturl()
webpage[section]["article"].append(o)
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
else:
self._logger.error("{0} : web didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
self._logger.info("{0} : page : {1}".format(self._name, i))
if i not in webpage[section]["page"]:
webpage[section]["page"].append(i)
soup = BeautifulSoup(page.text, self._parser)
class_div = soup.find_all("div", class_="pagingfirstline")
if len(class_div) > 0:
pagingfirstline = class_div[0].find_all("a")
if len(pagingfirstline) > 1:
lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
self._logger.debug("{0} : Last page {1}".format(self._name, lastpage))
except Exception as err:
self._logger.error("{0} : Exception error : {1}".format(self._name, err))
exit(1)
try:
string_webpage = json.dumps(webpage)
open("{0}/{1}.json".format(self._tmp, self._name), "wt").write(string_webpage)
except Exception as ex:
self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
number_page = element_lastpage.split("-")[0].split("p")[1]
number_lastpage = int(number_page) / 10
setPageDivided = int(number_lastpage) / max_thread
if setPageDivided > int(setPageDivided):
setPageDivided = setPageDivided + 1
setPagePart = setPageDivided * (index_thread + 1)
firstPagePart = (setPagePart - setPageDivided)
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart)))
for j in range(int(firstPagePart),int(setPagePart)+1):
paging = j * 10
categorie = urlparse(i).path.split("/")
url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging)
if len(categorie) > 2:
url_paging = "{0}/archives/{1}/p{2}-10.html".format(self._url, categorie[2], paging)
self._logger.info("{0} : {1}".format(self._name, url_paging))
if url_paging not in webpage[section]["page"]:
webpage[section]["page"].append(url_paging)
page = self._request.get(url_paging)
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
h2 = soup.find_all("h2")
self._logger.debug("{0} : {1} H2 : {2}".format(self._name, url_paging, h2))
for title in h2:
self._logger.debug("{0} : {1} a : {2}".format(self._name, url_paging, title.find_all("a")))
href = title.find_all("a")[0].get("href", "/")
if href not in webpage[section]["article"]:
try:
o = urlparse(href)
o = o._replace(scheme="https").geturl()
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
webpage[section]["article"].append(o)
else:
self._logger.error("{0} : web didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
return webpage
@@ -183,47 +197,52 @@ class WPExport:
def _getScriptCss(self, js, css):
try:
page = self._request.get(self._url)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
exit(1)
page_url = []
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
if js is True:
script = soup.find_all("script")
for anchor in script:
src = anchor.get("src", "/")
if src != "/":
try:
u = urlparse(self._url)
o = urlparse(src)
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
if o.netloc == "":
o = o._replace(netloc=u.netloc)
o = o._replace(scheme=u.scheme)
page_url.append(o.geturl())
if css is True:
link = soup.find_all("link")
for anchor in link:
rel = anchor.get("rel")
if rel[0] == "stylesheet":
href = anchor.get("href", "/")
if href != "/":
page_url = []
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
if js is True:
script = soup.find_all("script")
for anchor in script:
src = anchor.get("src", "/")
if src != "/":
try:
u = urlparse(self._url)
o = urlparse(href)
o = urlparse(src)
if o.netloc == "":
o = o._replace(netloc=u.netloc)
o = o._replace(scheme=u.scheme)
page_url.append(o.geturl())
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
if o.netloc == "":
o = o._replace(netloc=u.netloc)
o = o._replace(scheme=u.scheme)
page_url.append(o.geturl())
else:
self._logger.error("JS or CSS did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
if css is True:
link = soup.find_all("link")
for anchor in link:
rel = anchor.get("rel")
if rel[0] == "stylesheet":
href = anchor.get("href", "/")
if href != "/":
try:
u = urlparse(self._url)
o = urlparse(href)
if o.netloc == "":
o = o._replace(netloc=u.netloc)
o = o._replace(scheme=u.scheme)
page_url.append(o.geturl())
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
else:
self._logger.error("JS or CSS did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
except ConnectionError as err:
self._logger.error("Connection error : {0}".format(err))
exit(1)
except Exception as err:
self._logger.error("Exception error : {0}".format(err))
return page_url
@@ -234,22 +253,25 @@ class WPExport:
for i in webpage:
try:
page = self._request.get(i)
except Exception as err:
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
img = soup.find_all("img")
self._logger.info("{0} : image from page: {1} : ".format(self._name,i))
for anchor in img:
src = anchor.get("src", "/")
if src != "/":
if src not in page_img:
self._logger.info("{0} : image: {1} : ".format(self._name, src))
page_img.append(src)
else:
self._logger.error("{0} : Image did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
img = soup.find_all("img")
self._logger.info("{0} : image from page: {1} : ".format(self._name,i))
for anchor in img:
src = anchor.get("src", "/")
if src != "/":
if src not in page_img:
self._logger.info("{0} : image: {1} : ".format(self._name, src))
page_img.append(src)
else:
self._logger.error("{0} : Image did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Exception error : {1}".format(self._name, err))
return page_img
@@ -260,31 +282,33 @@ class WPExport:
for i in range(0, len(webpage)):
try:
o = urlparse(webpage[i])
path_web = o.path.split("/")
filePageWeb = path_web[len(path_web)-1]
path_web.pop(len(path_web)-1)
dir_page_web = "/".join(path_web)
self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web))
try:
r = self._request.get(webpage[i])
if r.status_code == 200:
fileDownload = "{0}/{1}/index.html".format(backup_dir, o.netloc)
if len(dir_page_web) > 0 and len(filePageWeb) > 0:
fileDownload = "{0}/{1}{2}/{3}".format(backup_dir, o.netloc, dir_page_web, filePageWeb)
self._logger.info("{0} : {1}/{2} : {3}".format(self._name, i+1, len(webpage), fileDownload))
try:
open(fileDownload, "wb").write(r.content)
except Exception as err:
self._logger.error("file error : {0}".format(err))
exit(1)
else:
self._logger.error("Not download due status code : {0}".format(r.status_code))
self._logger.debug(r.content)
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} Exception error : {1}".format(self._name, err))
except Exception as err:
self._logger.error("parsing error : {0}".format(err))
exit(1)
path_web = o.path.split("/")
filePageWeb = path_web[len(path_web)-1]
path_web.pop(len(path_web)-1)
dir_page_web = "/".join(path_web)
self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web))
try:
r = self._request.get(webpage[i])
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} Exception error : {1}".format(self._name, err))
if r.status_code == 200:
fileDownload = "{0}/{1}/index.html".format(backup_dir, o.netloc)
if len(dir_page_web) > 0 and len(filePageWeb) > 0:
fileDownload = "{0}/{1}{2}/{3}".format(backup_dir, o.netloc, dir_page_web, filePageWeb)
self._logger.info("{0} : {1}/{2} : {3}".format(self._name, i+1, len(webpage), fileDownload))
try:
open(fileDownload, "wb").write(r.content)
except Exception as err:
self._logger.error("file error : {0}".format(err))
exit(1)
else:
self._logger.error("Not download due status code : {0}".format(r.status_code))
self._logger.debug(r.content)

View File

@@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport:
# Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True):
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False, tmp="/tmp/import_export_canablog"):
self._name = name
self._basic = basic
self._wordpress = wordpress
@@ -16,6 +16,7 @@ class WPimport:
self._parser = parser
self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._protocol = "https"
self._directory = "backup"
if ssl_wordpress is False:
self._protocol = "http"
self._request = requests.Session()
@@ -24,7 +25,10 @@ class WPimport:
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._no_create = no_create
self._no_update = no_update
self._no_image = no_image
self._tmp = tmp
# Destructor
def __del__(self):
@@ -35,48 +39,58 @@ class WPimport:
def setUrl(self, wordpress):
    """Set the WordPress host that subsequent REST calls are sent to.

    The value is stored in ``self._wordpress`` and interpolated into the
    ``{protocol}://{host}/wp-json/...`` URLs built by the other methods.
    """
    self._wordpress = wordpress
def fromUrl(self, first, second):
    """Import the pages queued in this worker's temporary JSON file.

    The queue is read from ``<self._tmp>/<self._name>.json``; the list of
    URLs to import is the value found at ``[first][second]`` in that
    document. Each URL is downloaded and parsed: a page containing a
    ``div.articlebody`` element goes to ``_addOrUpdatePost``, any other page
    goes to ``_addOrUpdateFeaturedMedia``.

    After every successful import the URL is removed from the queue and the
    JSON file is rewritten, so an interrupted run can resume with the pages
    that are still pending.

    The process exits with status 1 on a connection error or on an I/O
    error; any other exception is logged and the next URL is tried.
    """
    tmp_path = "{0}/{1}.json".format(self._tmp, self._name)
    try:
        with open(tmp_path) as file:
            webpage_content = json.loads(file.read())
        self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(webpage_content)))
        # Iterate over a snapshot: the live list inside webpage_content is
        # shrunk as pages are imported, and indexing a list that is being
        # shrunk skips entries and eventually runs out of range.
        webpage = list(webpage_content[first][second])
    except Exception as ex:
        self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
        return
    for i, url in enumerate(webpage):
        try:
            r = self._request.get(url)
            if r.status_code == 200:
                self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), url))
                soup = BeautifulSoup(r.content, self._parser)
                articlebody = soup.find_all("div", class_="articlebody")
                if len(articlebody) > 0:
                    self._addOrUpdatePost(soup)
                else:
                    self._addOrUpdateFeaturedMedia(soup)
                # Drop the imported URL by value and persist the queue.
                # webpage_content itself must stay a parsed structure, so
                # the serialized text is never assigned back to it.
                webpage_content[first][second].remove(url)
                with open(tmp_path, "wt") as file:
                    file.write(json.dumps(webpage_content))
            else:
                self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, url, r.status_code))
                self._logger.debug("{0} : {1}".format(self._name, r.content))
        except ConnectionError as err:
            self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, url, err))
            exit(1)
        except IOError as err:
            self._logger.error("{0} : Connection error for IO url {1} : {2}".format(self._name, url, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for get url {1} : {2}".format(self._name, url, err))
def fromDirectory(self, directory="", number_thread=1, max_thread=1, revert=False):
    """Import the article files found under ``<directory>/archives``.

    ``directory`` is remembered in ``self._directory``. The sub-directories
    of ``<directory>/archives`` are collected with ``_getDirectories`` and
    their files with ``_getFiles``.

    Unless ``revert`` is true, this worker's share of the files (selected by
    ``number_thread`` / ``max_thread``) is first written to its temporary
    JSON file by ``_tmpFiles``. The import itself is always driven from that
    temporary file via ``_fromFileTmp``, so ``revert=True`` re-uses the file
    left by a previous run instead of regenerating it.

    An error is logged when no directory is found.
    """
    self._directory = directory
    directory = "{0}/archives".format(directory)
    directories = self._getDirectories([], "{0}".format(directory))
    if len(directories) > 0:
        files = self._getFiles(directories)
        if revert is False:
            self._tmpFiles(files=files, number_thread=number_thread, max_thread=max_thread)
        # NOTE(review): placed outside the revert check so that a reverted
        # run still imports from the existing tmp file - confirm against
        # the repository, the indentation was lost in this view.
        self._fromFileTmp()
    else:
        self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
def fromFile(self, files=[]):
for i in range(0, len(files)):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i]))
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), files[i]))
with open(files[i], 'r') as f:
content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
@@ -87,10 +101,52 @@ class WPimport:
self._addOrUpdatePost(soup)
else:
self._addOrUpdateFeaturedMedia(soup)
# Private method
def _fromFileTmp(self):
    """Import every article file listed in this worker's temporary JSON file.

    ``<self._tmp>/<self._name>.json`` is expected to hold a JSON list of
    file paths. Each existing file is read and parsed: a page containing a
    ``div.articlebody`` element goes to ``_addOrUpdatePost``, any other page
    goes to ``_addOrUpdateFeaturedMedia``. Paths that do not exist are
    skipped.

    A failure to read or decode the JSON file is logged and ends the call.
    A failure while importing one file is logged with that file's path and
    does not stop the remaining files from being processed.
    """
    try:
        with open("{0}/{1}.json".format(self._tmp, self._name)) as file:
            files = json.loads(file.read())
    except Exception as ex:
        self._logger.error("{0} : Read file json from tmp : {1}".format(self._name, ex))
        return
    self._logger.debug("{0} : size of webpage : {1}".format(self._name, len(files)))
    for i, path in enumerate(files):
        if not os.path.exists(path):
            continue
        self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, len(files), path))
        try:
            with open(path, 'r') as f:
                content = f.read()
            self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
            soup = BeautifulSoup(content, self._parser)
            articlebody = soup.find_all("div", class_="articlebody")
            self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
            if len(articlebody) > 0:
                self._addOrUpdatePost(soup)
            else:
                self._addOrUpdateFeaturedMedia(soup)
        except Exception as ex:
            # Report the failure against the file that caused it instead of
            # blaming the JSON queue, and keep going with the other files.
            self._logger.error("{0} : Error while importing file {1} : {2}".format(self._name, path, ex))
def _tmpFiles(self, files=[], number_thread=1, max_thread=1):
    """Write this worker's share of ``files`` to its temporary JSON file.

    ``files`` is split into ``max_thread`` consecutive slices of
    ``len(files) // max_thread`` entries; the worker whose zero-based index
    is ``number_thread`` gets the matching slice. The last worker
    (``number_thread == max_thread - 1``) also receives the remainder, so
    every file is assigned to exactly one worker even when the count is not
    a multiple of ``max_thread``.

    The slice is stored as a JSON list in ``<self._tmp>/<self._name>.json``.
    A failure to write that file is logged, not raised.
    """
    divFiles = len(files) // max_thread
    firstRange = divFiles * number_thread
    currentRangeFiles = firstRange + divFiles
    if number_thread == max_thread - 1:
        # Hand the leftover files to the last worker instead of dropping them.
        currentRangeFiles = len(files)
    self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
    self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
    self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
    # Slicing tolerates an out-of-range worker index (yields an empty share).
    webpage = files[firstRange:currentRangeFiles]
    try:
        string_webpage = json.dumps(webpage)
        with open("{0}/{1}.json".format(self._tmp, self._name), "wt") as file:
            file.write(string_webpage)
    except Exception as ex:
        self._logger.error("{0} : Error for writing webpage : {1}".format(self._name, ex))
## replace caracter
def _replaceCaracter(self, title_rendered):
@@ -137,60 +193,65 @@ class WPimport:
params = {"search":h2, "type":"post"}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/search".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err:
if page.status_code == 200:
result = page.json()
if len(result) > 0:
if h2 == result[0]["title"]:
img = i.find_all("img")
if len(img) > 0:
img_src = img[0].get("src")
try:
page = self._request.get(img_src)
if page.status_code == 200:
name_img = img_src.replace("_q", "")
name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
if page.status_code == 200:
res = page.json()
if len(res) > 0:
id_media = res[0]["id"]
data = {"featured_media": id_media}
try:
r = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"], self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if r.status_code == 200:
self._logger.info("{0} : Add media featured : {1}".format(self._name, r.json()["title"]["raw"]))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for post media featured : {1}".format(self._name, err))
else:
self._logger.info("{0} : No media found for {1}".format(self._name, h2))
else:
self._logger.error("{0} : Connection error with status code for search featured media: {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error search featured media : {1}".format(self._name, err))
else:
self._logger.error("{0} : Connection error for get featured media with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get featured media : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get featured media : {1}".format(self._name, err))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
if len(result) > 0:
if h2 == result[0]["title"]:
img = i.find_all("img")
if len(img) > 0:
img_src = img[0].get("src")
try:
page = self._request.get(img_src)
except Exception as err:
self._logger.error("{0} : Connection error for get featured media : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
name_img = img_src.replace("_q", "")
name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err:
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
res = page.json()
if len(res) > 0:
id_media = res[0]["id"]
data = {"featured_media": id_media}
try:
r = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"], self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err:
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("{0} : Add media featured : {1}".format(self._name, r.json()["title"]["raw"]))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
else:
self._logger.info("{0} : No media found for {1}".format(self._name, h2))
else:
self._logger.error("{0} : Connection error with status code for search featured media: {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("{0} : Connection error for get featured media with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
## Association image to post
@@ -199,14 +260,16 @@ class WPimport:
data = {"post": post_id}
try:
r = self._request.post("{2}://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"], self._protocol), auth=self._basic, data=data)
except Exception as err:
if r.status_code == 200:
self._logger.info("{0} : Link image to post {1}".format(self._name, title))
else:
self._logger.error("{0} Connection error with status code for link image to post : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for link image to post : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("{0} : Link image to post {1}".format(self._name, title))
else:
self._logger.error("{0} Connection error with status code for link image to post : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except Exception as err:
self._logger.error("{0} : Exception error for link image to post : {1}".format(self._name, err))
## Add or update img
@@ -226,49 +289,57 @@ class WPimport:
params = { "search": img_name}
try:
r = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err:
self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code))
if r.status_code == 200:
res = r.json()
self._logger.debug("{0} : Number of image in search : {1}".format(self._name, len(res)))
if len(res) > 0:
params = {"force":1}
try:
r = self._request.delete("{2}://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"], self._protocol), auth=self._basic, params=params)
if r.status_code == 200:
self._logger.info("{0} : Image removed {1}".format(self._name, img_name))
else:
self._logger.error("{0} : Image {1} not removed due status code : {2}".format(self._name, img_name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} Connection error for delete image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} Exception error for delete image : {1}".format(self._name, err))
data = page.content
img_type = "image/{0}".format(img_type_file)
if img_type_file == "jpg":
img_type = "image/jpeg"
headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)}
try:
r = self._request.post("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, headers=headers, data=data)
if r.status_code == 201:
self._logger.info("{0} : Image added {1}".format(self._name, img_name))
res = r.json()
media["id"] = res["id"]
media["rendered"] = res["guid"]["rendered"]
else:
self._logger.error("{0} : Image {1}.{2} not added due status code : {3}".format(self._name, img_name, img_type, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for add image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for add image : {1}".format(self._name, err))
exit(1)
else:
self._logger.error("{0} : Connection error for search image with status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code))
if r.status_code == 200:
res = r.json()
self._logger.debug("{0} : Number of image in search : {1}".format(self._name, len(res)))
if len(res) > 0:
params = {"force":1}
try:
r = self._request.delete("{2}://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"], self._protocol), auth=self._basic, params=params)
except Exception as err:
self._logger.error("{0} Connection error for delete image : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("{0} : Image removed {1}".format(self._name, img_name))
else:
self._logger.error("{0} : Image {1} not removed due status code : {2}".format(self._name, img_name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
data = page.content
img_type = "image/{0}".format(img_type_file)
if img_type_file == "jpg":
img_type = "image/jpeg"
headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)}
try:
r = self._request.post("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, headers=headers, data=data)
except Exception as err:
self._logger.error("{0} : Connection error for add image : {1}".format(self._name, err))
exit(1)
if r.status_code == 201:
self._logger.info("{0} : Image added {1}".format(self._name, img_name))
res = r.json()
media["id"] = res["id"]
media["rendered"] = res["guid"]["rendered"]
else:
self._logger.error("{0} : Image {1}.{2} not added due status code : {3}".format(self._name, img_name, img_type, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
else:
self._logger.error("{0} : Connection error for search image with status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except Exception as err:
self._logger.error("{0} : Exception error for search media : {1}".format(self._name, err))
return media
## Add or update comment
@@ -279,28 +350,34 @@ class WPimport:
try:
params = {"post": post, "author_name":i["author"], "date":i["date"]}
page = self._request.get("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err:
if page.status_code == 200:
result = page.json()
for j in result:
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"], self._protocol), params=params, auth=self._basic)
if page.status_code == 200:
self._logger.info("{0} : Comment deleted for {1}".format(self._name, title))
self._logger.debug("{0} : Comment deleted : {1}".format(self._name, j))
else:
self._logger.error("{0} : Comment not deleted for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for delete comment : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for delete comment : {1}".format(self._name, err))
else:
self._logger.error("{0} : Comment not listed for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for search comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
for j in result:
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"], self._protocol), params=params, auth=self._basic)
except Exception as err:
self._logger.error("{0} : Connection error for delete comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
self._logger.info("{0} : Comment deleted for {1}".format(self._name, title))
self._logger.debug("{0} : Comment deleted : {1}".format(self._name, j))
else:
self._logger.error("{0} : Comment not deleted for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("{0} : Comment not listed for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Exception error for search comment : {1}".format(self._name, err))
for i in comment:
data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"], "status": "approved"}
@@ -310,28 +387,35 @@ class WPimport:
params = {"post": post, "author_name":comment[parent_id]["author"], "date":comment[parent_id]["date"]}
try:
page = self._request.get("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err:
if page.status_code == 200:
result = page.json()
if len(result) > 0:
data["parent"]=result[0]["id"]
else:
self._logger.error("{0} : Connection error for parent comment with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for parent comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
if len(result) > 0:
data["parent"]=result[0]["id"]
else:
self._logger.error("{0} : Connection error for parent comment with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Exception error for parent comment : {1}".format(self._name, err))
try:
page = self._request.post("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, data=data)
except Exception as err:
if page.status_code == 201:
self._logger.info("{0} : Comment added for {1}".format(self._name, title))
self._logger.debug("{0} : Data : {1}".format(self._name, data))
else:
self._logger.error("{0} : Comment not added for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for add comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 201:
self._logger.info("{0} : Comment added for {1}".format(self._name, title))
self._logger.debug("{0} : Data : {1}".format(self._name, data))
else:
self._logger.error("{0} : Comment not added for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except Exception as err:
self._logger.error("{0} : Exception error for add comment : {1}".format(self._name, err))
exit(1)
## Check class name
@@ -405,41 +489,64 @@ class WPimport:
img_a = articlebody[0].find_all("a", {"target": "_blank"})
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
list_img = []
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_img)
except Exception as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
if page_img.status_code == 404:
href_img = href_a
if self._no_image is False:
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
href_a_o = urlparse(href_a)
href_img_o = urlparse(href_img)
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_a)
except Exception as err:
if len(href_img_o.netloc) > 0:
img_ok = False
page_img = self._request.get(href_img)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
if page_img.status_code == 200:
img_ok = True
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
else:
if os.path.exists("{0}/..{1}".format(self._directory, href_img)):
page_img = open("{0}/..{1}".format(self._directory, href_img), "r")
img_ok = True
else:
if os.path.exists("{0}/..{1}".format(self._directory, href_a)):
page_img = open("{0}/..{1}".format(self._directory, href_a), "r")
img_ok = True
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if img_ok is True:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
comment_post = self._getComment(comment)
@@ -462,36 +569,36 @@ class WPimport:
try:
params = {"search":title_element, "per_page":"100", "page":index}
page = self._request.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol), auth=self._basic, params=params)
if page.status_code == 200:
result = page.json()
self._logger.debug("{0} : content {3} {2} : {1}".format(self._name, result, title_element, i))
if len(result) > 0:
for k in result:
title_rendered = k["name"]
self._logger.debug("{0} : content {2} : {1}".format(self._name, title_rendered, i))
self._logger.debug("{0} : size of content {3} : {2} - {1}".format(self._name, len(title_rendered), len(title_element), i))
if len(title_element) != len(title_rendered):
title_rendered = self._replaceCaracter(title_rendered)
if title_element == title_rendered:
self._logger.info("{0} : {1} found : {2}".format(self._name, i, title_rendered))
element_exist = True
listelement[i].append(k["id"])
else:
break
if page.status_code == 400:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for {1} : {2}".format(self._name, i, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for {1} : {2}".format(self._name, i, err))
if page.status_code == 200:
result = page.json()
self._logger.debug("{0} : content {3} {2} : {1}".format(self._name, result, title_element, i))
if len(result) > 0:
for k in result:
title_rendered = k["name"]
self._logger.debug("{0} : content {2} : {1}".format(self._name, title_rendered, i))
self._logger.debug("{0} : size of content {3} : {2} - {1}".format(self._name, len(title_rendered), len(title_element), i))
if len(title_element) != len(title_rendered):
title_rendered = self._replaceCaracter(title_rendered)
if title_element == title_rendered:
self._logger.info("{0} : {1} found : {2}".format(self._name, i, title_rendered))
element_exist = True
listelement[i].append(k["id"])
else:
break
if page.status_code == 400:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
self._logger.debug("{0} : Element {3} {2} is {1}".format(self._name, element_exist, title_element, i))
if element_exist == False:
data = {"name": title_element}
@@ -499,18 +606,19 @@ class WPimport:
self._logger.debug("{0} : Data : {1}".format(self._name, data))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
self._logger.info("{0} : {1} created : {2}".format(self._name, i, j))
result = page.json()
listelement[i].append(result["id"])
else:
self._logger.error("{0} : {1} not added due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for post {1} : {2}".format(self._name, i, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for post {1} : {2}".format(self._name, i, err))
if page.status_code == 201:
self._logger.info("{0} : {1} created : {2}".format(self._name, i, j))
result = page.json()
listelement[i].append(result["id"])
else:
self._logger.error("{0} : {1} not added due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
title = articletitle[0].text
author = articleacreator[0].text.lower()
@@ -541,21 +649,19 @@ class WPimport:
page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
if page.status_code == 200:
self._logger.info("{0} : Get author id : {1}".format(self._name, result))
result = page.json()
for a in result:
data["author"] = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
if page.status_code == 200:
self._logger.info("{0} : Get author id : {1}".format(self._name, result))
result = page.json()
for a in result:
data["author"] = a["id"]
else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
page_is_exist = False
for index in range(1,10):
@@ -563,87 +669,90 @@ class WPimport:
try:
self._logger.info("{0} : Search post with index {2} : {1}".format(self._name, title, index))
page = self._request.get("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json()
if len(result) == 0:
break
self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result:
title_rendered = i["title"]["rendered"]
self._logger.info("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
if len(title_rendered) != len(title):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title:
if self._no_update is False:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err))
if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json()
if len(result) == 0:
break
self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result:
title_rendered = i["title"]["rendered"]
self._logger.info("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
if len(title_rendered) != len(title):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else:
self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
if page_is_exist is False:
if page_is_exist is False and self._no_create is False:
try:
self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"]))
page = self._request.post("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 201:
result = page.json()
self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create post : {1}".format(self._name, err))
if page.status_code == 201:
result = page.json()
self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))

View File

@@ -8,14 +8,15 @@ from requests.packages.urllib3.util.retry import Retry
class WPRemove:
# Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, ssl_wordpress=True):
self._name = name
def __init__(self, index_name=1, number_thread=1, basic=None, wordpress="", logger=None, ssl_wordpress=True):
self._basic = basic
self._wordpress = wordpress
self._logger = logger
self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'}
self._name = "Thread-{0}".format(index_name)
self._index_thread = index_name
self._protocol = "https"
self._number_thread = number_thread
if ssl_wordpress is False:
self._protocol = "http"
self._request = requests.Session()
@@ -32,6 +33,24 @@ class WPRemove:
# Public method
def _getCount(self, composant):
    """Return the number of `composant` items reported by the REST endpoint.

    A single-item listing (per_page=1) is requested on
    wp-json/wp/v2/<composant> and the total is read from the
    "X-WP-Total" response header.

    Args:
        composant: collection name inserted in the URL path.

    Returns:
        The header value as an int, or 0 when the status code is not 200
        or any non-connection exception is raised (both cases are logged).

    Raises:
        SystemExit: via exit(1) when a ConnectionError is caught.
    """
    total = 0
    try:
        # One item is enough: only the response header is of interest.
        query = {"per_page":1}
        self._logger.info("{0} : Get count {2} to remove for url : {1}".format(self._name, self._wordpress, composant))
        url = "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, composant, self._protocol)
        response = self._request.get(url, params=query, auth=self._basic, headers=self._headers_json)
        if response.status_code != 200:
            self._logger.error("{0} : Error for list to remove {1} due status code {2}".format(self._name, composant, response.status_code))
            self._logger.debug("{0} : Content error for {1} : {2}".format(self._name, composant, response.content))
        else:
            total = int(response.headers["X-WP-Total"])
    except ConnectionError as err:
        self._logger.error("{0} : Connection error for list {1} to remove : {2}".format(self._name, composant, err))
        exit(1)
    except Exception as err:
        # Best effort: a missing or malformed header leaves the count at 0.
        self._logger.error("{0} : Exception error for list {1} to remove : {2}".format(self._name, composant, err))
    return total
def setUrl(self, wordpress):
    """Replace the WordPress host used to build request URLs.

    Args:
        wordpress: value stored in `self._wordpress`.
    """
    self._wordpress = wordpress
@@ -50,29 +69,60 @@ class WPRemove:
# Private method
def _removeAll(self, composant):
    """Delete the `composant` items this worker can reach, batch by batch.

    Each pass asks `_getCount` for the remaining total, splits it across
    `self._number_thread` workers (at most 100 items per request), lists
    page `self._index_thread` of that split and force-deletes every item
    returned. Passes repeat until the count reaches 0, the listing is not
    answered with a 200, or a pass deletes nothing.

    Items whose slug is "non-classe" are never deleted.

    Args:
        composant: collection name inserted in the URL path
            (wp-json/wp/v2/<composant>). "tags" and "categories" are
            labelled with their "name" field, anything else with
            ["title"]["rendered"].

    Returns:
        None.

    Raises:
        SystemExit: via exit(1) when a ConnectionError is caught.
    """
    # Guard against a zero thread count, which would divide by zero below.
    threads = max(1, int(self._number_thread))
    # A loop rather than self-recursion: one frame however many batches run.
    while True:
        count = self._getCount(composant)
        self._logger.debug("{0} : Count for {1} : {2}".format(self._name, composant, count))
        if count <= 0:
            return
        self._logger.debug("{0} : Number thread for {1} : {2}".format(self._name, composant, self._number_thread))
        # Integer ceiling of count / threads, capped at 100 per request.
        page = min(100, -(-count // threads))
        self._logger.debug("{0} : Page for {1} : {2}".format(self._name, composant, page))
        params = {"per_page":page, "page":self._index_thread}
        self._logger.info("{0} : Params for {1} : {2}".format(self._name, composant, params))
        try:
            self._logger.info("{0} : List {2} to remove for url : {1}".format(self._name, self._wordpress, composant))
            listing = self._request.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, composant, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
            if listing.status_code == 400:
                self._logger.error("{0} : No content for {1} to remove : {2}".format(self._name, composant, listing.status_code))
                return
            if listing.status_code != 200:
                self._logger.error("{0} : Error for list to remove {1} due status code {2}".format(self._name, composant, listing.status_code))
                self._logger.debug("{0} : Content error for {1} : {2}".format(self._name, composant, listing.content))
                return
            removed = 0
            for i in listing.json():
                self._logger.info(i["slug"])
                # NOTE(review): presumably the default category that must
                # be kept - confirm against the target site.
                if i["slug"] == "non-classe":
                    continue
                if composant == "tags" or composant == "categories":
                    title = i["name"]
                else:
                    title = i["title"]["rendered"]
                self._logger.info("{0} : Remove {2} for url {1} : {3}".format(self._name, self._wordpress, composant, title))
                try:
                    # Separate name so the listing response is not clobbered.
                    answer = self._request.delete("{3}://{0}/wp-json/wp/v2/{1}/{2}".format(self._wordpress, composant, i["id"], self._protocol), auth=self._basic, headers=self._headers_json, params={"force":1})
                    if answer.status_code == 200:
                        removed += 1
                        self._logger.info("{0} : Post removed for URL {1} {2} : {3}".format(self._name, self._wordpress, composant, title))
                    else:
                        self._logger.error("{0} : Connection error for post {1} {2} {3} with status code {4}".format(self._name, self._wordpress, composant, title, answer.status_code))
                except ConnectionError as err:
                    self._logger.error("{0} : Connection error for {1} remove : {2}".format(self._name, composant, err))
                    exit(1)
                except Exception as err:
                    self._logger.error("{0} : Exception error for {1} remove : {2}".format(self._name, composant, err))
            # Stop when a pass made no progress (only skipped or failed
            # items remain); repeating would never terminate.
            if removed == 0:
                return
        except ConnectionError as err:
            self._logger.error("{0} : Connection error for list {1} to remove : {2}".format(self._name, composant, err))
            exit(1)
        except Exception as err:
            self._logger.error("{0} : Exception error for list {1} to remove : {2}".format(self._name, composant, err))
            return