19 Commits

Author SHA1 Message Date
fb59746fc0 Merge pull request 'https' (#13) from https into master
Reviewed-on: #13
2023-05-26 09:24:21 +00:00
5916cbff00 fix parameter 2023-05-26 10:04:36 +02:00
cd2fbd5372 add protocol https/http 2023-05-25 00:31:34 +02:00
f3b04f9459 update script backup 2023-05-24 23:34:03 +02:00
a400375e01 remove slugify import 2023-05-24 23:30:23 +02:00
351cb10f01 Merge pull request 'fix-media' (#12) from fix-media into master
Reviewed-on: #12
2023-05-23 14:47:07 +00:00
5c5dc707f5 fix headers search author 2023-05-23 16:46:07 +02:00
f69298179a reduce line code and add private method 2023-05-23 13:45:59 +02:00
d3ec7d147d loop replace 2023-05-23 11:22:37 +02:00
0fc6e78a18 fix title rendered 2023-05-23 00:02:51 +02:00
3718b807ba more message debug 2023-05-21 21:14:36 +02:00
75772ba7f0 remove doublon 2023-05-21 21:12:00 +02:00
769b7f43fc fix add or update post 2023-05-18 00:24:41 +02:00
ba42d56be1 fix webpage 2023-05-16 00:15:16 +02:00
d18f4e1579 Add clean 2023-05-15 23:51:45 +02:00
8bdaea3910 add remove command 2023-05-15 23:42:18 +02:00
f3cb5c4069 fix parameters 2023-05-15 23:22:41 +02:00
cfb24bed0e add remove parameters 2023-05-15 23:21:25 +02:00
ee8674fd59 add remove class 2023-05-15 23:13:55 +02:00
5 changed files with 358 additions and 152 deletions

View File

@@ -3,8 +3,8 @@
TAR=/usr/bin/tar TAR=/usr/bin/tar
PYTHON=/usr/bin/python3 PYTHON=/usr/bin/python3
GZIP=/usr/bin/gzip GZIP=/usr/bin/gzip
SCRIPTDIR=/home/valentin/script SCRIPTDIR=/home/valentin/script/webscrap
WEBSCRAP=${SCRIPTDIR}/web_scrap.py WEBSCRAP=${SCRIPTDIR}/import_export_canalblog.py
URL=www.clarissariviere.com URL=www.clarissariviere.com
DATE=$(date +%Y%m%d) DATE=$(date +%Y%m%d)
DIRECTORY=/home/valentin/backup DIRECTORY=/home/valentin/backup
@@ -24,7 +24,7 @@ else
fi fi
subject="${subject} ${URL} ${DATE}" subject="${subject} ${URL} ${DATE}"
echo > ${BACKUPDIR}/${LOGFILE} echo > ${BACKUPDIR}/${LOGFILE}
${PYTHON} ${WEBSCRAP} --url ${URL} --dir ${DIRECTORY} --quiet --logfile ${BACKUPDIR}/${LOGFILE} ${PYTHON} ${WEBSCRAP} --quiet --logfile ${BACKUPDIR}/${LOGFILE} --parallel 20 export --url ${URL} --directory ${DIRECTORY}
if [ ${?} -ne 0 ]; then if [ ${?} -ne 0 ]; then
subject="${subject} echoue : recuperation page" subject="${subject} echoue : recuperation page"
echo ${subject} | mail -s "${subject}" -A ${BACKUPDIR}/${LOGFILE} ${SENDER} echo ${subject} | mail -s "${subject}" -A ${BACKUPDIR}/${LOGFILE} ${SENDER}

View File

@@ -8,11 +8,33 @@ from concurrent.futures import as_completed, wait
import argparse, logging, threading import argparse, logging, threading
from lib.WPImport import WPimport from lib.WPImport import WPimport
from lib.WPExport import WPExport from lib.WPExport import WPExport
from lib.WPRemove import WPRemove
def remove(args, basic, logger, ssl_wordpress):
    """Delete content from every WordPress site named in ``args.wordpress``.

    ``args.wordpress`` is a comma-separated list of sites. When
    ``args.remove`` is set, posts, tags, categories and media are all cleaned
    (in that order). Otherwise only the kinds selected by ``args.posts``,
    ``args.categories``, ``args.tags`` and ``args.media`` are cleaned.

    ``basic``, ``logger`` and ``ssl_wordpress`` are handed to ``WPRemove``
    unchanged.
    """
    cleaner = WPRemove(basic=basic, wordpress="", logger=logger, ssl_wordpress=ssl_wordpress)
    wipe_everything = args.remove == True
    for site in args.wordpress.split(","):
        cleaner.setUrl(site)
        if wipe_everything:
            # Full wipe: same order as the selective path except tags come
            # before categories.
            cleaner.cleanPosts()
            cleaner.cleanTags()
            cleaner.cleanCategories()
            cleaner.cleanMedia()
            continue
        # Selective wipe: each flag is checked independently per site.
        if args.posts == True:
            cleaner.cleanPosts()
        if args.categories == True:
            cleaner.cleanCategories()
        if args.tags == True:
            cleaner.cleanTags()
        if args.media == True:
            cleaner.cleanMedia()
    del cleaner
def download(name_thread, max_thread, url, logger, parser, directory, html, img, ssl_canalblog):
def download(name_thread, max_thread, url, logger, parser, directory, html, img): exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory, ssl_canalblog=ssl_canalblog)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory)
webpage = exportWp.getUrlPage(name_thread, max_thread) webpage = exportWp.getUrlPage(name_thread, max_thread)
for i in ["article", "page"]: for i in ["article", "page"]:
for j in ["publications", "principal"]: for j in ["publications", "principal"]:
@@ -25,25 +47,27 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img)
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial): def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog):
canalblog = canalblog.split(",") canalblog = canalblog.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
protocol = "https"
if ssl_canalblog is False:
protocol = "http"
if serial is False: if serial is False:
for canal in canalblog: for canal in canalblog:
try: try:
o = urlparse(canal) o = urlparse(canal)
o = o._replace(scheme="https") o = o._replace(scheme=protocol)
url = o.geturl().replace(":///", "://") url = o.geturl().replace(":///", "://")
except Exception as err: except Exception as err:
logger.error("{0} : parsing error : {1}".format(name, err)) logger.error("{0} : parsing error : {1}".format(name, err))
exit(1) exit(1)
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser) exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread) webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
for j in wordpress: for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
importWp.fromUrl(webpage[l][k]) importWp.fromUrl(webpage[l][k])
@@ -56,15 +80,15 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
for i in range(0, len(canalblog)-1): for i in range(0, len(canalblog)-1):
try: try:
o = urlparse(canalblog[i]) o = urlparse(canalblog[i])
o = o._replace(scheme="https") o = o._replace(scheme=protocol)
url = o.geturl().replace(":///", "://") url = o.geturl().replace(":///", "://")
except Exception as err: except Exception as err:
logger.error("parsing error : {0}".format(err)) logger.error("parsing error : {0}".format(err))
exit(1) exit(1)
exportWp = WPExport(name=name, url=url, logger=logger, parser=parser) exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread) webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
for k in ["article", "page"]: for k in ["article", "page"]:
for l in ["publications", "principal"]: for l in ["publications", "principal"]:
@@ -73,13 +97,13 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial): def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress):
name = "Thread-{0}".format(int(name_thread) + 1) name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",") directory = directory.split(",")
wordpress = wordpress.split(",") wordpress = wordpress.split(",")
if serial is False: if serial is False:
for i in wordpress: for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
for j in directory: for j in directory:
importWp.fromDirectory(j, name_thread, max_thread) importWp.fromDirectory(j, name_thread, max_thread)
del importWp del importWp
@@ -89,7 +113,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres
logger.error("{0} : Error : Number directory is differant than wordpress".format(name)) logger.error("{0} : Error : Number directory is differant than wordpress".format(name))
exit(1) exit(1)
for i in range(0, len(wordpress)-1): for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser) importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp.fromDirectory(directory[i]) importWp.fromDirectory(directory[i])
del importWp del importWp
@@ -102,6 +126,7 @@ if __name__ == '__main__':
parser.add_argument("--quiet", help="No console output", action="store_true") parser.add_argument("--quiet", help="No console output", action="store_true")
parser.add_argument("--parser", help="Parser content", default="html.parser") parser.add_argument("--parser", help="Parser content", default="html.parser")
parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1) parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1)
parser.add_argument("--no-ssl", help="No ssl for canalblog and/or wordpress (example wordpress,canalblog)", dest="ssl", default="")
subparsers = parser.add_subparsers(dest="command") subparsers = parser.add_subparsers(dest="command")
@@ -113,7 +138,22 @@ if __name__ == '__main__':
import_parser.add_argument("--canalblog", help="URL Canalblog", default="") import_parser.add_argument("--canalblog", help="URL Canalblog", default="")
import_parser.add_argument("--wordpress", help="URL Wordpress", required=True) import_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
import_parser.add_argument("--serial", help="Serial execution", action="store_true") import_parser.add_argument("--serial", help="Serial execution", action="store_true")
import_parser.add_argument("--remove", help="Remove all articles", action="store_true") import_parser.add_argument("--remove-all", dest="remove", help="Remove all", action="store_true")
import_parser.add_argument("--remove-posts", help="Remove all posts", dest="posts", action="store_true")
import_parser.add_argument("--remove-categories", help="Remove all categories", dest="categories", action="store_true")
import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true")
import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true")
remove_parser = subparsers.add_parser("remove")
remove_parser.add_argument("--user", help="wordpress user", required=True)
remove_parser.add_argument("--password", help="password wordpress's user", default="")
remove_parser.add_argument("--wordpress", help="URL Wordpress", required=True)
remove_parser.add_argument("--all", dest="remove", help="Remove all (posts, media, tags, categories)", action="store_true")
remove_parser.add_argument("--posts", help="Remove all posts", action="store_true")
remove_parser.add_argument("--categories", help="Remove all categories", action="store_true")
remove_parser.add_argument("--tags", help="Remove all tags", action="store_true")
remove_parser.add_argument("--media", help="Remove all media", action="store_true")
@@ -134,6 +174,14 @@ if __name__ == '__main__':
logger = logging.getLogger('import export canalblog') logger = logging.getLogger('import export canalblog')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ssl_canalblog = True
ssl_wordpress = True
for i in args.ssl.split(","):
if i == "canalblog":
ssl_canalblog = False
if i == "wordpress":
ssl_wordpress = False
if args.quiet is False: if args.quiet is False:
ch = logging.StreamHandler() ch = logging.StreamHandler()
@@ -156,7 +204,7 @@ if __name__ == '__main__':
fileHandler.setFormatter(formatter) fileHandler.setFormatter(formatter)
logger.addHandler(fileHandler) logger.addHandler(fileHandler)
if args.command == "import": if args.command == "import" or args.command == "remove":
password = args.password password = args.password
if len(args.password) == 0: if len(args.password) == 0:
password = getpass() password = getpass()
@@ -165,34 +213,29 @@ if __name__ == '__main__':
exit(1) exit(1)
basic = HTTPBasicAuth(args.user, password) basic = HTTPBasicAuth(args.user, password)
if args.command == "import":
wordpress = args.wordpress.split(",") wordpress = args.wordpress.split(",")
importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser) importWp = WPimport(basic=basic, wordpress="", logger=logger, parser=args.parser, ssl_wordpress=ssl_wordpress)
if len(args.file) > 0: if len(args.file) > 0:
for i in wordpress: for i in wordpress:
importWp.setUrl(i) importWp.setUrl(i)
importWp.fromFile(files=args.file.split(",")) importWp.fromFile(files=args.file.split(","))
if len(args.directory) > 0: if len(args.directory) > 0:
if args.remove: remove(args, basic, logger, ssl_wordpress)
for i in args.wordpress.split(","):
importWp.setUrl(i)
importWp.removeAll()
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial) ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
except Exception as err: except Exception as err:
logger.error("Threading error : {0}".format(err)) logger.error("Threading error : {0}".format(err))
if len(args.canalblog) > 0: if len(args.canalblog) > 0:
if args.remove: remove(args, basic, logger, ssl_wordpress)
for i in args.wordpress.split(","):
importWp.setUrl(i)
importWp.removeAll()
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial) ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
except Exception as err: except Exception as err:
@@ -202,11 +245,14 @@ if __name__ == '__main__':
if args.command == "export": if args.command == "export":
canalblog = args.url.split(",") canalblog = args.url.split(",")
exportWp = WPExport(logger=logger, parser=args.parser, directory=args.directory) protocol = "https"
if ssl_canalblog is False:
protocol = "http"
exportWp = WPExport(logger=logger, parser=args.parser, directory=args.directory, ssl_canalblog=ssl_canalblog)
for canal in canalblog: for canal in canalblog:
try: try:
o = urlparse(canal) o = urlparse(canal)
o = o._replace(scheme="https") o = o._replace(scheme=protocol)
url = o.geturl().replace(":///", "://") url = o.geturl().replace(":///", "://")
except Exception as err: except Exception as err:
logger.error("parsing error : {0}".format(err)) logger.error("parsing error : {0}".format(err))
@@ -225,11 +271,14 @@ if __name__ == '__main__':
try: try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex: with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [ wait_for = [
ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img) ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img, ssl_canalblog)
for i in range(0, int(args.parallel)) for i in range(0, int(args.parallel))
] ]
except Exception as err: except Exception as err:
logger.error("Threading error : {0}".format(err)) logger.error("Threading error : {0}".format(err))
exit(0)
if args.command == "remove":
remove(args, basic, logger, ssl_wordpress)
exit(0) exit(0)

View File

@@ -6,20 +6,22 @@ from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry from requests.packages.urllib3.util.retry import Retry
class WPExport: class WPExport:
def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup"): def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup", ssl_canalblog=True):
self._url = url self._url = url
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._dir = directory self._dir = directory
self._name = name self._name = name
self._protocol = "https"
if ssl_canalblog is False:
self._protocol = "http"
self._request = requests.Session() self._request = requests.Session()
retries = Retry(total=10, retries = Retry(total=10,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('http://', HTTPAdapter(max_retries=retries)) self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
# Destructor # Destructor
@@ -89,7 +91,7 @@ class WPExport:
for i in page_url: for i in page_url:
section = "publications" section = "publications"
o = urlparse(i) o = urlparse(i)
o = o._replace(scheme="https") o = o._replace(scheme=self._protocol)
i = o.geturl().replace(":///", "://") i = o.geturl().replace(":///", "://")
if i == "{0}/".format(self._url): if i == "{0}/".format(self._url):
section = "principal" section = "principal"
@@ -103,16 +105,20 @@ class WPExport:
if i not in webpage[section]["page"]: if i not in webpage[section]["page"]:
webpage[section]["page"].append(i) webpage[section]["page"].append(i)
soup = BeautifulSoup(page.text, self._parser) soup = BeautifulSoup(page.text, self._parser)
class_div = pagingfirstline = soup.find_all("div", class_="pagingfirstline") class_div = soup.find_all("div", class_="pagingfirstline")
if len(class_div) > 0: if len(class_div) > 0:
pagingfirstline = class_div[0].find_all("a") pagingfirstline = class_div[0].find_all("a")
if len(pagingfirstline) > 1: if len(pagingfirstline) > 1:
lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/") lastpage = pagingfirstline[len(pagingfirstline)-1].get("href", "/")
self._logger.debug("{0} : Last page {1}".format(self._name, lastpage))
element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1] element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
number_page = element_lastpage.split("-")[0].split("p")[1] number_page = element_lastpage.split("-")[0].split("p")[1]
number_lastpage = int(number_page) / 10 number_lastpage = int(number_page) / 10
setPageDivided = int(number_lastpage) / max_thread setPageDivided = int(number_lastpage) / max_thread
if setPageDivided > int(setPageDivided):
setPageDivided = setPageDivided + 1
setPagePart = setPageDivided * (index_thread + 1) setPagePart = setPageDivided * (index_thread + 1)
firstPagePart = (setPagePart - setPageDivided) firstPagePart = (setPagePart - setPageDivided)
@@ -120,7 +126,7 @@ class WPExport:
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart))) self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart))) self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart)))
for j in range(int(firstPagePart),int(setPagePart)): for j in range(int(firstPagePart),int(setPagePart)+1):
paging = j * 10 paging = j * 10
categorie = urlparse(i).path.split("/") categorie = urlparse(i).path.split("/")
url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging) url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging)
@@ -133,7 +139,9 @@ class WPExport:
if page.status_code == 200: if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser) soup = BeautifulSoup(page.text, self._parser)
h2 = soup.find_all("h2") h2 = soup.find_all("h2")
self._logger.debug("{0} : {1} H2 : {2}".format(self._name, url_paging, h2))
for title in h2: for title in h2:
self._logger.debug("{0} : {1} a : {2}".format(self._name, url_paging, title.find_all("a")))
href = title.find_all("a")[0].get("href", "/") href = title.find_all("a")[0].get("href", "/")
if href not in webpage[section]["article"]: if href not in webpage[section]["article"]:
try: try:
@@ -262,9 +270,11 @@ class WPExport:
self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web)) self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web))
try: try:
r = self._request.get(webpage[i]) r = self._request.get(webpage[i])
except Exception as err: except ConnectionError as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} Exception error : {1}".format(self._name, err))
if r.status_code == 200: if r.status_code == 200:
fileDownload = "{0}/{1}/index.html".format(backup_dir, o.netloc) fileDownload = "{0}/{1}/index.html".format(backup_dir, o.netloc)
if len(dir_page_web) > 0 and len(filePageWeb) > 0: if len(dir_page_web) > 0 and len(filePageWeb) > 0:

View File

@@ -8,20 +8,22 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport: class WPimport:
# Constructor # Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser"): def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True):
self._name = name self._name = name
self._basic = basic self._basic = basic
self._wordpress = wordpress self._wordpress = wordpress
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'} self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._protocol = "https"
if ssl_wordpress is False:
self._protocol = "http"
self._request = requests.Session() self._request = requests.Session()
retries = Retry(connect=10, read=10, redirect=5, retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2) status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('http://', HTTPAdapter(max_retries=retries)) self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
# Destructor # Destructor
@@ -86,37 +88,26 @@ class WPimport:
else: else:
self._addOrUpdateFeaturedMedia(soup) self._addOrUpdateFeaturedMedia(soup)
def removeAll(self):
    """Delete every post of the configured WordPress site, page by page.

    Requests up to 100 posts from the REST API, force-deletes each of them,
    then calls itself again to process the next batch. The recursion stops
    when the listing is empty, when the listing request fails, or when a
    whole batch could not be deleted (otherwise the same undeletable posts
    would be listed forever).

    A connection error while deleting a post terminates the process with
    exit status 1.
    """
    params = {"per_page":100}
    try:
        self._logger.info("{0} : List posts to remove for url : {1}".format(self._name, self._wordpress))
        r = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params, headers=self._headers_json)
    except Exception as err:
        self._logger.error("{0} : Connection error for list post to remove : {1}".format(self._name, err))
        # No response object exists at this point; continuing would fail on
        # the unbound ``r`` below.
        return
    if r.status_code == 200:
        result = r.json()
        removed = 0
        for i in result:
            title = i["title"]["rendered"]
            self._logger.info("{0} : Remove article for url {1} : {2}".format(self._name, self._wordpress, title))
            try:
                deleted = self._request.delete("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, i["id"]), auth=self._basic, headers=self._headers_json, params={"force":1})
            except Exception as err:
                self._logger.error("{0} : Connection error for post remove : {1}".format(self._name, err))
                exit(1)
            if deleted.status_code == 200:
                removed += 1
                self._logger.info("{0} : Post removed for URL {1} : {2}".format(self._name, self._wordpress, title))
            else:
                # Report the real HTTP status; the message used to print the
                # post title in the status-code slot.
                self._logger.error("{0} : Connection error for post {1} with status code {2}".format(self._name, title, deleted.status_code))
        # Only fetch the next batch if this one made progress.
        if removed > 0:
            self.removeAll()
    else:
        self._logger.error("{0} : Error for list to remove due status code {1}".format(self._name, r.status_code))
        self._logger.debug("{0} : Content error : {1}".format(self._name, r.content))
# Private method # Private method
## replace caracter
def _replaceCaracter(self, title_rendered):
list_replace = {'’': "'", '–': '-', '…': '...', '« ': '"', ' »': '"', '« ': '"', ' »': '"', '’': "'", '"‘': "'"}
for old, new in list_replace.items():
title_rendered = title_rendered.replace(old, new)
return title_rendered
## remove space
def _removeSpace(self, title):
if title[len(title)-1] == " ":
title = title[:-1]
if title[0] == " ":
title = title[1:]
return title
## Get all files ## Get all files
def _getFiles(self, item): def _getFiles(self, item):
@@ -145,7 +136,7 @@ class WPimport:
h2 = i.find_all("h2")[0].text h2 = i.find_all("h2")[0].text
params = {"search":h2, "type":"post"} params = {"search":h2, "type":"post"}
try: try:
page = self._request.get("http://{0}/wp-json/wp/v2/search".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("{1}://{0}/wp-json/wp/v2/search".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error : {1}".format(self._name, err)) self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1) exit(1)
@@ -166,7 +157,7 @@ class WPimport:
name_img = name_img.split("/")[len(name_img.split("/"))-1] name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img} params = {"search": name_img}
try: try:
page = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err)) self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1) exit(1)
@@ -176,7 +167,7 @@ class WPimport:
id_media = res[0]["id"] id_media = res[0]["id"]
data = {"featured_media": id_media} data = {"featured_media": id_media}
try: try:
r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) r = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"], self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1) exit(1)
@@ -207,7 +198,7 @@ class WPimport:
for i in list_img: for i in list_img:
data = {"post": post_id} data = {"post": post_id}
try: try:
r = self._request.post("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"]), auth=self._basic, data=data) r = self._request.post("{2}://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"], self._protocol), auth=self._basic, data=data)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for link image to post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for link image to post : {1}".format(self._name, err))
exit(1) exit(1)
@@ -221,7 +212,7 @@ class WPimport:
## Add or update img ## Add or update img
def _addOrUpdateMedia(self, href_img, page): def _addOrUpdateMedia(self, href_img, page):
media_authorized = ["png", "jpg", "jpeg", "svg"] media_authorized = ["png", "jpg", "jpeg", "svg", "gif"]
media = {"id":"", "rendered":""} media = {"id":"", "rendered":""}
split_fileimg = href_img.split("/") split_fileimg = href_img.split("/")
img_name = split_fileimg[len(split_fileimg)-1] img_name = split_fileimg[len(split_fileimg)-1]
@@ -231,10 +222,10 @@ class WPimport:
self._logger.error("{0} : Element {1} is not image".format(self._name,img_name)) self._logger.error("{0} : Element {1} is not image".format(self._name,img_name))
is_img = False is_img = False
if is_img is True: if is_img is True:
self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "http://{0}/wp-json/wp/v2/media".format(self._wordpress))) self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol)))
params = { "search": img_name} params = { "search": img_name}
try: try:
r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params) r = self._request.get("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
exit(1) exit(1)
@@ -245,7 +236,7 @@ class WPimport:
if len(res) > 0: if len(res) > 0:
params = {"force":1} params = {"force":1}
try: try:
r = self._request.delete("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"]), auth=self._basic, params=params) r = self._request.delete("{2}://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"], self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} Connection error for delete image : {1}".format(self._name, err)) self._logger.error("{0} Connection error for delete image : {1}".format(self._name, err))
exit(1) exit(1)
@@ -261,7 +252,7 @@ class WPimport:
img_type = "image/jpeg" img_type = "image/jpeg"
headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)} headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)}
try: try:
r = self._request.post("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, headers=headers, data=data) r = self._request.post("{1}://{0}/wp-json/wp/v2/media".format(self._wordpress, self._protocol), auth=self._basic, headers=headers, data=data)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for add image : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for add image : {1}".format(self._name, err))
exit(1) exit(1)
@@ -287,7 +278,7 @@ class WPimport:
try: try:
params = {"post": post, "author_name":i["author"], "date":i["date"]} params = {"post": post, "author_name":i["author"], "date":i["date"]}
page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for search comment : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for search comment : {1}".format(self._name, err))
exit(1) exit(1)
@@ -296,7 +287,7 @@ class WPimport:
for j in result: for j in result:
try: try:
params = {"force":1} params = {"force":1}
page = self._request.delete("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"]), params=params, auth=self._basic) page = self._request.delete("{2}://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"], self._protocol), params=params, auth=self._basic)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for delete comment : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for delete comment : {1}".format(self._name, err))
exit(1) exit(1)
@@ -318,7 +309,7 @@ class WPimport:
parent_id = int(i["parent_id"]) parent_id = int(i["parent_id"])
params = {"post": post, "author_name":comment[parent_id]["author"], "date":comment[parent_id]["date"]} params = {"post": post, "author_name":comment[parent_id]["author"], "date":comment[parent_id]["date"]}
try: try:
page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, params=params)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for parent comment : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for parent comment : {1}".format(self._name, err))
exit(1) exit(1)
@@ -331,7 +322,7 @@ class WPimport:
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data) page = self._request.post("{1}://{0}/wp-json/wp/v2/comments".format(self._wordpress, self._protocol), auth=self._basic, data=data)
except Exception as err: except Exception as err:
self._logger.error("{0} : Connection error for add comment : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for add comment : {1}".format(self._name, err))
exit(1) exit(1)
@@ -465,33 +456,56 @@ class WPimport:
for i in liste: for i in liste:
for j in element[i]: for j in element[i]:
element_exist = False element_exist = False
title_element = self._removeSpace(j)
for index in range(1,10):
self._logger.info("{0} : search {1} with index {2} : {3}".format(self._name, i, index, title_element))
try: try:
params = {"params":j} params = {"search":title_element, "per_page":"100", "page":index}
page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, params=params) page = self._request.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol), auth=self._basic, params=params)
except Exception as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for {1} : {2}".format(self._name, i, err)) self._logger.error("{0} : Connection error for {1} : {2}".format(self._name, i, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for {1} : {2}".format(self._name, i, err))
if page.status_code == 200: if page.status_code == 200:
element_exist = True
result = page.json() result = page.json()
listelement[i].append(result[0]["id"]) self._logger.debug("{0} : content {3} {2} : {1}".format(self._name, result, title_element, i))
if len(result) > 0:
for k in result:
title_rendered = k["name"]
self._logger.debug("{0} : content {2} : {1}".format(self._name, title_rendered, i))
self._logger.debug("{0} : size of content {3} : {2} - {1}".format(self._name, len(title_rendered), len(title_element), i))
if len(title_element) != len(title_rendered):
title_rendered = self._replaceCaracter(title_rendered)
if title_element == title_rendered:
self._logger.info("{0} : {1} found : {2}".format(self._name, i, title_rendered))
element_exist = True
listelement[i].append(k["id"])
else:
break
if page.status_code == 400:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else: else:
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code)) self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
self._logger.debug("{0} : Element {3} {2} is {1}".format(self._name, element_exist, title_element, i))
if element_exist is False: if element_exist == False:
data = {"name": j} data = {"name": title_element}
self._logger.debug("{0} : URL : {1} ".format("http://{1}/wp-json/wp/v2/{2}".format(self._name, self._wordpress, i))) self._logger.info("{0} : Create {1} : {2}".format(self._name, i, title_element))
self._logger.debug("{0} : data : {1}".format(self._name, data)) self._logger.debug("{0} : Data : {1}".format(self._name, data))
self._logger.debug("{0} : headers : {1}".format(self._name, self._headers_form))
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, headers=self._headers_json, data=data) page = self._request.post("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for post {1} : {2}".format(self._name, i, err)) self._logger.error("{0} : Connection error for post {1} : {2}".format(self._name, i, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for post {1} : {2}".format(self._name, i, err))
if page.status_code == 201: if page.status_code == 201:
self._logger.info("{0} : {1} created : {2}".format(self._name, i, j))
result = page.json() result = page.json()
listelement[i].append(result["id"]) listelement[i].append(result["id"])
else: else:
@@ -514,44 +528,92 @@ class WPimport:
bodyhtml = bodyhtml.replace(i["old_src"], o.path) bodyhtml = bodyhtml.replace(i["old_src"], o.path)
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = dateheader[0].text.split(" ")
self._logger.debug("{0} : Title post : |{1}|".format(self._name, title))
title = self._removeSpace(title)
self._logger.debug("{0} : Rendered Title post : |{1}|".format(self._name, title))
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
params = {"search":author} self._logger.debug("{0} : Data for post : |{1}| : {2}" .format(self._name, title, data))
params = {"search":author, "per_page":100}
try: try:
self._logger.info("{0} : Get author : {1}".format(self._name, author)) self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("{1}://{0}/wp-json/wp/v2/users".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
except Exception as err: self._logger.debug("{0} : End Search author : {1}".format(self._name, author))
self._logger.debug("{0} : Debug requests : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get author : {1}".format(self._name, err))
if page.status_code == 200: if page.status_code == 200:
self._logger.info("{0} : Get author id : {1}".format(self._name, result))
result = page.json() result = page.json()
data["author"] = result[0]["id"] for a in result:
data["author"] = a["id"]
else: else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content)) self._logger.debug("{0} : {1}".format(page.content))
page_is_exist = False
params = {"search": title} for index in range(1,10):
params = {"search": title, "per_page":100, "page": index}
try: try:
page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params) self._logger.info("{0} : Search post with index {2} : {1}".format(self._name, title, index))
except Exception as err: page = self._request.get("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
except ConnectionError as err:
self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err))
exit(1) exit(1)
page_is_exist = True except Exception as err:
self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err))
if page.status_code == 200: if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json() result = page.json()
if len(result) == 0: if len(result) == 0:
page_is_exist = False break
else: self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result: for i in result:
if i["title"]["rendered"] == title: title_rendered = i["title"]["rendered"]
self._logger.info("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
if len(title_rendered) != len(title):
title_rendered = self._replaceCaracter(title_rendered)
self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title:
page_is_exist = True
post_id = i["id"] post_id = i["id"]
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, result[0])) count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title)) self._logger.info("{0} : Post updated : {1}".format(self._name, title))
@@ -560,16 +622,23 @@ class WPimport:
else: else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else: else:
self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
if page_is_exist == False: if page_is_exist is False:
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"]))
except Exception as err: page = self._request.post("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except ConnectionError as err:
self._logger.error("{0} : Connection error for create post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for create post : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for create post : {1}".format(self._name, err))
if page.status_code == 201: if page.status_code == 201:
result = page.json() result = page.json()
self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"])) self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"]))

78
lib/WPRemove.py Normal file
View File

@@ -0,0 +1,78 @@
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPRemove:
    """Bulk-remove content from a WordPress site through its REST API (wp/v2).

    Every ``clean*`` method lists one resource type 100 items at a time and
    force-deletes each listed item, repeating until the listing is empty.
    """

    # Constructor
    def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, ssl_wordpress=True):
        """Store the connection settings and build a retrying HTTP session.

        name          -- prefix used in every log message
        basic         -- value passed as ``auth=`` to each request
        wordpress     -- host name of the WordPress site (no scheme)
        logger        -- logger object; a standard ``logging`` logger is used
                         when omitted so that logging calls never hit ``None``
        ssl_wordpress -- ``False`` selects ``http``, anything else ``https``
        """
        self._name = name
        self._basic = basic
        self._wordpress = wordpress
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'}
        self._protocol = "https"
        if ssl_wordpress is False:
            self._protocol = "http"
        self._request = requests.Session()
        retries = Retry(connect=10, read=10, redirect=5,
                        status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
        self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))

    # Destructor
    def __del__(self):
        print("{0} : Import finished for {1}".format(self._name, self._wordpress))

    # Public method
    def setUrl(self, wordpress):
        """Change the host name of the targeted WordPress site."""
        self._wordpress = wordpress

    def cleanPosts(self):
        """Remove every post."""
        self._removeAll("posts")

    def cleanTags(self):
        """Remove every tag."""
        self._removeAll("tags")

    def cleanCategories(self):
        """Remove every category."""
        self._removeAll("categories")

    def cleanMedia(self):
        """Remove every media item."""
        self._removeAll("media")

    # Private method
    @staticmethod
    def _label(item):
        """Return a printable label for a listed item.

        Posts and media carry ``title.rendered``; terms (tags, categories)
        carry ``name`` instead, so indexing ``title`` unconditionally would
        raise ``KeyError`` for them. Falls back to the item id.
        """
        title = item.get("title")
        if isinstance(title, dict) and "rendered" in title:
            return title["rendered"]
        return item.get("name", item.get("id"))

    def _removeAll(self, composant):
        """Force-delete every item of the REST collection ``composant``.

        The first page of the collection is listed again after each pass,
        because deleted items drop out of the listing. The loop ends when the
        listing is empty, when the listing returns a non-200 status, or when
        a whole pass deleted nothing (otherwise an item that cannot be
        deleted would make the loop run forever).

        Raises ``SystemExit(1)`` when a request raises (connection failure).
        """
        list_url = "{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, composant, self._protocol)
        while True:
            try:
                self._logger.info("{0} : List {2} to remove for url : {1}".format(self._name, self._wordpress, composant))
                r = self._request.get(list_url, auth=self._basic, params={"per_page":100}, headers=self._headers_json)
            except Exception as err:
                self._logger.error("{0} : Connection error for list {1} to remove : {2}".format(self._name, composant, err))
                # Nothing can be inspected without a response: abort like the delete path does.
                raise SystemExit(1)
            if r.status_code != 200:
                self._logger.error("{0} : Error for list to remove {1} due status code {2}".format(self._name, composant, r.status_code))
                self._logger.debug("{0} : Content error for {1} : {2}".format(self._name, composant, r.content))
                return
            result = r.json()
            if len(result) == 0:
                return
            removed = 0
            for i in result:
                label = self._label(i)
                self._logger.info("{0} : Remove {2} for url {1} : {3}".format(self._name, self._wordpress, composant, label))
                try:
                    deleted = self._request.delete("{3}://{0}/wp-json/wp/v2/{1}/{2}".format(self._wordpress, composant, i["id"], self._protocol), auth=self._basic, headers=self._headers_json, params={"force":1})
                except Exception as err:
                    self._logger.error("{0} : Connection error for {1} remove : {2}".format(self._name, composant, err))
                    raise SystemExit(1)
                if deleted.status_code == 200:
                    removed += 1
                    self._logger.info("{0} : Post removed for URL {1} {2} : {3}".format(self._name, self._wordpress, composant, label))
                else:
                    self._logger.error("{0} : Connection error for post {1} {2} {3} with status code {4}".format(self._name, self._wordpress, composant, label, deleted.status_code))
            if removed == 0:
                # The same undeletable items would be listed again on the next pass.
                self._logger.error("{0} : No {1} could be removed for url {2}, stop".format(self._name, composant, self._wordpress))
                return