Merge pull request 'thread' (#9) from thread into master

Reviewed-on: #9
This commit is contained in:
v4l3n71n 2023-05-01 20:05:02 +00:00
commit 3161a06459
4 changed files with 258 additions and 165 deletions

4
.gitignore vendored
View File

@ -1,4 +1,4 @@
backup/
backup1/
backup*/
wp-navigation
web_scrap.log
__pycache__/

View File

@ -2,17 +2,95 @@
from requests.auth import HTTPBasicAuth
from getpass import getpass
from urllib.parse import urlparse
import argparse, logging
from concurrent import futures
import argparse, logging, threading
from lib.WPImport import WPimport
from lib.WPExport import WPExport
def download(name_thread, max_thread, url, logger, parser, directory, html, img):
    """Worker run by one thread: crawl ``url`` and download this thread's share.

    Args:
        name_thread: Zero-based index of this worker. It is forwarded to
            ``WPExport.getUrlPage`` and used (plus one) to build the
            ``Thread-N`` label that prefixes log messages.
        max_thread: Total number of workers, forwarded to ``getUrlPage``.
        url: Source URL to crawl.
        logger: Logger handed to ``WPExport``.
        parser: Parser name handed to ``WPExport``.
        directory: Backup directory handed to ``WPExport``.
        html: The HTML pages are downloaded only when this is ``False``
            (presumably a "skip HTML" flag; confirm against the CLI definition).
        img: The images are downloaded only when this is ``False``
            (presumably a "skip images" flag; confirm against the CLI definition).
    """
    thread_label = "Thread-{0}".format(int(name_thread) + 1)
    exportWp = WPExport(name=thread_label, url=url, logger=logger, parser=parser, directory=directory)
    webpage = exportWp.getUrlPage(name_thread, max_thread)
    if html is False:
        exportWp.downloadHTML(webpage)
    # Use the ``img`` argument: the previous code read the global ``args.img``,
    # which only exists when the module is run as a script and ignored the
    # value the caller actually passed in.
    if img is False:
        exportWp.downloadImg(webpage)
    # Drop the exporter explicitly so its destructor logs the end of the export.
    del exportWp
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial):
    """Worker run by one thread: crawl source blogs and import pages into WordPress.

    Args:
        name_thread: Zero-based index of this worker. It is forwarded to
            ``WPExport.getUrlPage`` and used (plus one) to build the
            ``Thread-N`` label that prefixes log messages.
        max_thread: Total number of workers, forwarded to ``getUrlPage``.
        canalblog: Comma-separated list of source URLs.
        logger: Logger used here and handed to ``WPExport`` / ``WPimport``.
        parser: Parser name handed to ``WPExport`` / ``WPimport``.
        wordpress: Comma-separated list of WordPress targets.
        basic: Authentication object handed to ``WPimport``.
        serial: When ``False`` every source is imported into every target.
            Otherwise sources and targets are paired by position and both
            lists must have the same length.

    Raises:
        SystemExit: with code 1 when a URL cannot be parsed, or when
            ``serial`` is set and the two lists differ in length.
    """
    name = "Thread-{0}".format(int(name_thread) + 1)
    sources = canalblog.split(",")
    targets = wordpress.split(",")

    def crawl(source):
        """Normalise ``source`` to an https URL and return this thread's pages."""
        try:
            parsed = urlparse(source)._replace(scheme="https")
            # A source given without a scheme parses as a bare path, so
            # geturl() yields "https:///host"; collapse it to "https://host".
            url = parsed.geturl().replace(":///", "://")
        except Exception as err:
            logger.error("{0} : parsing error : {1}".format(name, err))
            exit(1)
        exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
        pages = exportWp.getUrlPage(name_thread, max_thread)
        # Drop the exporter explicitly so its destructor logs the end of the export.
        del exportWp
        return pages

    def push(target, pages):
        """Import ``pages`` into the WordPress instance ``target``."""
        importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
        importWp.fromUrl(pages)
        del importWp

    if serial is False:
        for source in sources:
            webpage = crawl(source)
            for target in targets:
                push(target, webpage)
    else:
        if len(sources) != len(targets):
            logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name))
            exit(1)
        # Pair every source with its target. The previous
        # range(0, len(canalblog)-1) stopped one short, so the last pair
        # (or the only pair) was never imported.
        for source, target in zip(sources, targets):
            push(target, crawl(source))
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
    """Worker run by one thread: import backed-up directories into WordPress.

    Args:
        name_thread: Zero-based index of this worker. It is forwarded to
            ``WPimport.fromDirectory`` and used (plus one) to build the
            ``Thread-N`` label that prefixes log messages.
        max_thread: Total number of workers, forwarded to ``fromDirectory``.
        directory: Comma-separated list of backup directories.
        logger: Logger used here and handed to ``WPimport``.
        parser: Parser name handed to ``WPimport``.
        wordpress: Comma-separated list of WordPress targets.
        basic: Authentication object handed to ``WPimport``.
        serial: When ``False`` every directory is imported into every target.
            Otherwise directories and targets are paired by position and
            both lists must have the same length.

    Raises:
        SystemExit: with code 1 when ``serial`` is set and the two lists
            differ in length.
    """
    name = "Thread-{0}".format(int(name_thread) + 1)
    folders = directory.split(",")
    targets = wordpress.split(",")

    if serial is False:
        for target in targets:
            importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
            for folder in folders:
                importWp.fromDirectory(folder, name_thread, max_thread)
            del importWp
    else:
        if len(folders) != len(targets):
            logger.error("{0} : Error : Number directory is differant than wordpress".format(name))
            exit(1)
        # Pair every directory with its target. The previous
        # range(0, len(wordpress)-1) stopped one short, so the last pair
        # (or the only pair) was never imported.
        for folder, target in zip(folders, targets):
            importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
            # Forward the thread index and count as the non-serial branch does;
            # without them fromDirectory falls back to its defaults instead of
            # this worker's slice of the files.
            importWp.fromDirectory(folder, name_thread, max_thread)
            del importWp
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--debug", help="Verbosity", action="store_true")
parser.add_argument("--logfile", help="Log file", default="")
parser.add_argument("--quiet", help="No console output", action="store_true")
parser.add_argument("--parser", help="Parser content", default="html.parser")
parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1)
subparsers = parser.add_subparsers(dest="command")
@ -76,64 +154,31 @@ if __name__ == '__main__':
if len(args.file) > 0:
for i in wordpress:
importWp.setUrl(i)
importWp.fromFile(args.file.split(","))
exit(0)
importWp.fromFile(files=args.file.split(","))
if len(args.directory) > 0:
directory = args.directory.split(",")
if args.serial is False:
for i in wordpress:
importWp.setUrl(i)
for j in directory:
importWp.fromDirectory(j)
else:
if len(directory) != len(wordpress):
logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
exit(1)
for i in range(0, len(wordpress)-1):
importWp.setUrl(wordpress[i])
importWp.fromDirectory(directory[i])
exit(0)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial)
for i in range(0, int(args.parallel))
]
except Exception as err:
logger.error("Threading error : {0}".format(err))
if len(args.canalblog) > 0:
exportWp = WPExport("", logger, args.parser, args.directory)
canalblog = args.canalblog.split(",")
wordpress = args.wordpress.split(",")
if args.serial is False:
for canal in canalblog:
try:
o = urlparse(canal)
o = o._replace(scheme="https")
url = o.geturl().replace(":///", "://")
except Exception as err:
logger.error("parsing error : {0}".format(err))
exit(1)
exportWp.setUrl(url)
webpage = exportWp.getUrlPage()
for j in wordpress:
importWp.setUrl(j)
importWp.fromUrl(webpage)
else:
if len(canalblog) != len(wordpress):
logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
exit(1)
for i in range(0, len(canalblog)-1):
try:
o = urlparse(canalblog[i])
o = o._replace(scheme="https")
url = o.geturl().replace(":///", "://")
except Exception as err:
logger.error("parsing error : {0}".format(err))
exit(1)
exportWp.setUrl(url)
webpage = exportWp.getUrlPage()
importWp.setUrl(wordpress[i])
importWp.fromUrl(webpage)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial)
for i in range(0, int(args.parallel))
]
except Exception as err:
logger.error("Threading error : {0}".format(err))
exit(0)
if args.command == "export":
canalblog = args.url.split(",")
exportWp = WPExport("", logger, args.parser, args.directory)
exportWp = WPExport(logger=logger, parser=args.parser, directory=args.directory)
for canal in canalblog:
try:
o = urlparse(canal)
@ -148,12 +193,17 @@ if __name__ == '__main__':
if args.css is False:
exportWp.downloadCss()
del exportWp
if args.html is False or args.img is False:
webpage = exportWp.getUrlPage()
if args.html is False:
exportWp.downloadHTML(webpage)
if args.img is False:
exportWp.downloadImg(webpage)
try:
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
wait_for = [
ex.submit(download, i, int(args.parallel), url, logger, args.parser, args.directory, args.html, args.img)
for i in range(0, int(args.parallel))
]
except Exception as err:
logger.error("Threading error : {0}".format(err))
exit(0)

View File

@ -6,11 +6,12 @@ from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPExport:
def __init__(self, url, logger, parser, directory):
def __init__(self, name = "Thread-0", url = "", logger = None, parser = "html.parser", directory = "backup"):
self._url = url
self._logger = logger
self._parser = parser
self._dir = directory
self._name = name
self._request = requests.Session()
@ -20,8 +21,17 @@ class WPExport:
self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
    """Log that the export for this instance's URL has finished.

    Nothing is logged when no logger is attached: the constructor's default
    is ``logger=None``, and an exception raised inside a destructor is only
    reported as a warning, so the guard avoids that noise.
    """
    logger = getattr(self, "_logger", None)
    if logger is None:
        return
    logger.info("{0} : Export finished for {1}".format(self._name, self._url))
# Public method
# Set name
def setName(self, name):
    """Store the ``Thread-N`` label built from a zero-based index.

    ``name`` is converted with ``int()`` and incremented by one, so index 0
    becomes ``Thread-1``.
    """
    thread_number = int(name) + 1
    self._name = "Thread-{0}".format(thread_number)
# Set URL
def setUrl(self, url):
@ -55,11 +65,11 @@ class WPExport:
# Get URL
def getUrlPage(self):
def getUrlPage(self, index_thread, max_thread):
try:
page = self._request.get(self._url)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
page_url = []
if page.status_code == 200:
@ -70,8 +80,8 @@ class WPExport:
if href != "#":
page_url.append(href)
else:
self._logger.error("Url did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : URL did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
webpage = []
@ -79,10 +89,10 @@ class WPExport:
try:
page = self._request.get(i)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
self._logger.info("page : {0}".format(i))
self._logger.info("{0} : page : {1}".format(self._name, i))
if i not in webpage:
webpage.append(i)
soup = BeautifulSoup(page.text, self._parser)
@ -94,13 +104,22 @@ class WPExport:
element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
number_page = element_lastpage.split("-")[0].split("p")[1]
number_lastpage = int(number_page) / 10
for j in range(1,int(number_lastpage)):
setPageDivided = int(number_lastpage) / max_thread
setPagePart = setPageDivided * (index_thread + 1)
firstPagePart = (setPagePart - setPageDivided)
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart)))
for j in range(int(firstPagePart),int(setPagePart)):
paging = j * 10
categorie = urlparse(i).path.split("/")
url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging)
if len(categorie) > 2:
url_paging = "{0}/archives/{1}/p{2}-10.html".format(self._url, categorie[2], paging)
self._logger.info(url_paging)
self._logger.info("{0} : {1}".format(self._name, url_paging))
if url_paging not in webpage:
webpage.append(url_paging)
page = self._request.get(url_paging)
@ -118,7 +137,7 @@ class WPExport:
exit(1)
webpage.append(o)
else:
self._logger.error("web didn't get due status code : {0}".format(page.status_code))
self._logger.error("{0} : web didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug(page.content)
return webpage
@ -135,7 +154,7 @@ class WPExport:
makedir.append(i)
repath = "/".join(makedir)
if not os.path.exists(repath):
self._logger.debug("Dossier crée : {0}".format(repath))
self._logger.debug("{0} : Dossier crée : {1}".format(self._name, repath))
try:
if len(repath) > 0:
os.mkdir(repath)
@ -201,21 +220,21 @@ class WPExport:
try:
page = self._request.get(i)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser)
img = soup.find_all("img")
self._logger.info("image from page: {0} : ".format(i))
self._logger.info("{0} : image from page: {1} : ".format(self._name,i))
for anchor in img:
src = anchor.get("src", "/")
if src != "/":
if src not in page_img:
self._logger.info("image: {0} : ".format(src))
self._logger.info("{0} : image: {1} : ".format(self._name, src))
page_img.append(src)
else:
self._logger.error("Image did not get due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Image did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
return page_img
@ -243,7 +262,7 @@ class WPExport:
fileDownload = "{0}/{1}/index.html".format(backup_dir, o.netloc)
if len(dir_page_web) > 0 and len(filePageWeb) > 0:
fileDownload = "{0}/{1}{2}/{3}".format(backup_dir, o.netloc, dir_page_web, filePageWeb)
self._logger.info("{0}/{1} : {2}".format(i+1, len(webpage), fileDownload))
self._logger.info("{0} : {1}/{2} : {3}".format(self._name, i+1, len(webpage), fileDownload))
try:
open(fileDownload, "wb").write(r.content)
except Exception as err:

View File

@ -8,7 +8,8 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport:
# Constructor
def __init__(self, basic, wordpress, logger, parser):
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser"):
self._name = name
self._basic = basic
self._wordpress = wordpress
self._logger = logger
@ -22,6 +23,11 @@ class WPimport:
self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
    """Log that the import into this instance's WordPress target has finished.

    Nothing is logged when no logger is attached: the constructor's default
    is ``logger=None``, and an exception raised inside a destructor is only
    reported as a warning, so the guard avoids that noise.
    """
    logger = getattr(self, "_logger", None)
    if logger is None:
        return
    logger.info("{0} : Import finished for {1}".format(self._name, self._wordpress))
# Public method
def setUrl(self, wordpress):
@ -32,10 +38,10 @@ class WPimport:
try:
r = self._request.get(webpage[i])
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err))
exit(1)
if r.status_code == 200:
self._logger.info("({0}/{1} : Page en cours d'import : {2}".format(i+1, len(webpage), webpage[i]))
self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
soup = BeautifulSoup(r.content, self._parser)
articlebody = soup.find_all("div", class_="articlebody")
if len(articlebody) > 0:
@ -43,25 +49,38 @@ class WPimport:
else:
self._addOrUpdateFeaturedMedia(soup)
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} : Connection error for get url {1} with status code : {2}".format(self._name, webpage[i], r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
def fromDirectory(self, directory):
def fromDirectory(self, directory="", number_thread=1, max_thread=1):
directory = "{0}/archives".format(directory)
directories = self._getDirectories([], "{0}".format(directory))
files = self._getFiles(directories)
self.fromFile(files)
if len(directories) > 0:
files = self._getFiles(directories)
self.fromFile(files, number_thread, max_thread)
else:
self._logger.error("{0} : No files for {1}".format(self._name, directory))
def fromFile(self, files):
for file in files:
if os.path.exists(file):
self._logger.info("Fichier en cours de traitement : {0}".format(file))
with open(file, 'r') as f:
def fromFile(self, files=[], number_thread=1, max_thread=1):
divFiles = int(len(files) / max_thread)
currentRangeFiles = int(divFiles * (number_thread+1))
firstRange = int(currentRangeFiles - divFiles)
self._logger.debug("{0} : index : {1}".format(self._name,number_thread))
self._logger.debug("{0} : first range : {1}".format(self._name,firstRange))
self._logger.debug("{0} : last range : {1}".format(self._name,currentRangeFiles))
for i in range(firstRange, currentRangeFiles):
if os.path.exists(files[i]):
self._logger.info("{0} : ({1}/{2}) File is being processed : {3}".format(self._name, i+1, currentRangeFiles + 1, files[i]))
with open(files[i], 'r') as f:
content = f.read()
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
soup = BeautifulSoup(content, self._parser)
articlebody = soup.find_all("div", class_="articlebody")
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
if len(articlebody) > 0:
self._addOrUpdatePost(soup)
else:
@ -99,7 +118,7 @@ class WPimport:
try:
page = self._request.get("http://{0}/wp-json/wp/v2/search".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
@ -111,7 +130,7 @@ class WPimport:
try:
page = self._request.get(img_src)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for get featured media : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
name_img = img_src.replace("_q", "")
@ -120,7 +139,7 @@ class WPimport:
try:
page = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error search featured media : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
res = page.json()
@ -130,27 +149,27 @@ class WPimport:
try:
r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for post media featured : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"]))
self._logger.info("{0} : Add media featured : {1}".format(self._name, r.json()["title"]["raw"]))
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
else:
self._logger.info("Aucun media trouvé pour {0}".format(h2))
self._logger.info("{0} : No media found for {1}".format(self._name, h2))
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection error with status code for search featured media: {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection error for get featured media with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection error with status code for featured media : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
## Association image to post
@ -161,13 +180,13 @@ class WPimport:
try:
r = self._request.post("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"]), auth=self._basic, data=data)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for link image to post : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("Association d'une image à l'article {0}".format(title))
self._logger.info("{0} : Link image to post {1}".format(self._name, title))
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} Connection error with status code for link image to post : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
## Add or update img
@ -176,12 +195,14 @@ class WPimport:
media = {"id":"", "rendered":""}
split_fileimg = href_img.split("/")
img_name = split_fileimg[len(split_fileimg)-1]
self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "http://{0}/wp-json/wp/v2/media".format(self._wordpress)))
params = { "search": img_name}
try:
r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code))
if r.status_code == 200:
res = r.json()
if len(res) > 0:
@ -189,13 +210,13 @@ class WPimport:
try:
r = self._request.delete("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"]), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} Connection error for delete image : {1}".format(self._name, err))
exit(1)
if r.status_code == 200:
self._logger.info("Image removed {0}".format(img_name))
self._logger.info("{0} : Image removed {1}".format(self._name, img_name))
else:
self._logger.error("Image not removed due status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} : Image not removed due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
data = page.content
img_type = "image/png"
@ -205,20 +226,20 @@ class WPimport:
try:
r = self._request.post("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, headers=headers, data=data)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for add image : {1}".format(self._name, err))
exit(1)
if r.status_code == 201:
self._logger.info("Image added {0}".format(img_name))
self._logger.info("{0} : Image added {1}".format(self._name, img_name))
res = r.json()
media["id"] = res["id"]
media["rendered"] = res["guid"]["rendered"]
else:
self._logger.error("Image not added due status code : {0}".format(r.status_code))
self._logger.error("{0} : Image not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug(r.content)
else:
self._logger.error("Connection error with status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} : Connection error for search image with status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))
return media
@ -231,7 +252,7 @@ class WPimport:
params = {"post": post, "author_name":i["author"], "date":i["date"]}
page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for search comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
@ -240,18 +261,18 @@ class WPimport:
params = {"force":1}
page = self._request.delete("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, j["id"]), params=params, auth=self._basic)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for delete comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
self._logger.info("Comment deleted for {0}".format(title))
self._logger.debug("Comment deleted : {0}".format(j))
self._logger.info("{0} : Comment deleted for {1}".format(self._name, title))
self._logger.debug("{0} : Comment deleted : {1}".format(self._name, j))
else:
self._logger.error("Comment not deleted for {0} due status code : {1}".format(title, page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Comment not deleted for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("Comment not listed for {0} due status code : {1}".format(title, page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Comment not listed for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
for i in comment:
data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"], "status": "approved"}
@ -262,27 +283,27 @@ class WPimport:
try:
page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for parent comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
if len(result) > 0:
data["parent"]=result[0]["id"]
else:
self._logger.error("Connection error for parent comment with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection error for parent comment with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
try:
page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for add comment : {1}".format(self._name, err))
exit(1)
if page.status_code == 201:
self._logger.info("Comment added for {0}".format(title))
self._logger.debug("Data : {0}".format(data))
self._logger.info("{0} : Comment added for {1}".format(self._name, title))
self._logger.debug("{0} : Data : {1}".format(self._name, data))
else:
self._logger.error("Comment not added for {0} due status code : {1}".format(title, page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Comment not added for {1} due status code : {2}".format(self._name, title, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
## Check class name
@ -346,6 +367,7 @@ class WPimport:
listelement[i] = []
articletitle = soup.find_all("h2", class_="articletitle")
self._logger.debug("{0} : Title of the article : {1}".format(self._name, articletitle))
articlebody = soup.find_all("div", class_="articlebody")
articledate = soup.find_all("span", class_="articledate")
articleacreator = soup.find_all("span", class_="articlecreator")
@ -353,10 +375,12 @@ class WPimport:
itemfooter = soup.find_all("div", class_="itemfooter")
comment = soup.find_all("li", class_="comment")
img_a = articlebody[0].find_all("a", {"target": "_blank"})
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
list_img = []
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
@ -365,16 +389,16 @@ class WPimport:
try:
page_img = self._request.get(href_img)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
@ -386,10 +410,10 @@ class WPimport:
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("Connection error with status code : {0}".format(page_img.status_code))
self._logger.debug(page_img.content)
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
comment_post = self._getComment(comment)
a = itemfooter[0].find_all("a", {"rel": True})
@ -408,7 +432,7 @@ class WPimport:
params = {"params":j}
page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for {1} : {2}".format(self._name, i, err))
exit(1)
if page.status_code == 200:
element_exist = True
@ -416,26 +440,26 @@ class WPimport:
listelement[i].append(result[0]["id"])
else:
self._logger.error("{0} not found due status code : {1}".format(i, page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : {1} not found due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
if element_exist is False:
data = {"name": j}
self._logger.debug("URL : {0} ".format("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i)))
self._logger.debug("data : {0}".format(data))
self._logger.debug("headers : {0}".format(self._headers_form))
self._logger.debug("{0} : URL : {1} ".format("http://{1}/wp-json/wp/v2/{2}".format(self._name, self._wordpress, i)))
self._logger.debug("{0} : data : {1}".format(self._name, data))
self._logger.debug("{0} : headers : {1}".format(self._name, self._headers_form))
try:
page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, headers=self._headers_json, data=data)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for post {1} : {2}".format(self._name, i, err))
exit(1)
if page.status_code == 201:
result = page.json()
listelement[i].append(result["id"])
else:
self._logger.error("{0} not added due status code : {1}".format(i, page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : {1} not added due status code : {2}".format(self._name, i, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
title = articletitle[0].text
author = articleacreator[0].text.lower()
@ -458,21 +482,21 @@ class WPimport:
try:
page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for get author : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
data["author"] = result[0]["id"]
else:
self._logger.error("Connection error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content))
params = {"search":title}
try:
page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params)
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err))
exit(1)
page_exist = True
headers = {'Content-Type': 'application/json', 'Accept':'application/json'}
@ -481,38 +505,38 @@ class WPimport:
if len(result) == 0:
page_exist = False
else:
self._logger.info("La page {0} existe deja et mis à jour".format(title))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
post_id = result[0]["id"]
try:
page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=headers, data=json.dumps(data))
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1)
if page.status_code == 200:
result = page.json()
self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"]))
self._logger.info("{0} : Post updated : {1}".format(self._name, result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("Post not updated due status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.error("Connection for update post error with status code : {0}".format(page.status_code))
self._logger.debug(page.content)
self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
if page_exist == False:
try:
page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=headers, data=json.dumps(data))
except Exception as err:
self._logger.error("Connection error : {0}".format(err))
self._logger.error("{0} : Connection error for create post : {1}".format(self._name, err))
exit(1)
if page.status_code == 201:
result = page.json()
self._logger.info("Article ajoute : {0}".format(result["title"]["raw"]))
self._logger.info("{0} : Post added : {1}".format(self._name, result["title"]["raw"]))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("Post not added due status code : {0}".format(r.status_code))
self._logger.debug(r.content)
self._logger.error("{0} : Post not added due status code : {1}".format(self._name, r.status_code))
self._logger.debug("{0} : {1}".format(self._name, r.content))