Merge pull request 'add-parameter' (#17) from add-parameter into master

Reviewed-on: #17
This commit is contained in:
v4l3n71n 2023-06-05 20:58:51 +00:00
commit 3e75f05340
2 changed files with 93 additions and 85 deletions

View File

@ -47,7 +47,7 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img,
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog):
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial, ssl_wordpress, ssl_canalblog, create, update, image):
canalblog = canalblog.split(",")
wordpress = wordpress.split(",")
name = "Thread-{0}".format(int(name_thread) + 1)
@ -67,7 +67,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp
for j in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
for k in ["article", "page"]:
for l in ["publications", "principal"]:
importWp.fromUrl(webpage[l][k])
@ -88,7 +88,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
exportWp = WPExport(name=name, url=url, logger=logger, parser=parser, ssl_canalblog=ssl_canalblog)
webpage = exportWp.getUrlPage(name_thread, max_thread)
del exportWp
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
for k in ["article", "page"]:
for l in ["publications", "principal"]:
@ -97,13 +97,13 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
del importWp
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress):
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial, ssl_wordpress, create, update, image):
name = "Thread-{0}".format(int(name_thread) + 1)
directory = directory.split(",")
wordpress = wordpress.split(",")
if serial is False:
for i in wordpress:
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
for j in directory:
importWp.fromDirectory(j, name_thread, max_thread)
del importWp
@ -113,7 +113,7 @@ def importDirectory(name_thread, max_thread, directory, logger, parser, wordpres
logger.error("{0} : Error : Number directory is differant than wordpress".format(name))
exit(1)
for i in range(0, len(wordpress)-1):
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress)
importWp = WPimport(name=name, basic=basic, wordpress=wordpress[i], logger=logger, parser=parser, ssl_wordpress=ssl_wordpress, no_create=create, no_update=update, no_image=image)
importWp.fromDirectory(directory[i])
del importWp
@ -143,6 +143,10 @@ if __name__ == '__main__':
import_parser.add_argument("--remove-categories", help="Remove all categories", dest="categories", action="store_true")
import_parser.add_argument("--remove-tags", help="Remove all tags", dest="tags", action="store_true")
import_parser.add_argument("--remove-media", help="Remove all media", dest="media", action="store_true")
# Opt-out switches for the import command. action="store_true" already makes
# argparse default each destination to the boolean False and set it to True
# when the flag is given. The previous explicit default="store_false" was a
# non-empty string, so the `is False` checks in WPimport never matched and
# post creation, post update and image handling were skipped even when none
# of these flags was passed.
import_parser.add_argument("--no-create", help="No create post", dest="create", action="store_true")
import_parser.add_argument("--no-update", help="No update post", dest="update", action="store_true")
import_parser.add_argument("--no-image", help="No image add or update", dest="image", action="store_true")
remove_parser = subparsers.add_parser("remove")
@ -226,7 +230,7 @@ if __name__ == '__main__':
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
wait_for = [
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress)
ex.submit(importDirectory, i, int(args.parallel), args.directory, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, args.create, args.update, args.image)
for i in range(0, int(args.parallel))
]
except Exception as err:
@ -237,7 +241,7 @@ if __name__ == '__main__':
wait_for = [ ex.submit(remove, i, args.parallel, args, basic, logger, ssl_wordpress) for i in range(0, int(args.parallel)) ]
wait(wait_for, return_when=ALL_COMPLETED)
wait_for = [
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog)
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial, ssl_wordpress, ssl_canalblog, args.create, args.update, args.image)
for i in range(0, int(args.parallel))
]

View File

@ -8,7 +8,7 @@ from requests.packages.urllib3.util.retry import Retry
class WPimport:
# Constructor
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True):
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser", ssl_wordpress=True, no_create=False, no_update=False, no_image=False):
self._name = name
self._basic = basic
self._wordpress = wordpress
@ -24,7 +24,9 @@ class WPimport:
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
self._no_create = no_create
self._no_update = no_update
self._no_image = no_image
# Destructor
def __del__(self):
@ -435,48 +437,49 @@ class WPimport:
img_a = articlebody[0].find_all("a", {"target": "_blank"})
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
list_img = []
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_img)
if self._no_image is False:
for i in img_a:
new_img = {}
img = i.find_all("img")
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
new_img["old_src"]=href_img
new_img["old_href"]=href_a
try:
page_img = self._request.get(href_img)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
if page_img.status_code == 404:
href_img = href_a
try:
page_img = self._request.get(href_a)
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
if href_img != href_a:
media=self._addOrUpdateMedia(href_a, page_img)
new_img["id"]=media["id"]
new_img["new_src"]=media["rendered"]
list_img.append(new_img)
if page_img.status_code not in [200, 404]:
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for get image : {1}".format(self._name, err))
exit(1)
self._logger.debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
comment_post = self._getComment(comment)
@ -615,45 +618,46 @@ class WPimport:
self._logger.debug("{0} : Search title posts for |{2}| : |{1}|".format(self._name, title_rendered, title))
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
if self._no_update is False:
page_is_exist = True
post_id = i["id"]
count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, params=params)
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
try:
page = self._request.post("{2}://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 200:
result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title))
self._addOrUpdateComment(result["id"], comment_post, result["title"]["raw"])
self._linkImgPost(result["title"]["raw"], list_img, result["id"])
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
@ -667,7 +671,7 @@ class WPimport:
except Exception as err:
self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err))
if page_is_exist is False:
if page_is_exist is False and self._no_create is False:
try:
self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"]))
page = self._request.post("{1}://{0}/wp-json/wp/v2/posts".format(self._wordpress, self._protocol), auth=self._basic, headers=self._headers_json, data=json.dumps(data))