Add HTTP retries via a shared requests.Session in WPExport and WPimport

This commit is contained in:
Valentin CZERYBA 2023-04-13 21:54:35 +02:00
parent f5e82fe4c4
commit 1311ef2ff2
2 changed files with 44 additions and 26 deletions

View File

@ -2,6 +2,8 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
import requests, os, argparse, logging import requests, os, argparse, logging
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPExport: class WPExport:
def __init__(self, url, logger, parser, directory): def __init__(self, url, logger, parser, directory):
@ -10,6 +12,13 @@ class WPExport:
self._parser = parser self._parser = parser
self._dir = directory self._dir = directory
self._request = requests.Session()
retries = Retry(total=5,
status_forcelist=[429, 500, 502, 503, 504])
self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Public method # Public method
@ -48,7 +57,7 @@ class WPExport:
# Get URL # Get URL
def getUrlPage(self): def getUrlPage(self):
try: try:
page = requests.get(self._url) page = self._request.get(self._url)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
@ -64,7 +73,7 @@ class WPExport:
webpage = [] webpage = []
for i in page_url: for i in page_url:
try: try:
page = requests.get(i) page = self._request.get(i)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
@ -90,7 +99,7 @@ class WPExport:
self._logger.info(url_paging) self._logger.info(url_paging)
if url_paging not in webpage: if url_paging not in webpage:
webpage.append(url_paging) webpage.append(url_paging)
page = requests.get(url_paging) page = self._request.get(url_paging)
if page.status_code == 200: if page.status_code == 200:
soup = BeautifulSoup(page.text, self._parser) soup = BeautifulSoup(page.text, self._parser)
h2 = soup.find_all("h2") h2 = soup.find_all("h2")
@ -131,7 +140,7 @@ class WPExport:
# Get Css and JS # Get Css and JS
def _getScriptCss(self, js, css): def _getScriptCss(self, js, css):
try: try:
page = requests.get(self._url) page = self._request.get(self._url)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
@ -178,7 +187,7 @@ class WPExport:
page_img = [] page_img = []
for i in webpage: for i in webpage:
try: try:
page = requests.get(i) page = self._request.get(i)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)
@ -210,7 +219,7 @@ class WPExport:
dir_page_web = "/".join(path_web) dir_page_web = "/".join(path_web)
self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web)) self._mkdirPath("{0}/{1}/{2}".format(backup_dir, o.netloc, dir_page_web))
try: try:
r = requests.get(webpage[i]) r = self._request.get(webpage[i])
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("Connection error : {0}".format(err))
exit(1) exit(1)

View File

@ -3,6 +3,8 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
import requests, os, logging, re, json import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPimport: class WPimport:
# Constructor # Constructor
@ -12,6 +14,13 @@ class WPimport:
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._request = requests.Session()
retries = Retry(total=5,
status_forcelist=[429, 500, 502, 503, 504])
self._request.mount('http://', HTTPAdapter(max_retries=retries))
# Public method # Public method
def setUrl(self, wordpress): def setUrl(self, wordpress):
@ -19,7 +28,7 @@ class WPimport:
def fromUrl(self, webpage): def fromUrl(self, webpage):
for i in range(0, len(webpage)): for i in range(0, len(webpage)):
r = requests.get(webpage[i]) r = self._request.get(webpage[i])
if r.status_code == 200: if r.status_code == 200:
self._logger.info("({0}/{1} : Page en cours d'import : {2}".format(i+1, len(webpage), webpage[i])) self._logger.info("({0}/{1} : Page en cours d'import : {2}".format(i+1, len(webpage), webpage[i]))
soup = BeautifulSoup(r.content, self._parser) soup = BeautifulSoup(r.content, self._parser)
@ -79,7 +88,7 @@ class WPimport:
for i in item_div: for i in item_div:
h2 = i.find_all("h2")[0].text h2 = i.find_all("h2")[0].text
params = {"search":h2, "type":"post"} params = {"search":h2, "type":"post"}
page = requests.get("http://{0}/wp-json/wp/v2/search".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/search".format(self._wordpress), auth=self._basic, params=params)
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
if len(result) > 0: if len(result) > 0:
@ -87,19 +96,19 @@ class WPimport:
img = i.find_all("img") img = i.find_all("img")
if len(img) > 0: if len(img) > 0:
img_src = img[0].get("src") img_src = img[0].get("src")
page = requests.get(img_src) page = self._request.get(img_src)
if page.status_code == 200: if page.status_code == 200:
name_img = img_src.replace("_q", "") name_img = img_src.replace("_q", "")
name_img = name_img.split("/")[len(name_img.split("/"))-1] name_img = name_img.split("/")[len(name_img.split("/"))-1]
params = {"search": name_img} params = {"search": name_img}
page = requests.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
if page.status_code == 200: if page.status_code == 200:
res = page.json() res = page.json()
if len(res) > 0: if len(res) > 0:
id_media = res[0]["id"] id_media = res[0]["id"]
headers = {'Content-Type': 'application/json', 'Accept':'application/json'} headers = {'Content-Type': 'application/json', 'Accept':'application/json'}
data = {"featured_media": id_media} data = {"featured_media": id_media}
r = requests.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=headers, data=json.dumps(data)) r = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, result[0]["id"]), auth=self._basic, headers=headers, data=json.dumps(data))
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"])) self._logger.info("Ajout media featured : {0}".format(r.json()["title"]["raw"]))
else: else:
@ -110,7 +119,7 @@ class WPimport:
def _linkImgPost(self, title, list_img, post_id): def _linkImgPost(self, title, list_img, post_id):
for i in list_img: for i in list_img:
data = {"post": post_id} data = {"post": post_id}
r = requests.post("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"]), auth=self._basic, data=data) r = self._request.post("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, i["id"]), auth=self._basic, data=data)
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Association d'une image à l'article {0}".format(title)) self._logger.info("Association d'une image à l'article {0}".format(title))
@ -121,12 +130,12 @@ class WPimport:
split_fileimg = href_img.split("/") split_fileimg = href_img.split("/")
img_name = split_fileimg[len(split_fileimg)-1] img_name = split_fileimg[len(split_fileimg)-1]
params = { "search": img_name} params = { "search": img_name}
r = requests.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params) r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
if r.status_code == 200: if r.status_code == 200:
res = r.json() res = r.json()
if len(res) > 0: if len(res) > 0:
params = {"force":1} params = {"force":1}
r = requests.delete("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"]), auth=self._basic, params=params) r = self._request.delete("http://{0}/wp-json/wp/v2/media/{1}".format(self._wordpress, res[0]["id"]), auth=self._basic, params=params)
if r.status_code == 200: if r.status_code == 200:
self._logger.info("Image supprimé {0}".format(img_name)) self._logger.info("Image supprimé {0}".format(img_name))
data = page.content data = page.content
@ -134,7 +143,7 @@ class WPimport:
if img_name.split(".")[1] == "jpg" or img_name.split(".")[1] == "jpeg": if img_name.split(".")[1] == "jpg" or img_name.split(".")[1] == "jpeg":
img_type = "image/jpg" img_type = "image/jpg"
headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)} headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)}
r = requests.post("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, headers=headers, data=data) r = self._request.post("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, headers=headers, data=data)
if r.status_code == 201: if r.status_code == 201:
self._logger.info("Ajout d'image {0}".format(img_name)) self._logger.info("Ajout d'image {0}".format(img_name))
res = r.json() res = r.json()
@ -147,7 +156,7 @@ class WPimport:
def _addOrUpdateComment(self, post, comment, title): def _addOrUpdateComment(self, post, comment, title):
params = {"post": post} params = {"post": post}
block = True block = True
page = requests.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, params=params)
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
for i in comment: for i in comment:
@ -158,11 +167,11 @@ class WPimport:
id_comment = j["id"] id_comment = j["id"]
data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"]} data = {"post": post, "content": i["content"], "date": i["date"], "author_name": i["author"]}
if comment_exist is True: if comment_exist is True:
page = page = requests.post("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, id_comment), auth=self._basic, data=data) page = page = self._request.post("http://{0}/wp-json/wp/v2/comments/{1}".format(self._wordpress, id_comment), auth=self._basic, data=data)
if page.status_code == 200: if page.status_code == 200:
self._logger.info("Commentaire mise à jour pour {0}".format(title)) self._logger.info("Commentaire mise à jour pour {0}".format(title))
else: else:
page = requests.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data) page = self._request.post("http://{0}/wp-json/wp/v2/comments".format(self._wordpress), auth=self._basic, data=data)
if page.status_code == 201: if page.status_code == 201:
self._logger.info("Commentaire ajoute pour {0}".format(title)) self._logger.info("Commentaire ajoute pour {0}".format(title))
@ -177,7 +186,7 @@ class WPimport:
listelement = {} listelement = {}
for i in liste: for i in liste:
page = requests.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress,i)) page = self._request.get("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress,i))
if page.status_code == 200: if page.status_code == 200:
elements[i] = page.json() elements[i] = page.json()
element[i] = [] element[i] = []
@ -200,10 +209,10 @@ class WPimport:
href_img = img[0].get("src") href_img = img[0].get("src")
new_img["old_src"]=href_img new_img["old_src"]=href_img
new_img["old_href"]=href_a new_img["old_href"]=href_a
page_img = requests.get(href_img) page_img = self._request.get(href_img)
if page_img.status_code == 404: if page_img.status_code == 404:
href_img = href_a href_img = href_a
page_img = requests.get(href_a) page_img = self._request.get(href_a)
if page_img.status_code == 200: if page_img.status_code == 200:
media=self._addOrUpdateMedia(href_img, page_img) media=self._addOrUpdateMedia(href_img, page_img)
new_img["id"]=media["id"] new_img["id"]=media["id"]
@ -245,7 +254,7 @@ class WPimport:
listelement[i].append(k["id"]) listelement[i].append(k["id"])
if element_exist is False: if element_exist is False:
data = {"name": j} data = {"name": j}
page = requests.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, data=data) page = self._request.post("http://{0}/wp-json/wp/v2/{1}".format(self._wordpress, i), auth=self._basic, data=data)
if page.status_code == 201: if page.status_code == 201:
result = page.json() result = page.json()
listelement[i].append(result["id"]) listelement[i].append(result["id"])
@ -268,13 +277,13 @@ class WPimport:
time = dateheader[0].text.split(" ") time = dateheader[0].text.split(" ")
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
params = {"search":author} params = {"search":author}
page = requests.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params)
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
data["author"] = result[0]["id"] data["author"] = result[0]["id"]
params = {"search":title} params = {"search":title}
page = requests.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params)
page_exist = True page_exist = True
headers = {'Content-Type': 'application/json', 'Accept':'application/json'} headers = {'Content-Type': 'application/json', 'Accept':'application/json'}
if page.status_code == 200: if page.status_code == 200:
@ -284,7 +293,7 @@ class WPimport:
else: else:
self._logger.info("La page {0} existe deja et mis à jour".format(title)) self._logger.info("La page {0} existe deja et mis à jour".format(title))
post_id = result[0]["id"] post_id = result[0]["id"]
page = requests.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=headers, data=json.dumps(data)) page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=headers, data=json.dumps(data))
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"])) self._logger.info("Article mis à jour : {0}".format(result["title"]["raw"]))
@ -292,7 +301,7 @@ class WPimport:
self._linkImgPost(result["title"]["raw"], list_img, result["id"]) self._linkImgPost(result["title"]["raw"], list_img, result["id"])
if page_exist == False: if page_exist == False:
page = requests.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=headers, data=json.dumps(data)) page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=headers, data=json.dumps(data))
if page.status_code == 201: if page.status_code == 201:
result = page.json() result = page.json()
self._logger.info("Article ajoute : {0}".format(result["title"]["raw"])) self._logger.info("Article ajoute : {0}".format(result["title"]["raw"]))