Remove duplicate posts

This commit is contained in:
Valentin CZERYBA 2023-05-21 21:12:00 +02:00
parent 769b7f43fc
commit 75772ba7f0

View File

@ -5,6 +5,7 @@ from urllib.parse import urlparse
import requests, os, logging, re, json import requests, os, logging, re, json
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry from requests.packages.urllib3.util.retry import Retry
from slugify import slugify
class WPimport: class WPimport:
# Constructor # Constructor
@ -14,7 +15,7 @@ class WPimport:
self._wordpress = wordpress self._wordpress = wordpress
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'} self._headers_json = {'Content-Type': 'application/json; charset=utf-8', 'Accept':'application/json'}
self._request = requests.Session() self._request = requests.Session()
@ -487,7 +488,7 @@ class WPimport:
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = dateheader[0].text.split(" ")
data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]} data = {"title":title, "content":bodyhtml, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
params = {"search":author} params = {"search":author, "per_page":100}
try: try:
self._logger.info("{0} : Search author : {1}".format(self._name, author)) self._logger.info("{0} : Search author : {1}".format(self._name, author))
page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/users".format(self._wordpress), auth=self._basic, params=params)
@ -503,33 +504,68 @@ class WPimport:
else: else:
self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection error with status code for get author : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(page.content)) self._logger.debug("{0} : {1}".format(page.content))
if title[len(title)-1] == " ":
title = title[:-1]
page_is_exist = False
params = {"search": title} for index in range(1,10):
params = {"search": title, "per_page":100, "page": index}
try: try:
self._logger.info("{0} : Search post : {1}".format(self._name, title)) self._logger.info("{0} : Search post : {1}".format(self._name, title))
page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params) page = self._request.get("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, params=params, headers=self._headers_json)
except ConnectionError as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for search post : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err: except Exception as err:
self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err)) self._logger.error("{0} : Exception error for search post : {1}".format(self._name, err))
page_is_exist = False
if page.status_code == 200: if page.status_code == 200:
self._logger.debug("{0} : Encoding : {1}".format(self._name, page.encoding))
page.encoding = "utf-8"
result = page.json() result = page.json()
if len(result) == 0:
break
self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result))) self._logger.info("{0} : Number result posts : {1}".format(self._name, len(result)))
count = 0
for i in result: for i in result:
self._logger.info("{0} : Search title posts for {2} : {1}".format(self._name, i["title"]["rendered"], title))
if i["title"]["rendered"] == title: self._logger.info("{0} : Search title posts for |{2}| : |{1}|".format(self._name, i["title"]["rendered"], title))
title_rendered = i["title"]["rendered"].replace('’', "'")
title_rendered = title_rendered.replace('–', '-')
title_rendered = title_rendered.replace('…', '...')
title_rendered = title_rendered.replace('« ', '"')
title_rendered = title_rendered.replace(' »', '"')
self._logger.debug("{0} : SIze of title : {1} - {2}".format(self._name, len(title), len(title_rendered)))
if title_rendered == title:
page_is_exist = True page_is_exist = True
post_id = i["id"] post_id = i["id"]
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, result[0])) count = count + 1
if count > 1:
self._logger.info("{0} : Page {1} is double and going to delete".format(self._name, title))
try:
params = {"force":1}
page = self._request.delete("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, params=params)
except ConnectionError as err:
self._logger.error("{0} : Connection error for deleted post : {1}".format(self._name, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for deleted post : {1}".format(self._name, err))
if page.status_code == 200:
self._logger.info("{0} : Post deleted : {1}".format(self._name, title))
else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
else:
self._logger.debug("{0} : Data for post to update : {1}".format(self._name, i))
self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title)) self._logger.info("{0} : Page {1} already exist and going to update".format(self._name, title))
try: try:
page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) page = self._request.post("http://{0}/wp-json/wp/v2/posts/{1}".format(self._wordpress, post_id), auth=self._basic, headers=self._headers_json, data=json.dumps(data))
except Exception as err: except ConnectionError as err:
self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err)) self._logger.error("{0} : Connection error for update post : {1}".format(self._name, err))
exit(1) exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for update post : {1}".format(self._name, err))
if page.status_code == 200: if page.status_code == 200:
result = page.json() result = page.json()
self._logger.info("{0} : Post updated : {1}".format(self._name, title)) self._logger.info("{0} : Post updated : {1}".format(self._name, title))
@ -538,11 +574,15 @@ class WPimport:
else: else:
self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Post not updated due status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
if page.status_code == 400:
self._logger.error("{0} : Connection for update post unauthorized : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content))
break
else: else:
self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code)) self._logger.error("{0} : Connection for update post error with status code : {1}".format(self._name, page.status_code))
self._logger.debug("{0} : {1}".format(self._name, page.content)) self._logger.debug("{0} : {1}".format(self._name, page.content))
if page_is_exist == False: if page_is_exist is False:
try: try:
self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"])) self._logger.info("{0} : Creating posts : {1}".format(self._name, data["title"]))
page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data)) page = self._request.post("http://{0}/wp-json/wp/v2/posts".format(self._wordpress), auth=self._basic, headers=self._headers_json, data=json.dumps(data))