web_scrap/lib/WPRemove.py

120 lines
6.0 KiB
Python

#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os, logging, re, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class WPRemove:
# Constructor
def __init__(self, index_name=1, number_thread=1, basic=None, wordpress="", logger=None, ssl_wordpress=True):
self._basic = basic
self._wordpress = wordpress
self._logger = logger
self._headers_json = {'Content-Type': 'application/json', 'Accept':'application/json'}
self._name = "Thread-{0}".format(index_name)
self._index_thread = index_name
self._protocol = "https"
self._number_thread = number_thread
if ssl_wordpress is False:
self._protocol = "http"
self._request = requests.Session()
retries = Retry(connect=10, read=10, redirect=5,
status_forcelist=[429, 500, 502, 503, 504], backoff_factor=2)
self._request.mount('{0}://'.format(self._protocol), HTTPAdapter(max_retries=retries))
# Destructor
def __del__(self):
print("{0} : Import finished for {1}".format(self._name, self._wordpress))
# Public method
def _getCount(self, composant):
count = 0
try:
params = {"per_page":1}
self._logger.info("{0} : Get count {2} to remove for url : {1}".format(self._name, self._wordpress, composant))
r = self._request.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, composant, self._protocol), params=params, auth=self._basic, headers=self._headers_json)
if r.status_code == 200:
count = int(r.headers["X-WP-Total"])
else:
self._logger.error("{0} : Error for list to remove {1} due status code {2}".format(self._name, composant, r.status_code))
self._logger.debug("{0} : Content error for {1} : {2}".format(self._name, composant, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for list {1} to remove : {2}".format(self._name, composant, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for list {1} to remove : {2}".format(self._name, composant, err))
return count
def setUrl(self, wordpress):
self._wordpress = wordpress
def cleanPosts(self):
self._removeAll("posts")
def cleanTags(self):
self._removeAll("tags")
def cleanCategories(self):
self._removeAll("categories")
def cleanMedia(self):
self._removeAll("media")
# Private method
def _removeAll(self, composant):
count = self._getCount(composant)
self._logger.debug("{0} : Count for {1} : {2}".format(self._name, composant, count))
if count > 0:
self._logger.debug("{0} : Number thread for {1} : {2}".format(self._name, composant, self._number_thread))
page = count / int(self._number_thread)
self._logger.debug("{0} : Page for {1} : {2}".format(self._name, composant, page))
if page > int(page):
page = int(page) + 1
if page > 100:
page = 100
params = {"per_page":page, "page":self._index_thread}
self._logger.info("{0} : Params for {1} : {2}".format(self._name, composant, params))
try:
self._logger.info("{0} : List {2} to remove for url : {1}".format(self._name, self._wordpress, composant))
r = self._request.get("{2}://{0}/wp-json/wp/v2/{1}".format(self._wordpress, composant, self._protocol), auth=self._basic, params=params, headers=self._headers_json)
if r.status_code == 200:
result = r.json()
if len(result) > 0:
for i in result:
self._logger.info("{0} : Remove {2} for url {1} : {3}".format(self._name, self._wordpress, composant, i["title"]["rendered"]))
params = {"force":1}
try:
r = self._request.delete("{3}://{0}/wp-json/wp/v2/{1}/{2}".format(self._wordpress, composant, i["id"], self._protocol), auth=self._basic, headers=self._headers_json , params=params)
if r.status_code == 200:
self._logger.info("{0} : Post removed for URL {1} {2} : {3}".format(self._name, self._wordpress, composant, i["title"]["rendered"]))
else:
self._logger.error("{0} : Connection error for post {1} {2} {3} with status code {4}".format(self._name, self._wordpress, composant, i["title"]["rendered"], r.status_code))
except ConnectionError as err:
self._logger.error("{0} : Connection error for {1} remove : {2}".format(self._name, composant, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for {1} remove : {2}".format(self._name, composant, err))
self._removeAll(composant)
if r.status_code == 400:
self._logger.error("{0} : No content for {1} to remove : {2}".format(self._name, composant, r.status_code))
else:
self._logger.error("{0} : Error for list to remove {1} due status code {2}".format(self._name, composant, r.status_code))
self._logger.debug("{0} : Content error for {1} : {2}".format(self._name, composant, r.content))
except ConnectionError as err:
self._logger.error("{0} : Connection error for list {1} to remove : {2}".format(self._name, composant, err))
exit(1)
except Exception as err:
self._logger.error("{0} : Exception error for list {1} to remove : {2}".format(self._name, composant, err))