import threading for directory WIP
This commit is contained in:
parent
a0b816fe18
commit
bf4c2480f8
@ -10,7 +10,6 @@ from lib.WPExport import WPExport
|
||||
|
||||
|
||||
def download(name_thread, max_thread, url, logger, parser, directory, html, img):
|
||||
#def download(args):
|
||||
|
||||
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory)
|
||||
|
||||
@ -21,6 +20,46 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img)
|
||||
if args.img is False:
|
||||
exportWp.downloadImg(webpage)
|
||||
|
||||
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial):
    """Export pages from canalblog URLs and import them into WordPress targets.

    Args:
        name_thread: Zero-based worker index; the log name is "Thread-<index + 1>".
        max_thread: Total number of workers, forwarded to ``getUrlPage``.
        canalblog: Comma-separated list of source URLs.
        logger: Object exposing ``error(message)``.
        parser: Parser name forwarded to ``WPExport`` and ``WPimport``.
        wordpress: Comma-separated list of WordPress targets.
        basic: Credentials object forwarded to ``WPimport``.
        serial: When exactly ``False``, every source is imported into every
            target; otherwise source ``i`` is imported into target ``i`` and
            both lists must have the same length.

    Raises:
        SystemExit: On a URL parsing error, or on a length mismatch in the
            paired mode.
    """
    canalblog = canalblog.split(",")
    wordpress = wordpress.split(",")
    name = "Thread-{0}".format(int(name_thread) + 1)

    def fetch_webpage(canal):
        # Force the https scheme; a scheme-less input yields "https:///host",
        # hence the ":///" clean-up afterwards.
        try:
            parsed = urlparse(canal)
            parsed = parsed._replace(scheme="https")
            url = parsed.geturl().replace(":///", "://")
        except Exception as err:
            logger.error("{0} : parsing error : {1}".format(name, err))
            # NOTE(review): this function is submitted to a ThreadPoolExecutor;
            # exit() raises SystemExit, which presumably ends only this task,
            # not the whole process -- confirm that is intended.
            exit(1)
        exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
        return exportWp.getUrlPage(name_thread, max_thread)

    if serial is False:
        # Cross-product mode: each source goes to every WordPress target.
        for canal in canalblog:
            webpage = fetch_webpage(canal)
            for target in wordpress:
                importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
                importWp.fromUrl(webpage)
    else:
        if len(canalblog) != len(wordpress):
            logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name))
            exit(1)
        # Paired mode: zip visits every pair, including the last one, which
        # the previous range(0, len(canalblog)-1) loop skipped.
        for canal, target in zip(canalblog, wordpress):
            webpage = fetch_webpage(canal)
            importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
            importWp.fromUrl(webpage)
|
||||
|
||||
|
||||
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic):
|
||||
name = "Thread-{0}".format(int(name_thread) + 1)
|
||||
importWp = WPimport(name=name, basic=basic, wordpress=wordpress, logger=logger, parser=parser)
|
||||
|
||||
|
||||
|
||||
@ -112,40 +151,14 @@ if __name__ == '__main__':
|
||||
importWp.fromDirectory(directory[i])
|
||||
exit(0)
|
||||
if len(args.canalblog) > 0:
|
||||
exportWp = WPExport("", logger, args.parser, args.directory)
|
||||
canalblog = args.canalblog.split(",")
|
||||
wordpress = args.wordpress.split(",")
|
||||
|
||||
if args.serial is False:
|
||||
for canal in canalblog:
|
||||
try:
|
||||
o = urlparse(canal)
|
||||
o = o._replace(scheme="https")
|
||||
url = o.geturl().replace(":///", "://")
|
||||
except Exception as err:
|
||||
logger.error("parsing error : {0}".format(err))
|
||||
exit(1)
|
||||
exportWp.setUrl(url)
|
||||
webpage = exportWp.getUrlPage()
|
||||
for j in wordpress:
|
||||
importWp.setUrl(j)
|
||||
importWp.fromUrl(webpage)
|
||||
else:
|
||||
if len(canalblog) != len(wordpress):
|
||||
logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
|
||||
exit(1)
|
||||
for i in range(0, len(canalblog)-1):
|
||||
try:
|
||||
o = urlparse(canalblog[i])
|
||||
o = o._replace(scheme="https")
|
||||
url = o.geturl().replace(":///", "://")
|
||||
except Exception as err:
|
||||
logger.error("parsing error : {0}".format(err))
|
||||
exit(1)
|
||||
exportWp.setUrl(url)
|
||||
webpage = exportWp.getUrlPage()
|
||||
importWp.setUrl(wordpress[i])
|
||||
importWp.fromUrl(webpage)
|
||||
try:
|
||||
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
|
||||
wait_for = [
|
||||
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial)
|
||||
for i in range(0, int(args.parallel))
|
||||
]
|
||||
except Exception as err:
|
||||
logger.error("Threading error : {0}".format(err))
|
||||
|
||||
|
||||
|
||||
|
@ -8,7 +8,8 @@ from requests.packages.urllib3.util.retry import Retry
|
||||
|
||||
class WPimport:
|
||||
# Constructor
|
||||
def __init__(self, basic, wordpress, logger, parser):
|
||||
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser"):
|
||||
self._name = name
|
||||
self._basic = basic
|
||||
self._wordpress = wordpress
|
||||
self._logger = logger
|
||||
@ -69,6 +70,21 @@ class WPimport:
|
||||
|
||||
# Private method
|
||||
|
||||
## Import from a list of files (the list is split by thread)
|
||||
|
||||
def fromFile(self, files):
    """Import every existing file of ``files``.

    Each file is parsed with BeautifulSoup using ``self._parser``. When the
    document contains at least one ``<div class="articlebody">`` it is handed
    to ``_addOrUpdatePost``; otherwise to ``_addOrUpdateFeaturedMedia``.
    Paths that do not exist are skipped silently.

    Args:
        files: Iterable of file paths to process.
    """
    # Iterate over the paths directly: the previous index loop referenced an
    # undefined name ``file`` and raised NameError on the first element.
    for file in files:
        if not os.path.exists(file):
            continue
        self._logger.info("Fichier en cours de traitement : {0}".format(file))
        with open(file, 'r') as f:
            content = f.read()
        soup = BeautifulSoup(content, self._parser)
        articlebody = soup.find_all("div", class_="articlebody")
        if len(articlebody) > 0:
            self._addOrUpdatePost(soup)
        else:
            self._addOrUpdateFeaturedMedia(soup)
|
||||
|
||||
## Get all files
|
||||
|
||||
def _getFiles(self, item):
|
||||
|
Loading…
x
Reference in New Issue
Block a user