import threading for directory WIP
This commit is contained in:
parent
a0b816fe18
commit
bf4c2480f8
@ -10,7 +10,6 @@ from lib.WPExport import WPExport
|
|||||||
|
|
||||||
|
|
||||||
def download(name_thread, max_thread, url, logger, parser, directory, html, img):
|
def download(name_thread, max_thread, url, logger, parser, directory, html, img):
|
||||||
#def download(args):
|
|
||||||
|
|
||||||
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory)
|
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser, directory=directory)
|
||||||
|
|
||||||
@ -21,6 +20,46 @@ def download(name_thread, max_thread, url, logger, parser, directory, html, img)
|
|||||||
if args.img is False:
|
if args.img is False:
|
||||||
exportWp.downloadImg(webpage)
|
exportWp.downloadImg(webpage)
|
||||||
|
|
||||||
|
def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, basic, serial):
    """Export pages from the given source URLs and import them into WordPress.

    The caller submits this function once per worker index to a thread pool.

    Args:
        name_thread: Zero-based worker index; used to build the "Thread-N"
            label and forwarded to ``WPExport.getUrlPage``.
        max_thread: Total number of workers, forwarded to
            ``WPExport.getUrlPage``.
        canalblog: Comma-separated list of source URLs.
        logger: Logger used for error reporting and handed to the
            exporter/importer objects.
        parser: Parser name handed to ``WPExport`` and ``WPimport``.
        wordpress: Comma-separated list of WordPress targets.
        basic: Forwarded unchanged to ``WPimport``
            (presumably HTTP basic-auth credentials -- TODO confirm).
        serial: When ``False``, every source is imported into every target.
            Otherwise sources and targets are paired by position and the two
            lists must have the same length.
    """
    canalblog = canalblog.split(",")
    wordpress = wordpress.split(",")
    name = "Thread-{0}".format(int(name_thread) + 1)

    if serial is False:
        # Cross product: each source blog goes to every WordPress target.
        for canal in canalblog:
            try:
                o = urlparse(canal)
                o = o._replace(scheme="https")
                # A URL given without a scheme parses with an empty netloc,
                # so geturl() yields "https:///host/..."; collapse the slash.
                url = o.geturl().replace(":///", "://")
            except Exception as err:
                logger.error("{0} : parsing error : {1}".format(name, err))
                exit(1)
            exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
            webpage = exportWp.getUrlPage(name_thread, max_thread)
            for j in wordpress:
                importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser)
                importWp.fromUrl(webpage)
    else:
        if len(canalblog) != len(wordpress):
            logger.error("{0} : ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress".format(name))
            exit(1)
        # Pair sources and targets by position. The previous
        # range(0, len(canalblog)-1) stopped one short and never processed
        # the last pair (nor the only pair when a single URL was given).
        for canal, target in zip(canalblog, wordpress):
            try:
                o = urlparse(canal)
                o = o._replace(scheme="https")
                url = o.geturl().replace(":///", "://")
            except Exception as err:
                logger.error("parsing error : {0}".format(err))
                exit(1)
            exportWp = WPExport(name=name, url=url, logger=logger, parser=parser)
            webpage = exportWp.getUrlPage(name_thread, max_thread)
            importWp = WPimport(name=name, basic=basic, wordpress=target, logger=logger, parser=parser)
            importWp.fromUrl(webpage)
|
||||||
|
|
||||||
|
|
||||||
|
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic):
|
||||||
|
name = "Thread-{0}".format(int(name_thread) + 1)
|
||||||
|
importWp = WPimport(name=name, basic=basic, wordpress=wordpress, logger=logger, parser=parser)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -112,40 +151,14 @@ if __name__ == '__main__':
|
|||||||
importWp.fromDirectory(directory[i])
|
importWp.fromDirectory(directory[i])
|
||||||
exit(0)
|
exit(0)
|
||||||
if len(args.canalblog) > 0:
|
if len(args.canalblog) > 0:
|
||||||
exportWp = WPExport("", logger, args.parser, args.directory)
|
try:
|
||||||
canalblog = args.canalblog.split(",")
|
with futures.ThreadPoolExecutor(max_workers=int(args.parallel)) as ex:
|
||||||
wordpress = args.wordpress.split(",")
|
wait_for = [
|
||||||
|
ex.submit(importUrl, i, int(args.parallel), args.canalblog, logger, args.parser, args.wordpress, basic, args.serial)
|
||||||
if args.serial is False:
|
for i in range(0, int(args.parallel))
|
||||||
for canal in canalblog:
|
]
|
||||||
try:
|
except Exception as err:
|
||||||
o = urlparse(canal)
|
logger.error("Threading error : {0}".format(err))
|
||||||
o = o._replace(scheme="https")
|
|
||||||
url = o.geturl().replace(":///", "://")
|
|
||||||
except Exception as err:
|
|
||||||
logger.error("parsing error : {0}".format(err))
|
|
||||||
exit(1)
|
|
||||||
exportWp.setUrl(url)
|
|
||||||
webpage = exportWp.getUrlPage()
|
|
||||||
for j in wordpress:
|
|
||||||
importWp.setUrl(j)
|
|
||||||
importWp.fromUrl(webpage)
|
|
||||||
else:
|
|
||||||
if len(canalblog) != len(wordpress):
|
|
||||||
logger.error("ERREUR : Le nombre de dossier n'est pas equivalent au nombre d'URL wordpress")
|
|
||||||
exit(1)
|
|
||||||
for i in range(0, len(canalblog)-1):
|
|
||||||
try:
|
|
||||||
o = urlparse(canalblog[i])
|
|
||||||
o = o._replace(scheme="https")
|
|
||||||
url = o.geturl().replace(":///", "://")
|
|
||||||
except Exception as err:
|
|
||||||
logger.error("parsing error : {0}".format(err))
|
|
||||||
exit(1)
|
|
||||||
exportWp.setUrl(url)
|
|
||||||
webpage = exportWp.getUrlPage()
|
|
||||||
importWp.setUrl(wordpress[i])
|
|
||||||
importWp.fromUrl(webpage)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,7 +8,8 @@ from requests.packages.urllib3.util.retry import Retry
|
|||||||
|
|
||||||
class WPimport:
|
class WPimport:
|
||||||
# Constructor
|
# Constructor
|
||||||
def __init__(self, basic, wordpress, logger, parser):
|
def __init__(self, name="Thread-0", basic=None, wordpress="", logger=None, parser="html.parser"):
|
||||||
|
self._name = name
|
||||||
self._basic = basic
|
self._basic = basic
|
||||||
self._wordpress = wordpress
|
self._wordpress = wordpress
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
@ -69,6 +70,21 @@ class WPimport:
|
|||||||
|
|
||||||
# Private method
|
# Private method
|
||||||
|
|
||||||
|
## From files in split by thread
|
||||||
|
|
||||||
|
def fromFile(self, files):
    """Import each existing file of *files* as a post or as featured media.

    Paths that do not exist on disk are skipped silently. Each remaining file
    is parsed with BeautifulSoup using ``self._parser``. If the document
    contains at least one ``<div class="articlebody">`` it is handed to
    ``_addOrUpdatePost``, otherwise to ``_addOrUpdateFeaturedMedia``.

    Args:
        files: Sequence of file paths to process.
    """
    # Iterate over the paths themselves: the previous index loop tested and
    # opened an undefined name ("file") instead of files[i], which raised
    # NameError on the first iteration.
    for path in files:
        if not os.path.exists(path):
            continue
        self._logger.info("Fichier en cours de traitement : {0}".format(path))
        with open(path, 'r') as f:
            content = f.read()
        soup = BeautifulSoup(content, self._parser)
        articlebody = soup.find_all("div", class_="articlebody")
        if len(articlebody) > 0:
            self._addOrUpdatePost(soup)
        else:
            self._addOrUpdateFeaturedMedia(soup)
|
||||||
|
|
||||||
## Get all files
|
## Get all files
|
||||||
|
|
||||||
def _getFiles(self, item):
|
def _getFiles(self, item):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user