Compare commits
No commits in common. "939e744d1de5ddcda4b4967db0c4b1775b4da3c2" and "ab3720fbbc438440722aa2986a1a2c98fc417820" have entirely different histories.
939e744d1d
...
ab3720fbbc
@ -36,7 +36,6 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
|
||||
exit(1)
|
||||
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser)
|
||||
webpage = exportWp.getUrlPage(name_thread, max_thread)
|
||||
|
||||
for j in wordpress:
|
||||
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser)
|
||||
importWp.fromUrl(webpage)
|
||||
@ -60,8 +59,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
|
||||
|
||||
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
|
||||
name = "Thread-{0}".format(int(name_thread) + 1)
|
||||
directory = directory.split(",")
|
||||
wordpress = wordpress.split(",")
|
||||
directory = args.directory.split(",")
|
||||
if serial is False:
|
||||
for i in wordpress:
|
||||
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser)
|
||||
|
@ -103,7 +103,7 @@ class WPExport:
|
||||
|
||||
setPageDivided = int(number_lastpage) / max_thread
|
||||
setPagePart = setPageDivided * (index_thread + 1)
|
||||
firstPagePart = (setPagePart - setPageDivided)
|
||||
firstPagePart = (setPagePart - setPageDivided) + 1
|
||||
|
||||
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
|
||||
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
|
||||
|
@ -36,7 +36,7 @@ class WPimport:
|
||||
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err))
|
||||
exit(1)
|
||||
if r.status_code == 200:
|
||||
self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
|
||||
self._logger.info("{0} : ({1}/{2} : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
|
||||
soup = BeautifulSoup(r.content, self._parser)
|
||||
articlebody = soup.find_all("div", class_="articlebody")
|
||||
if len(articlebody) > 0:
|
||||
@ -72,7 +72,6 @@ class WPimport:
|
||||
self._logger.info("{0} : File is being processed : {1}".format(self._name, files[i]))
|
||||
with open(files[i], 'r') as f:
|
||||
content = f.read()
|
||||
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
|
||||
soup = BeautifulSoup(content, self._parser)
|
||||
articlebody = soup.find_all("div", class_="articlebody")
|
||||
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
|
||||
@ -190,14 +189,12 @@ class WPimport:
|
||||
media = {"id":"", "rendered":""}
|
||||
split_fileimg = href_img.split("/")
|
||||
img_name = split_fileimg[len(split_fileimg)-1]
|
||||
self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "http://{0}/wp-json/wp/v2/media".format(self._wordpress)))
|
||||
params = { "search": img_name}
|
||||
try:
|
||||
r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
|
||||
except Exception as err:
|
||||
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
|
||||
exit(1)
|
||||
self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code))
|
||||
if r.status_code == 200:
|
||||
res = r.json()
|
||||
if len(res) > 0:
|
||||
@ -362,7 +359,6 @@ class WPimport:
|
||||
listelement[i] = []
|
||||
|
||||
articletitle = soup.find_all("h2", class_="articletitle")
|
||||
self._logger.debug("{0} : Title of the article : {1}".format(self._name, articletitle))
|
||||
articlebody = soup.find_all("div", class_="articlebody")
|
||||
articledate = soup.find_all("span", class_="articledate")
|
||||
articleacreator = soup.find_all("span", class_="articlecreator")
|
||||
@ -370,12 +366,10 @@ class WPimport:
|
||||
itemfooter = soup.find_all("div", class_="itemfooter")
|
||||
comment = soup.find_all("li", class_="comment")
|
||||
img_a = articlebody[0].find_all("a", {"target": "_blank"})
|
||||
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
|
||||
list_img = []
|
||||
for i in img_a:
|
||||
new_img = {}
|
||||
img = i.find_all("img")
|
||||
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
|
||||
if len(img) > 0:
|
||||
href_a = i.get("href")
|
||||
href_img = img[0].get("src")
|
||||
@ -393,7 +387,7 @@ class WPimport:
|
||||
except Exception as err:
|
||||
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
|
||||
exit(1)
|
||||
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
|
||||
|
||||
if page_img.status_code == 200:
|
||||
media=self._addOrUpdateMedia(href_img, page_img)
|
||||
new_img["id"]=media["id"]
|
||||
@ -408,7 +402,7 @@ class WPimport:
|
||||
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
|
||||
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
|
||||
|
||||
self._debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
|
||||
|
||||
comment_post = self._getComment(comment)
|
||||
|
||||
a = itemfooter[0].find_all("a", {"rel": True})
|
||||
|
Loading…
x
Reference in New Issue
Block a user