Compare commits
No commits in common. "939e744d1de5ddcda4b4967db0c4b1775b4da3c2" and "ab3720fbbc438440722aa2986a1a2c98fc417820" have entirely different histories.
939e744d1d
...
ab3720fbbc
@ -36,7 +36,6 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
|
|||||||
exit(1)
|
exit(1)
|
||||||
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser)
|
exportWp = WPExport(name="Thread-{0}".format(int(name_thread) + 1), url=url, logger=logger, parser=parser)
|
||||||
webpage = exportWp.getUrlPage(name_thread, max_thread)
|
webpage = exportWp.getUrlPage(name_thread, max_thread)
|
||||||
|
|
||||||
for j in wordpress:
|
for j in wordpress:
|
||||||
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser)
|
importWp = WPimport(name=name, basic=basic, wordpress=j, logger=logger, parser=parser)
|
||||||
importWp.fromUrl(webpage)
|
importWp.fromUrl(webpage)
|
||||||
@ -60,8 +59,7 @@ def importUrl(name_thread, max_thread, canalblog, logger, parser, wordpress, bas
|
|||||||
|
|
||||||
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
|
def importDirectory(name_thread, max_thread, directory, logger, parser, wordpress, basic, serial):
|
||||||
name = "Thread-{0}".format(int(name_thread) + 1)
|
name = "Thread-{0}".format(int(name_thread) + 1)
|
||||||
directory = directory.split(",")
|
directory = args.directory.split(",")
|
||||||
wordpress = wordpress.split(",")
|
|
||||||
if serial is False:
|
if serial is False:
|
||||||
for i in wordpress:
|
for i in wordpress:
|
||||||
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser)
|
importWp = WPimport(name=name, basic=basic, wordpress=i, logger=logger, parser=parser)
|
||||||
|
@ -103,7 +103,7 @@ class WPExport:
|
|||||||
|
|
||||||
setPageDivided = int(number_lastpage) / max_thread
|
setPageDivided = int(number_lastpage) / max_thread
|
||||||
setPagePart = setPageDivided * (index_thread + 1)
|
setPagePart = setPageDivided * (index_thread + 1)
|
||||||
firstPagePart = (setPagePart - setPageDivided)
|
firstPagePart = (setPagePart - setPageDivided) + 1
|
||||||
|
|
||||||
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
|
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
|
||||||
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
|
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
|
||||||
|
@ -36,7 +36,7 @@ class WPimport:
|
|||||||
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err))
|
self._logger.error("{0} : Connection error for get url {1} : {2}".format(self._name, webpage[i], err))
|
||||||
exit(1)
|
exit(1)
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
self._logger.info("{0} : ({1}/{2}) : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
|
self._logger.info("{0} : ({1}/{2} : Page is importing : {3}".format(self._name, i+1, len(webpage), webpage[i]))
|
||||||
soup = BeautifulSoup(r.content, self._parser)
|
soup = BeautifulSoup(r.content, self._parser)
|
||||||
articlebody = soup.find_all("div", class_="articlebody")
|
articlebody = soup.find_all("div", class_="articlebody")
|
||||||
if len(articlebody) > 0:
|
if len(articlebody) > 0:
|
||||||
@ -72,7 +72,6 @@ class WPimport:
|
|||||||
self._logger.info("{0} : File is being processed : {1}".format(self._name, files[i]))
|
self._logger.info("{0} : File is being processed : {1}".format(self._name, files[i]))
|
||||||
with open(files[i], 'r') as f:
|
with open(files[i], 'r') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
self._logger.debug("{0} : Size of article : {1}".format(self._name, len(content)))
|
|
||||||
soup = BeautifulSoup(content, self._parser)
|
soup = BeautifulSoup(content, self._parser)
|
||||||
articlebody = soup.find_all("div", class_="articlebody")
|
articlebody = soup.find_all("div", class_="articlebody")
|
||||||
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
|
self._logger.debug("{0} : Number of article : {1}".format(self._name, len(articlebody)))
|
||||||
@ -190,14 +189,12 @@ class WPimport:
|
|||||||
media = {"id":"", "rendered":""}
|
media = {"id":"", "rendered":""}
|
||||||
split_fileimg = href_img.split("/")
|
split_fileimg = href_img.split("/")
|
||||||
img_name = split_fileimg[len(split_fileimg)-1]
|
img_name = split_fileimg[len(split_fileimg)-1]
|
||||||
self._logger.debug("{0} : Search for image {1} with URL {2}".format(self._name, img_name, "http://{0}/wp-json/wp/v2/media".format(self._wordpress)))
|
|
||||||
params = { "search": img_name}
|
params = { "search": img_name}
|
||||||
try:
|
try:
|
||||||
r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
|
r = self._request.get("http://{0}/wp-json/wp/v2/media".format(self._wordpress), auth=self._basic, params=params)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
|
self._logger.error("{0} : Connection error for search media : {1}".format(self._name, err))
|
||||||
exit(1)
|
exit(1)
|
||||||
self._logger.debug("{0} : Search for image {1} and his status code {2}".format(self._name, img_name, r.status_code))
|
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
res = r.json()
|
res = r.json()
|
||||||
if len(res) > 0:
|
if len(res) > 0:
|
||||||
@ -362,7 +359,6 @@ class WPimport:
|
|||||||
listelement[i] = []
|
listelement[i] = []
|
||||||
|
|
||||||
articletitle = soup.find_all("h2", class_="articletitle")
|
articletitle = soup.find_all("h2", class_="articletitle")
|
||||||
self._logger.debug("{0} : Title of the article : {1}".format(self._name, articletitle))
|
|
||||||
articlebody = soup.find_all("div", class_="articlebody")
|
articlebody = soup.find_all("div", class_="articlebody")
|
||||||
articledate = soup.find_all("span", class_="articledate")
|
articledate = soup.find_all("span", class_="articledate")
|
||||||
articleacreator = soup.find_all("span", class_="articlecreator")
|
articleacreator = soup.find_all("span", class_="articlecreator")
|
||||||
@ -370,12 +366,10 @@ class WPimport:
|
|||||||
itemfooter = soup.find_all("div", class_="itemfooter")
|
itemfooter = soup.find_all("div", class_="itemfooter")
|
||||||
comment = soup.find_all("li", class_="comment")
|
comment = soup.find_all("li", class_="comment")
|
||||||
img_a = articlebody[0].find_all("a", {"target": "_blank"})
|
img_a = articlebody[0].find_all("a", {"target": "_blank"})
|
||||||
self._logger.debug("{0} : Number of image's link : {1}".format(self._name, len(img_a)))
|
|
||||||
list_img = []
|
list_img = []
|
||||||
for i in img_a:
|
for i in img_a:
|
||||||
new_img = {}
|
new_img = {}
|
||||||
img = i.find_all("img")
|
img = i.find_all("img")
|
||||||
self._logger.debug("{0} : Number of image's tag : {1}".format(self._name, len(img)))
|
|
||||||
if len(img) > 0:
|
if len(img) > 0:
|
||||||
href_a = i.get("href")
|
href_a = i.get("href")
|
||||||
href_img = img[0].get("src")
|
href_img = img[0].get("src")
|
||||||
@ -393,7 +387,7 @@ class WPimport:
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
|
self._logger.error("{0} : Connection error for get image : {1}".format(self._name, err))
|
||||||
exit(1)
|
exit(1)
|
||||||
self._logger.debug("{0} : Status code for image {1} : {2}".format(self._name, href_img, page_img.status_code))
|
|
||||||
if page_img.status_code == 200:
|
if page_img.status_code == 200:
|
||||||
media=self._addOrUpdateMedia(href_img, page_img)
|
media=self._addOrUpdateMedia(href_img, page_img)
|
||||||
new_img["id"]=media["id"]
|
new_img["id"]=media["id"]
|
||||||
@ -408,7 +402,7 @@ class WPimport:
|
|||||||
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
|
self._logger.error("{0} : Connection error with status code for get image : {1}".format(self._name, page_img.status_code))
|
||||||
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
|
self._logger.debug("{0} : {1}".format(self._name, page_img.content))
|
||||||
|
|
||||||
self._debug("{0} : Number of image : {1}".format(self._name, len(list_img)))
|
|
||||||
comment_post = self._getComment(comment)
|
comment_post = self._getComment(comment)
|
||||||
|
|
||||||
a = itemfooter[0].find_all("a", {"rel": True})
|
a = itemfooter[0].find_all("a", {"rel": True})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user