Compare commits

..

2 Commits

Author SHA1 Message Date
c92f24e6af check image exist in media 2023-03-30 23:50:25 +02:00
301f1e2d4b add img successful in media 2023-03-30 23:29:29 +02:00

View File

@ -39,9 +39,46 @@ class WPimport:
dateheader = soup.find_all("div", class_="dateheader") dateheader = soup.find_all("div", class_="dateheader")
itemfooter = soup.find_all("div", class_="itemfooter") itemfooter = soup.find_all("div", class_="itemfooter")
comment = soup.find_all("div", class_="comment_item") comment = soup.find_all("div", class_="comment_item")
img = articlebody[0].find_all("img") img_a = articlebody[0].find_all("a", {"target": "_blank"})
print(img) list_img = []
exit(0) for i in img_a:
new_img = {}
img = i.find_all("img")
if len(img) > 0:
href_a = i.get("href")
href_img = img[0].get("src")
page_img = requests.get(href_img)
img_break = False
if page_img.status_code == 404:
href_img = href_a
img_break = True
page = requests.get(href_img)
if page.status_code == 200:
split_fileimg = href_img.split("/")
img_name = split_fileimg[len(split_fileimg)-1]
params = { "search": img_name}
r = requests.get("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, params=params)
if r.status_code == 200:
res = r.json()
if len(res) == 0:
data = page.content
img_type = "image/png"
if img_name.split(".")[1]:
img_type = "image/jpg"
headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)}
r = requests.post("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, headers=headers, data=data)
if r.status_code == 201:
res = r.json()
new_img["old_src"]=href_img
new_img["old_href"]=href_a
new_img["id"]=res["id"]
new_img["new_src"]=res["guid"]["rendered"]
new_img["break"]=img_break
list_img.append(new_img)
comment_post = [] comment_post = []
for i in comment: for i in comment:
comment_item = i.text.split("\n") comment_item = i.text.split("\n")
@ -87,6 +124,16 @@ class WPimport:
else: else:
bodyhtml = bodyhtml + str(i).replace("<p>", "").replace("</p>", "").replace("<br>", "<br />") + "<br />" bodyhtml = bodyhtml + str(i).replace("<p>", "").replace("</p>", "").replace("<br>", "<br />") + "<br />"
bodyhtml = bodyhtml + "</p>" bodyhtml = bodyhtml + "</p>"
for i in list_img:
o = urlparse(i["new_src"])
if i == True:
print(i["old_href"])
bodyhtml = bodyhtml.replace(i["old_href"], o.path)
else:
print(i["old_src"])
bodyhtml = bodyhtml.replace(i["old_src"], o.path)
print(bodyhtml)
exit(0)
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = dateheader[0].text.split(" ")