diff --git a/insert_wordpress.py b/insert_wordpress.py index 216b483..389cb7f 100644 --- a/insert_wordpress.py +++ b/insert_wordpress.py @@ -39,8 +39,33 @@ class WPimport: dateheader = soup.find_all("div", class_="dateheader") itemfooter = soup.find_all("div", class_="itemfooter") comment = soup.find_all("div", class_="comment_item") - img = articlebody[0].find_all("img") - print(img) + img_a = articlebody[0].find_all("a", {"target": "_blank"}) + list_img = [] + for i in img_a: + new_img = {} + img = i.find_all("img") + if len(img) > 0: + href_a = i.get("href") + href_img = img[0].get("src") + page_img = requests.get(href_img) + if page_img.status_code == 404: + href_img = href_a + page = requests.get(href_img) + if page.status_code == 200: + split_fileimg = href_img.split("/") + img_name = split_fileimg[len(split_fileimg)-1] + data = page.content + img_type = "image/png" + if img_name.split(".")[1]: + img_type = "image/jpg" + headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)} + r = requests.post("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, headers=headers, data=data) + if r.status_code == 201: + new_img["old_src"]=href_img + new_img["id"]=r.json()["id"] + new_img["new_src"]=r.json()["guid"]["rendered"] + list_img.append(new_img) + print(list_img) exit(0) comment_post = [] for i in comment: