diff --git a/insert_wordpress.py b/insert_wordpress.py index 389cb7f..780844d 100644 --- a/insert_wordpress.py +++ b/insert_wordpress.py @@ -48,25 +48,37 @@ class WPimport: href_a = i.get("href") href_img = img[0].get("src") page_img = requests.get(href_img) + img_break = False if page_img.status_code == 404: href_img = href_a + img_break = True page = requests.get(href_img) if page.status_code == 200: + split_fileimg = href_img.split("/") img_name = split_fileimg[len(split_fileimg)-1] - data = page.content - img_type = "image/png" - if img_name.split(".")[1]: - img_type = "image/jpg" - headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)} - r = requests.post("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, headers=headers, data=data) - if r.status_code == 201: + params = { "search": img_name} + r = requests.get("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, params=params) + if r.status_code == 200: + res = r.json() + if len(res) == 0: + data = page.content + img_type = "image/png" + if img_name.split(".")[1]: + img_type = "image/jpg" + headers={ 'Content-Type': img_type,'Content-Disposition' : 'attachment; filename={0}'.format(img_name)} + r = requests.post("http://{0}/wp-json/wp/v2/media".format(self.wordpress), auth=self.basic, headers=headers, data=data) + if r.status_code == 201: + res = r.json() + new_img["old_src"]=href_img - new_img["id"]=r.json()["id"] - new_img["new_src"]=r.json()["guid"]["rendered"] + new_img["old_href"]=href_a + new_img["id"]=res["id"] + new_img["new_src"]=res["guid"]["rendered"] + new_img["break"]=img_break list_img.append(new_img) - print(list_img) - exit(0) + + comment_post = [] for i in comment: comment_item = i.text.split("\n") @@ -112,6 +124,16 @@ class WPimport: else: bodyhtml = bodyhtml + str(i).replace("

", "").replace("

", "").replace("
", "
") + "
" bodyhtml = bodyhtml + "

" + for i in list_img: + o = urlparse(i["new_src"]) + if i == True: + print(i["old_href"]) + bodyhtml = bodyhtml.replace(i["old_href"], o.path) + else: + print(i["old_src"]) + bodyhtml = bodyhtml.replace(i["old_src"], o.path) + print(bodyhtml) + exit(0) hour = articledate[0].text time = dateheader[0].text.split(" ")