distinct tags and categories

This commit is contained in:
Valentin CZERYBA 2023-03-27 23:51:51 +02:00
parent 3622e37942
commit 0c41dc3e65

View File

@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
from requests.auth import HTTPBasicAuth from requests.auth import HTTPBasicAuth
from getpass import getpass from getpass import getpass
import requests, os, argparse, logging import requests, os, argparse, logging, re
if __name__ == '__main__': if __name__ == '__main__':
@@ -19,13 +19,19 @@ if __name__ == '__main__':
exit(1) exit(1)
basic = HTTPBasicAuth(args.user, password) basic = HTTPBasicAuth(args.user, password)
page = requests.get("http://localhost:8080/wp-json/wp/v2/tags") liste = ["categories", "tags"]
elements = {}
if page.status_code == 200: element = {}
tags = page.json() listelement = {}
print(tags)
for i in liste:
page = requests.get("http://localhost:8080/wp-json/wp/v2/{0}".format(i))
if page.status_code == 200:
elements[i] = page.json()
element[i] = []
listelement[i] = []
with open(args.file, 'r') as f: with open(args.file, 'r') as f:
contents = f.read() contents = f.read()
@@ -36,34 +42,36 @@ if __name__ == '__main__':
dateheader = soup.find_all("div", class_="dateheader") dateheader = soup.find_all("div", class_="dateheader")
itemfooter = soup.find_all("div", class_="itemfooter") itemfooter = soup.find_all("div", class_="itemfooter")
a = itemfooter[0].find_all("a", {"rel": True}) a = itemfooter[0].find_all("a", {"rel": True})
tag = []
for i in a: for i in a:
rel = i.get("rel") rel = i.get("rel")
if rel[0] == 'tag': if rel[0] == 'tag':
tag.append(i.text) href = i.get("href")
listtag = [] if re.search(r'/tag/', href):
for i in tag: element["tags"].append(i.text)
tag_exist = False if re.search(r'/archives/', href):
for j in tags: element["categories"].append(i.text)
if j["name"] == i: for i in liste:
tag_exist = True for j in element[i]:
listtag.append(j["id"]) element_exist = False
if tag_exist is False: for k in elements[i]:
data = {"name": i} if k["name"] == j:
page = requests.post("http://localhost:8080/wp-json/wp/v2/tags", auth=basic, data=data) element_exist = True
if page.status_code == 201: array = listelement[i].append(k["id"])
result = page.json() if element_exist is False:
listtag.append(result["id"]) data = {"name": j}
page = requests.post("http://localhost:8080/wp-json/wp/v2/{0}".format(i), auth=basic, data=data)
if page.status_code == 201:
result = page.json()
listelement[i].append(result["id"])
title = articletitle[0].text title = articletitle[0].text
body = articlebody[0] body = articlebody[0]
hour = articledate[0].text hour = articledate[0].text
time = dateheader[0].text.split(" ") time = dateheader[0].text.split(" ")
data = {"title":title, "content":body, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listtag} data = {"title":title, "content":body, "status":"publish", "date": "{0}-{1}-{2}T{3}:00".format(time[2],month[time[1]],time[0], hour), "tags": listelement["tags"], "categories": listelement["categories"]}
print(data)
exit(0)
page = requests.post("http://localhost:8080/wp-json/wp/v2/posts", auth=basic, data=data) page = requests.post("http://localhost:8080/wp-json/wp/v2/posts", auth=basic, data=data)
print(page.status_code) print(page.status_code)