Add parallelism

This commit is contained in:
Valentin CZERYBA 2023-04-24 23:15:29 +02:00
parent a39e2200bd
commit 88f258ffba
12 changed files with 172 additions and 18 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
/* icomoon icon font: @font-face declaration, shared base style for every [class^=icon-] / [class*=" icon-"] element, then one :before glyph rule per icon class. Kept on a single line; the ".icon-trophy" selector must not be split by a line break. */
@font-face{font-family:icomoon;src:url(fonts/icomoon.eot?ixzlj0);src:url(fonts/icomoon.eot?ixzlj0#iefix) format('embedded-opentype'),url(fonts/icomoon.ttf?ixzlj0) format('truetype'),url(fonts/icomoon.woff?ixzlj0) format('woff'),url(fonts/icomoon.svg?ixzlj0#icomoon) format('svg');font-weight:400;font-style:normal;font-display:block}[class^=icon-],[class*=" icon-"]{font-family:icomoon!important;speak:never;font-style:normal;font-weight:400;font-variant:normal;text-transform:none;line-height:1;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.icon-pinterest:before{content:"\ead1"}.icon-home:before{content:"\e900"}.icon-home1:before{content:"\e901"}.icon-newspaper:before{content:"\e904"}.icon-pencil:before{content:"\e905"}.icon-image:before{content:"\e90d"}.icon-images:before{content:"\e90e"}.icon-camera:before{content:"\e90f"}.icon-play:before{content:"\e912"}.icon-film:before{content:"\e913"}.icon-book:before{content:"\e91f"}.icon-file-text:before{content:"\e922"}.icon-files-empty:before{content:"\e925"}.icon-folder-open:before{content:"\e930"}.icon-price-tag:before{content:"\e935"}.icon-price-tag1:before{content:"\e936"}.icon-phone:before{content:"\e942"}.icon-envelop:before{content:"\e945"}.icon-pushpin:before{content:"\e946"}.icon-location:before{content:"\e947"}.icon-pushpin1:before{content:"\e948"}.icon-compass:before{content:"\e949"}.icon-clock:before{content:"\e94e"}.icon-printer:before{content:"\e954"}.icon-display:before{content:"\e956"}.icon-mobile:before{content:"\e958"}.icon-bubble:before{content:"\e96b"}.icon-bubble2:before{content:"\e96e"}.icon-user:before{content:"\e971"}.icon-users:before{content:"\e972"}.icon-users1:before{content:"\e973"}.icon-spinner:before{content:"\e97a"}.icon-search:before{content:"\e986"}.icon-lock:before{content:"\e98f"}.icon-wrench:before{content:"\e991"}.icon-equalizer:before{content:"\e992"}.icon-cog:before{content:"\e994"}.icon-cogs:before{content:"\e995"}.icon-stats-dots:before{content:"\e99b"}.icon-trophy:before{content:"\e99e"}.icon-gift:before{content:"\e99f"}.icon-fire:before{content:"\e9a9"}.icon-bin:before{content:"\e9ac"}.icon-switch:before{content:"\e9b6"}.icon-list:before{content:"\e9ba"}.icon-earth:before{content:"\e9ca"}.icon-link:before{content:"\e9cb"}.icon-attachment:before{content:"\e9cd"}.icon-eye:before{content:"\e9ce"}.icon-eye1:before{content:"\e9cf"}.icon-sun:before{content:"\e9d4"}.icon-star-full:before{content:"\e9d9"}.icon-heart:before{content:"\e9da"}.icon-heart1:before{content:"\e9db"}.icon-smile2:before{content:"\e9e2"}.icon-sad2:before{content:"\e9e6"}.icon-warning:before{content:"\ea07"}.icon-question:before{content:"\ea09"}.icon-cross:before{content:"\ea0f"}.icon-checkmark:before{content:"\ea10"}.icon-exit:before{content:"\ea14"}.icon-play3:before{content:"\ea1c"}.icon-circle-up:before{content:"\ea41"}.icon-circle-right:before{content:"\ea42"}.icon-circle-down:before{content:"\ea43"}.icon-circle-left:before{content:"\ea44"}.icon-share:before{content:"\ea7d"}.icon-share2:before{content:"\ea82"}.icon-google:before{content:"\ea88"}.icon-facebook:before{content:"\ea90"}.icon-instagram:before{content:"\ea92"}.icon-whatsapp:before{content:"\ea93"}.icon-telegram:before{content:"\ea95"}.icon-twitter:before{content:"\ea96"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,22 @@
/*1682370513,,JIT Construction: v1007368760,fr_FR*/
/**
* Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
*
* You are hereby granted a non-exclusive, worldwide, royalty-free license to use,
* copy, modify, and distribute this software in source code or binary form for use
* in connection with the web services and APIs provided by Facebook.
*
* As with any software that integrates with the Facebook platform, your use of
* this software is subject to the Facebook Platform Policy
* [http://developers.facebook.com/policy/]. This copyright notice shall be
* included in all copies or substantial portions of the software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
 * Bootstrap stub for the Facebook JS SDK.
 *
 * Installs a placeholder object under window[globalName] whose methods only
 * record their calls in `__buffer.calls`, then injects the real SDK script
 * tag before the first <script> element of the page. `__buffer.replay()`
 * re-issues every recorded call against whatever window[globalName] holds at
 * replay time and then empties the buffer.
 *
 * Arguments: SDK script URL, build timestamp (seconds), global name, list of
 * dotted method paths to stub, and whether to set crossOrigin="anonymous".
 *
 * Deliberately written in ES5 (var, function expressions), like the original.
 */
(function _(sdkUrl, buildTimestamp, globalName, methodPaths, useCrossOrigin) {
  var SEVEN_DAYS_IN_SECONDS = 7 * 24 * 60 * 60;

  // Warn (only when a console exists) if this bootstrap is older than a week.
  var logger = window.console;
  if (logger && Math.floor(new Date().getTime() / 1e3) - buildTimestamp > SEVEN_DAYS_IN_SECONDS) {
    logger.warn("The Facebook JSSDK is more than 7 days old.");
  }

  // Nothing to do when the global is already defined or JSON is unavailable.
  if (window[globalName] || !window.JSON) {
    return;
  }

  // Resolve the dotted path of one recorded call on the current global and invoke it.
  function replayCall(buffer, index) {
    var target = window[globalName];
    buffer.calls[index][0].split(".").forEach(function (segment) {
      target = target[segment];
    });
    target.apply(null, buffer.calls[index][1]);
  }

  // Build the recording function for one method path ("init" is never recorded).
  function makeRecorder(path) {
    if (path === "init") {
      return undefined;
    }
    return function () {
      stub.__buffer.calls.push([path, Array.prototype.slice.call(arguments)]);
    };
  }

  var stub = (window[globalName] = {
    __buffer: {
      replay: function () {
        var buffer = this;
        for (var index = 0; index < this.calls.length; index++) {
          replayCall(buffer, index);
        }
        this.calls = [];
      },
      calls: [],
      opts: null
    },
    getUserID: function () {
      return "";
    },
    getAuthResponse: function () {
      return null;
    },
    getAccessToken: function () {
      return null;
    },
    init: function (options) {
      stub.__buffer.opts = options;
    }
  });

  // Create a recorder for every listed path that the stub does not already define,
  // creating intermediate namespace objects ("AppEvents", "Canvas", ...) on demand.
  for (var pathIndex = 0; pathIndex < methodPaths.length; pathIndex++) {
    var methodPath = methodPaths[pathIndex];
    if (methodPath in stub) {
      continue;
    }
    var segments = methodPath.split(".");
    var leafName = segments.pop();
    var holder = stub;
    for (var depth = 0; depth < segments.length; depth++) {
      holder = holder[segments[depth]] || (holder[segments[depth]] = {});
    }
    holder[leafName] = makeRecorder(methodPath);
  }

  // Inject the real SDK script ahead of the first <script> element.
  var sdkScript = document.createElement("script");
  sdkScript.src = sdkUrl;
  sdkScript.async = true;
  if (useCrossOrigin) {
    sdkScript.crossOrigin = "anonymous";
  }
  var firstScript = document.getElementsByTagName("script")[0];
  if (firstScript.parentNode) {
    firstScript.parentNode.insertBefore(sdkScript, firstScript);
  }
})("https:\/\/connect.facebook.net\/fr_FR\/sdk.js?hash=370ca02dcc435704d55ca884c8590ed8", 1682370513, "FB", ["AppEvents.EventNames","AppEvents.ParameterNames","AppEvents.activateApp","AppEvents.clearAppVersion","AppEvents.clearUserID","AppEvents.getAppVersion","AppEvents.getUserID","AppEvents.logEvent","AppEvents.logPageView","AppEvents.logPurchase","AppEvents.setAppVersion","AppEvents.setUserID","AppEvents.updateUserProperties","Canvas.Plugin.showPluginElement","Canvas.Plugin.hidePluginElement","Canvas.Prefetcher.addStaticResource","Canvas.Prefetcher.setCollectionMode","Canvas.getPageInfo","Canvas.scrollTo","Canvas.setAutoGrow","Canvas.setDoneLoading","Canvas.setSize","Canvas.setUrlHandler","Canvas.startTimer","Canvas.stopTimer","Event.subscribe","Event.unsubscribe","XFBML.parse","addFriend","api","getAccessToken","getAuthResponse","getLoginStatus","getUserID","init","login","logout","publish","share","ui"], true);

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
/*
 * Consent-API bootstrap.
 *
 * 1. Flags window.gdprAppliesGlobally.
 * 2. Installs a stub function under window.__tcfapi that queues every call
 *    (with a function callback) into window.__tcfapiBuffer, and a "message"
 *    listener that forwards postMessage requests to that function and posts
 *    the result back to the sender.
 * 3. Ensures a hidden iframe named "__tcfapiLocator" exists in <body>.
 * 4. Injects the loader script from sdk.privacy-center.org.
 *
 * Deliberately written in ES5 (var, function expressions), like the original.
 */
window.gdprAppliesGlobally = true;
(function () {
  // Insert a hidden, named iframe as first child of <body>; retry every 5 ms
  // until <body> exists and has at least one child.
  function ensureLocatorFrame(frameName) {
    if (window.frames[frameName]) {
      return;
    }
    if (!(document.body && document.body.firstChild)) {
      setTimeout(function () {
        ensureLocatorFrame(frameName);
      }, 5);
      return;
    }
    var body = document.body;
    var frame = document.createElement("iframe");
    frame.style.display = "none";
    frame.name = frameName;
    frame.title = frameName;
    body.insertBefore(frame, body.firstChild);
  }

  // Install the queuing stub under window[apiName] unless a function is already there.
  // `interceptor` is optional: when it returns a truthy value the call is not queued.
  function installApiStub(apiName, bufferName, callKey, returnKey, interceptor) {
    function queueCall(command, parameter, callback, version) {
      if (typeof callback !== "function") {
        return;
      }
      if (!window[bufferName]) {
        window[bufferName] = [];
      }
      var handled = false;
      if (interceptor) {
        handled = interceptor(command, parameter, callback);
      }
      if (!handled) {
        window[bufferName].push({
          command: command,
          parameter: parameter,
          callback: callback,
          version: version
        });
      }
    }
    queueCall.stub = true;

    // Bridge postMessage requests to the stub while the stub is still installed.
    function onMessage(event) {
      if (!window[apiName] || window[apiName].stub !== true) {
        return;
      }
      if (!event.data) {
        return;
      }
      var isStringPayload = typeof event.data === "string";
      var payload;
      try {
        payload = isStringPayload ? JSON.parse(event.data) : event.data;
      } catch (parseError) {
        // Not JSON: the message is not addressed to this API.
        return;
      }
      if (!payload[callKey]) {
        return;
      }
      var request = payload[callKey];
      window[apiName](
        request.command,
        request.parameter,
        function (returnValue, success) {
          var reply = {};
          reply[returnKey] = {
            returnValue: returnValue,
            success: success,
            callId: request.callId
          };
          // Answer in the same encoding (string or object) the request used.
          event.source.postMessage(isStringPayload ? JSON.stringify(reply) : reply, "*");
        },
        request.version
      );
    }

    if (typeof window[apiName] !== "function") {
      window[apiName] = queueCall;
      if (window.addEventListener) {
        window.addEventListener("message", onMessage, false);
      } else {
        window.attachEvent("onmessage", onMessage);
      }
    }
  }

  // Inject the remote loader script ahead of the first <script> element.
  function injectLoader(pathKey, target) {
    var loader = document.createElement("script");
    loader.id = "spcloader";
    loader.type = "text/javascript";
    loader.async = true;
    loader.src = "https://sdk.privacy-center.org/" + pathKey + "/loader.js?target_type=notice&target=" + target;
    loader.charset = "utf-8";
    var firstScript = document.getElementsByTagName("script")[0];
    firstScript.parentNode.insertBefore(loader, firstScript);
  }

  installApiStub("__tcfapi", "__tcfapiBuffer", "__tcfapiCall", "__tcfapiReturn");
  ensureLocatorFrame("__tcfapiLocator");
  injectLoader("e23a01f6-a508-4e71-8f50-c1a9cae7c0d0", "FXUNFVFn");
})();

View File

@ -0,0 +1 @@
// Append a hidden <div> with a fixed id to <body>.
// The variable stays a top-level `var` named `e`, exactly as in the original.
var e = document.createElement('div');
e.id = 'OoqBWgJyUzcQ';
e.style.display = 'none';
document.body.appendChild(e);

View File

@ -3,13 +3,14 @@ from requests.auth import HTTPBasicAuth
from getpass import getpass from getpass import getpass
from urllib.parse import urlparse from urllib.parse import urlparse
import argparse, logging, threading import argparse, logging, threading
import multiprocessing
from lib.WPImport import WPimport from lib.WPImport import WPimport
from lib.WPExport import WPExport from lib.WPExport import WPExport
def download(exportWp, html, img): def download(name_thread, max_thread, exportWp, html, img):
webpage = exportWp.getUrlPage() exportWp.setName(name_thread)
webpage = exportWp.getUrlPage(name_thread, max_thread)
if html is False: if html is False:
exportWp.downloadHTML(webpage) exportWp.downloadHTML(webpage)
@ -25,6 +26,7 @@ if __name__ == '__main__':
parser.add_argument("--logfile", help="Log file", default="") parser.add_argument("--logfile", help="Log file", default="")
parser.add_argument("--quiet", help="No console output", action="store_true") parser.add_argument("--quiet", help="No console output", action="store_true")
parser.add_argument("--parser", help="Parser content", default="html.parser") parser.add_argument("--parser", help="Parser content", default="html.parser")
parser.add_argument("--parallel", help="Define number thread (default : 1)", default=1)
subparsers = parser.add_subparsers(dest="command") subparsers = parser.add_subparsers(dest="command")
@ -162,10 +164,12 @@ if __name__ == '__main__':
exportWp.downloadCss() exportWp.downloadCss()
if args.html is False or args.img is False: if args.html is False or args.img is False:
webpage = exportWp.getUrlPage() threads = []
if args.html is False: for i in range(0, int(args.parallel)):
exportWp.downloadHTML(webpage) t1 = multiprocessing.Process(name="Process-{0}".format(i + 1), target=download, args=(i, 3, exportWp,args.html, args.img))
threads.append(t1)
if args.img is False: for thread in threads:
exportWp.downloadImg(webpage) thread.start()
thread.join()
exit(0) exit(0)

View File

@ -11,6 +11,7 @@ class WPExport:
self._logger = logger self._logger = logger
self._parser = parser self._parser = parser
self._dir = directory self._dir = directory
self._name = "Process-0"
self._request = requests.Session() self._request = requests.Session()
@ -22,6 +23,11 @@ class WPExport:
# Public method # Public method
# Set name
def setName(self, name):
self._name = "Process-{}".format(int(name) + 1)
# Set URL # Set URL
def setUrl(self, url): def setUrl(self, url):
@ -55,11 +61,11 @@ class WPExport:
# Get URL # Get URL
def getUrlPage(self): def getUrlPage(self, index_thread, max_thread):
try: try:
page = self._request.get(self._url) page = self._request.get(self._url)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1) exit(1)
page_url = [] page_url = []
if page.status_code == 200: if page.status_code == 200:
@ -70,8 +76,8 @@ class WPExport:
if href != "#": if href != "#":
page_url.append(href) page_url.append(href)
else: else:
self._logger.error("Url did not get due status code : {0}".format(page.status_code)) self._logger.error("{0} : URL did not get due status code : {1}".format(self._name, page.status_code))
self._logger.debug(page.content) self._logger.debug("{0} : {1}".format(self._name, page.content))
webpage = [] webpage = []
@ -79,10 +85,10 @@ class WPExport:
try: try:
page = self._request.get(i) page = self._request.get(i)
except Exception as err: except Exception as err:
self._logger.error("Connection error : {0}".format(err)) self._logger.error("{0} : Connection error : {1}".format(self._name, err))
exit(1) exit(1)
if page.status_code == 200: if page.status_code == 200:
self._logger.info("page : {0}".format(i)) self._logger.info("{0} : page : {1}".format(self._name, i))
if i not in webpage: if i not in webpage:
webpage.append(i) webpage.append(i)
soup = BeautifulSoup(page.text, self._parser) soup = BeautifulSoup(page.text, self._parser)
@ -94,13 +100,22 @@ class WPExport:
element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1] element_lastpage = lastpage.split("/")[len(lastpage.split("/"))-1]
number_page = element_lastpage.split("-")[0].split("p")[1] number_page = element_lastpage.split("-")[0].split("p")[1]
number_lastpage = int(number_page) / 10 number_lastpage = int(number_page) / 10
for j in range(1,int(number_lastpage)):
setPageDivided = int(number_lastpage) / max_thread
setPagePart = setPageDivided * (index_thread + 1)
firstPagePart = (setPagePart - setPageDivided) + 1
self._logger.debug("{0} : Total page : {1}".format(self._name,int(number_lastpage)))
self._logger.debug("{0} : First range : {1}".format(self._name, int(firstPagePart)))
self._logger.debug("{0} : Last range : {1}".format(self._name, int(setPagePart)))
for j in range(int(firstPagePart),int(setPagePart)):
paging = j * 10 paging = j * 10
categorie = urlparse(i).path.split("/") categorie = urlparse(i).path.split("/")
url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging) url_paging = "{0}/archives/p{1}-10.html".format(self._url, paging)
if len(categorie) > 2: if len(categorie) > 2:
url_paging = "{0}/archives/{1}/p{2}-10.html".format(self._url, categorie[2], paging) url_paging = "{0}/archives/{1}/p{2}-10.html".format(self._url, categorie[2], paging)
self._logger.info(url_paging) self._logger.info("{0} : {1}".format(self._name, url_paging))
if url_paging not in webpage: if url_paging not in webpage:
webpage.append(url_paging) webpage.append(url_paging)
page = self._request.get(url_paging) page = self._request.get(url_paging)
@ -118,7 +133,7 @@ class WPExport:
exit(1) exit(1)
webpage.append(o) webpage.append(o)
else: else:
self._logger.error("web didn't get due status code : {0}".format(page.status_code)) self._logger.error("{0} : web didn't get due status code : {1}".format(self._name, page.status_code))
self._logger.debug(page.content) self._logger.debug(page.content)
return webpage return webpage

25
test.py Normal file
View File

@ -0,0 +1,25 @@
import threading
def cube(n, name):
    """Print ``name`` and then the cube of ``n``.

    Args:
        n: Number to cube (computed as ``n * n * n``).
        name: Label printed on the first line.
    """
    print("name : {0}".format(name))
    print(f"Le cube: {n * n * n}")
def carre(n, name):
    """Print ``name`` and then the square of ``n``.

    Args:
        n: Number to square (computed as ``n * n``).
        name: Label printed on the first line.
    """
    print("name : {0}".format(name))
    print(f"Le carré: {n * n}")
threads = []
# Create the worker threads; none of them is started yet.
for i in range(0, 10):
    t1 = threading.Thread(target=carre, args=(3, "toto-{}".format(i)))
    threads.append(t1)
# Start every thread first, then wait for all of them. Calling join() right
# after each start() would block until that thread finishes before the next
# one is started, so the threads would run one after another.
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
# Every thread has finished at this point.
print("C'est fini!")

19
wp-navigation Normal file
View File

@ -0,0 +1,19 @@
"<!-- wp:page-list /-->\n\n
<!-- wp:navigation-submenu {"label":"Toto","type":"page","id":2,"url":"http://localhost:8080/page-d-exemple/","kind":"post-type"} -->\n
<!-- wp:navigation-link {"label":"Publications","type":"category","id":18,"url":"http://localhost:8080/category/publications/","kind":"taxonomy"} /-->\n
<!-- /wp:navigation-submenu -->"
URL de requête: http://localhost:8080/wp-json/wp/v2/navigation/161?_locale=user
"<!-- wp:page-list /-->\n\n
<!-- wp:navigation-submenu {\"label\":\"Toto\",\"type\":\"page\",\"id\":2,\"url\":\"http://localhost:8080/page-d-exemple/\",\"kind\":\"post-type\"} -->\n
<!-- wp:navigation-link {\"label\":\"Publications\",\"type\":\"category\",\"id\":18,\"url\":\"http://localhost:8080/category/publications/\",\"kind\":\"taxonomy\"} /-->\n\n
<!-- wp:navigation-link {\"label\":\"Osez 20 histoires érotiques dans un train\",\"type\":\"post\",\"id\":155,\"url\":\"http://localhost:8080/osez-20-histoires-erotiques-dans-un-train/\",\"kind\":\"post-type\"} /-->\n
<!-- /wp:navigation-submenu -->"
{"id":161,"content":"<!-- wp:page-list /-->\n\n<!-- wp:navigation-submenu {\"label\":\"Toto\",\"type\":\"page\",\"id\":2,\"url\":\"http://localhost:8080/page-d-exemple/\",\"kind\":\"post-type\"} -->\n<!-- wp:navigation-link {\"label\":\"Publications\",\"type\":\"category\",\"id\":18,\"url\":\"http://localhost:8080/category/publications/\",\"kind\":\"taxonomy\"} /-->\n\n<!-- wp:navigation-link {\"label\":\"Osez 20 histoires érotiques dans une masion close\",\"type\":\"post\",\"id\":155,\"url\":\"http://localhost:8080/osez-20-histoires-erotiques-dans-un-train/\",\"kind\":\"post-type\"} /-->\n<!-- /wp:navigation-submenu -->"}
curl -vvv -u v4l3n71n -X POST -d '{\"content\":\"< wp:page-list /-->\n\n< wp:navigation-submenu {\"label\":\"Toto\",\"type\":\"page\",\"id\":2,\"url\":\"http://localhost:8080/page-d-exemple/\",\"kind\":\"post-type\"} -->\n<!-- wp:navigation-link {\"label\":\"Publications\",\"type\":\"category\",\"id\":18,\"url\":\"http://localhost:8080/category/publications/\",\"kind\":\"taxonomy\"} /-->\n\n<!-- wp:navigation-link {\"label\":\"Osez 20 histoires érotiques dans un train\",\"type\":\"post\",\"id\":155,\"url\":\"http://localhost:8080/osez-20-histoires-erotiques-dans-un-train/\",\"kind\":\"post-type\"} /-->\n<!-- /wp:navigation-submenu -->"}' http://localhost:8080/wp-json/wp/v2/navigation
curl http://localhost:8080/wp-json/wp/v2/navigation |jq .