Add WPimport.fromUrl and a "canalblog" import branch to the CLI script.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed onto a single line. The indentation and the
position of blank context lines below were reconstructed from the hunk line
counts; verify them against the target files before applying.
NOTE(review): fromUrl passes timeout=30 to requests.get and carries a
docstring, so the post-image blob id on the WPImport.py "index" line is
stale (it is informational for a plain text apply).

diff --git a/WPImport.py b/WPImport.py
index 92f6681..d17adce 100644
--- a/WPImport.py
+++ b/WPImport.py
@@ -14,6 +14,27 @@ class WPimport:
 
     # Public method
 
+    def fromUrl(self, webpage):
+        """Import every page whose URL is listed in ``webpage``.
+
+        Each URL is fetched with ``requests``. A page answering 200 is
+        parsed with ``self._parser`` and handed to ``_addOrUpdatePost``
+        when it contains a ``div.articlebody`` element, otherwise to
+        ``_addOrUpdateFeaturedMedia``. Pages answering with any other
+        status code are skipped.
+        """
+        for page in webpage:
+            # Bound the wait so one unresponsive server cannot hang the import.
+            r = requests.get(page, timeout=30)
+            if r.status_code == 200:
+                soup = BeautifulSoup(r.content, self._parser)
+                articlebody = soup.find_all("div", class_="articlebody")
+                if len(articlebody) > 0:
+                    self._addOrUpdatePost(soup)
+                else:
+                    self._addOrUpdateFeaturedMedia(soup)
+
+
     def fromDirectory(self, directory):
         directory = "{0}/archives".format(directory)
         directories = self._getDirectories([], "{0}".format(directory))
diff --git a/import_export_canalblog.py b/import_export_canalblog.py
index 6b8325d..96314ec 100644
--- a/import_export_canalblog.py
+++ b/import_export_canalblog.py
@@ -37,7 +37,7 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    logger = logging.getLogger('insert wordpress')
+    logger = logging.getLogger('import export canalblog')
     formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
     if args.quiet is False:
@@ -74,7 +74,20 @@ if __name__ == '__main__':
             exit(0)
         if len(args.directory) > 0:
             importWp.fromDirectory(args.directory)
-        exit(0)
+            exit(0)
+        if len(args.canalblog) > 0:
+            try:
+                o = urlparse(args.canalblog)
+                o = o._replace(scheme="https")
+                url = o.geturl().replace(":///", "://")
+            except Exception as err:
+                logger.error("parsing error : {0}".format(err))
+                exit(1)
+            exportWp = WPExport.WPExport(url, logger, args.parser, args.directory)
+            webpage = exportWp.getUrlPage()
+            importWp.fromUrl(webpage)
+
+
     if args.command == "export":
         try:
             o = urlparse(args.url)