This commit is contained in:
Askill 2022-11-06 15:18:57 +01:00
parent 9d0c5d3f73
commit 563c6c719c
2 changed files with 6 additions and 2 deletions

View File

@ -1 +1,2 @@
https://www.patricematz.de/
https://www.patricematz.de/
https://www.heise.de/

View File

@ -24,12 +24,15 @@ class Watcher:
keywords = self.read_txt_file(self.keywords_source_path)
sites = self.read_txt_file(self.sites_source_path)
crawler = Crawler()
for site in sites:
crawler = Crawler()
# TODO: add depth as param, to limit traversal depth
crawler.run(site, 10)
crawler.persist(f"./cache/{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json")
contents = [self.get_new_content(site) for site in sites]
# TODO: improve handling of None
contents = [x for x in contents if x is not None and x is not {}]
matches = []
for content in contents: