From 9d0c5d3f73c8e3e5f526bfd69c27d4ec1c6d316a Mon Sep 17 00:00:00 2001 From: Askill Date: Sun, 6 Nov 2022 14:22:20 +0100 Subject: [PATCH] fixed duplication --- main.py | 2 +- src/Watcher.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 2be9c39..c208853 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,4 @@ from src.Watcher import Watcher if __name__ == "__main__": - Watcher("../sites.txt", "../keywords.txt").watch(3600) \ No newline at end of file + Watcher("./sites.txt", "./keywords.txt").watch(3600) \ No newline at end of file diff --git a/src/Watcher.py b/src/Watcher.py index 69690a9..848391a 100644 --- a/src/Watcher.py +++ b/src/Watcher.py @@ -25,13 +25,11 @@ class Watcher: sites = self.read_txt_file(self.sites_source_path) crawler = Crawler() - crawled_sites = [] for site in sites: crawler.run(site, 10) - crawled_sites += crawler.get_nodes() crawler.persist(f"./cache/{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json") - contents = [self.get_new_content(site) for site in crawled_sites] + contents = [self.get_new_content(site) for site in sites] contents = [x for x in contents if x is not None and x is not {}] matches = [] for content in contents: