mirror of https://github.com/Askill/optar.git
fixed duplication
This commit is contained in:
parent
7202c0fe3e
commit
9d0c5d3f73
2
main.py
2
main.py
|
|
@@ -1,4 +1,4 @@
|
||||||
from src.Watcher import Watcher
|
from src.Watcher import Watcher
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
Watcher("../sites.txt", "../keywords.txt").watch(3600)
|
Watcher("./sites.txt", "./keywords.txt").watch(3600)
|
||||||
|
|
@@ -25,13 +25,11 @@ class Watcher:
|
||||||
sites = self.read_txt_file(self.sites_source_path)
|
sites = self.read_txt_file(self.sites_source_path)
|
||||||
|
|
||||||
crawler = Crawler()
|
crawler = Crawler()
|
||||||
crawled_sites = []
|
|
||||||
for site in sites:
|
for site in sites:
|
||||||
crawler.run(site, 10)
|
crawler.run(site, 10)
|
||||||
crawled_sites += crawler.get_nodes()
|
|
||||||
crawler.persist(f"./cache/{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json")
|
crawler.persist(f"./cache/{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json")
|
||||||
|
|
||||||
contents = [self.get_new_content(site) for site in crawled_sites]
|
contents = [self.get_new_content(site) for site in sites]
|
||||||
contents = [x for x in contents if x is not None and x is not {}]
|
contents = [x for x in contents if x is not None and x is not {}]
|
||||||
matches = []
|
matches = []
|
||||||
for content in contents:
|
for content in contents:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue