From 32e10d2665caa0370fb217db33c4701c0db7fe44 Mon Sep 17 00:00:00 2001 From: Askill Date: Sun, 11 Dec 2022 13:18:08 +0100 Subject: [PATCH] not adding links that go over depth limit --- src/Crawler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Crawler.py b/src/Crawler.py index 9f68bf1..adf3f1e 100644 --- a/src/Crawler.py +++ b/src/Crawler.py @@ -46,8 +46,6 @@ class Crawler: while unchecked: level, root = unchecked.pop() - if level > limit: - continue if root in self._links or self.url.rsplit('/')[2] not in root: continue if "https" not in root: @@ -75,9 +73,11 @@ class Crawler: for link in _links: if link not in n_links: if link.startswith("http"): - n_links.append((level+1, link)) + if level < limit: + n_links.append((level+1, link)) else: - n_links.append((level+1, urljoin(site.url, link))) + if level < limit: + n_links.append((level+1, urljoin(site.url, link))) unchecked += n_links self._links[root] = n_links