not adding links that go over the depth limit

Askill 2022-12-11 13:18:08 +01:00
parent c7c3fa6102
commit 32e10d2665
1 changed file with 4 additions and 4 deletions


@@ -46,8 +46,6 @@ class Crawler:
         while unchecked:
             level, root = unchecked.pop()
-            if level > limit:
-                continue
             if root in self._links or self.url.rsplit('/')[2] not in root:
                 continue
             if "https" not in root:
@@ -75,9 +73,11 @@ class Crawler:
             for link in _links:
                 if link not in n_links:
                     if link.startswith("http"):
-                        n_links.append((level+1, link))
+                        if level < limit:
+                            n_links.append((level+1, link))
                     else:
-                        n_links.append((level+1, urljoin(site.url, link)))
+                        if level < limit:
+                            n_links.append((level+1, urljoin(site.url, link)))
             unchecked += n_links
             self._links[root] = n_links
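
Before this change, every discovered link was enqueued and the depth check only ran when an entry was popped from `unchecked`; now the check guards the append, so over-limit links never enter the queue. A minimal, self-contained sketch of the resulting behavior (the `collect_links` function and the dict-based `pages` stand-in for real page fetching are illustrative, not code from the repository):

    from urllib.parse import urljoin

    def collect_links(start_url, pages, limit):
        """Collect links breadth-first, but stop *enqueuing* once `level`
        reaches `limit`, instead of enqueuing everything and skipping
        over-limit entries when they are popped."""
        _links = {}
        unchecked = [(0, start_url)]
        while unchecked:
            level, root = unchecked.pop()
            if root in _links:
                continue
            n_links = []
            for link in pages.get(root, []):
                # Resolve relative links the same way the diff does.
                target = link if link.startswith("http") else urljoin(root, link)
                # The change in this commit: guard the append with the depth
                # check, so over-limit links never enter the queue at all.
                if level < limit:
                    n_links.append((level + 1, target))
            unchecked += n_links
            _links[root] = n_links
        return _links

    # Toy usage: an in-memory "site" mapping each page to its outgoing links.
    pages = {
        "https://example.com/": ["/a", "https://example.com/b"],
        "https://example.com/a": ["/c"],
    }
    print(collect_links("https://example.com/", pages, limit=1))

Checking at enqueue time keeps `unchecked` from filling up with entries that would be discarded anyway, which matters when pages at the last level link out to many URLs.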