mirror of https://github.com/Askill/optar.git
not adding links that go over the depth limit
This commit is contained in:
parent
c7c3fa6102
commit
32e10d2665
|
|
@ -46,8 +46,6 @@ class Crawler:
|
||||||
|
|
||||||
while unchecked:
|
while unchecked:
|
||||||
level, root = unchecked.pop()
|
level, root = unchecked.pop()
|
||||||
if level > limit:
|
|
||||||
continue
|
|
||||||
if root in self._links or self.url.rsplit('/')[2] not in root:
|
if root in self._links or self.url.rsplit('/')[2] not in root:
|
||||||
continue
|
continue
|
||||||
if "https" not in root:
|
if "https" not in root:
|
||||||
|
|
@ -75,8 +73,10 @@ class Crawler:
|
||||||
for link in _links:
|
for link in _links:
|
||||||
if link not in n_links:
|
if link not in n_links:
|
||||||
if link.startswith("http"):
|
if link.startswith("http"):
|
||||||
|
if level < limit:
|
||||||
n_links.append((level+1, link))
|
n_links.append((level+1, link))
|
||||||
else:
|
else:
|
||||||
|
if level < limit:
|
||||||
n_links.append((level+1, urljoin(site.url, link)))
|
n_links.append((level+1, urljoin(site.url, link)))
|
||||||
|
|
||||||
unchecked += n_links
|
unchecked += n_links
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue