Star-Mapper/sitemapper.py

55 lines
1.5 KiB
Python
Raw Normal View History

2019-04-22 15:04:52 +00:00
from urllib.parse import urljoin
import requests
import re
from requests_html import HTMLSession
class url:
    """Crawl a website and record which links every visited page contains.

    Starting from ``url``, each page that passes the scope check is fetched,
    the absolute links found on it are resolved through ``requests`` and the
    result is stored in ``sites`` as ``{page_url: [resolved_link, ...]}``.
    Links that cannot be fetched are stored in ``does_not_work`` as
    ``{link: page_that_linked_there}``.
    """

    # Class-level defaults; ``__init__`` rebinds the containers per instance.
    url = ""  # the url of the website to be checked
    sites = {}  # dict with all sites and the urls found on those sites
    does_work = []  # list with all previously positively tested urls
    does_not_work = {}  # dict with all not working urls and the site that linked there
    # NOTE(review): not referenced by any method of this class. The first key
    # carries a trailing colon ('Connection:'), presumably a typo for
    # 'Connection' -- confirm before these values are sent as headers.
    header_values = {
        'Connection:': 'Keep-alive',
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'English',
        'User-Agent': 'Mozilla 4/0',
    }

    def __init__(self, url):
        """Store the start URL and create fresh result containers.

        Args:
            url: Address of the website to be checked.
        """
        self.url = url
        # The class attributes above are shared by every instance; binding
        # new containers here keeps the results of separate crawls apart.
        self.sites = {}
        self.does_work = []
        self.does_not_work = {}

    def _scope_token(self):
        """Return the last path segment of the start URL (the crawl scope)."""
        # Trailing slashes are removed first: otherwise the segment would be
        # the empty string, which is contained in every URL and would let
        # the crawl leave the site. ``[-1]`` also copes with a slash-less URL.
        return self.url.rstrip('/').rsplit('/', 1)[-1]

    def _collect_links(self, page):
        """Visit one page and return its resolved links, or None if skipped.

        A page is skipped when its final URL contains "Spezial", was already
        recorded in ``sites``, lies outside the crawl scope, or cannot be
        fetched for parsing.

        Raises:
            requests.RequestException: if the initial request that resolves
                ``page`` to its final URL fails.
        """
        # First request only resolves redirects to the final address.
        page = requests.get(page).url
        # Presumably filters German-language wiki special pages -- confirm.
        if "Spezial" in page:
            return None
        if page in self.sites or self._scope_token() not in page:
            return None

        # The session is closed again as soon as the page has been parsed.
        with HTMLSession() as session:
            try:
                response = session.get(page)
            except requests.RequestException:
                return None
            found = response.html.absolute_links

        resolved = []
        for link in found:
            try:
                target = requests.get(link).url
            except requests.RequestException:
                # A single unreachable link (dead host, mailto:, ...) must
                # not discard the whole page; remember who linked to it.
                self.does_not_work[link] = page
                continue
            # Replace any "/./" and "/../" left in the resolved URL by "/".
            resolved.append(target.replace("/./", "/").replace("/../", "/"))

        self.sites[page] = resolved
        print(page)
        return resolved

    def run_check(self, root=None):  # root is the url of the current site
        """Crawl the site depth-first, starting at ``root``.

        Args:
            root: URL to start from; defaults to the URL given to the
                constructor.

        Raises:
            requests.RequestException: if resolving a page's final URL fails.
        """
        if root is None:
            root = self.url
        # Explicit stack instead of recursion: the visiting order is the
        # same, but deeply linked sites cannot exhaust the recursion limit.
        pending = [root]
        while pending:
            links = self._collect_links(pending.pop())
            if links:
                # Reversed so the first link of the page is visited next.
                pending.extend(reversed(links))