meh
This commit is contained in:
parent
bc2949296c
commit
6f34547432
|
|
@ -0,0 +1,86 @@
|
|||
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File (Integrated Terminal)",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        },
        {
            // Listed once only; the identical second "Python: Flask" entry was removed.
            "name": "Python: Flask",
            "type": "python",
            "request": "launch",
            "module": "flask",
            "env": {
                // The Flask application object is created in main.py.
                "FLASK_APP": "main.py"
            },
            "args": [
                "run",
                "--no-debugger",
                "--no-reload"
            ],
            "jinja": true
        },
        {
            "name": "Python: Remote Attach",
            "type": "python",
            "request": "attach",
            "port": 5678,
            "host": "localhost",
            "pathMappings": [
                {
                    "localRoot": "${workspaceFolder}",
                    "remoteRoot": "."
                }
            ]
        },
        {
            "name": "Python: Module",
            "type": "python",
            "request": "launch",
            "module": "enter-your-module-name-here",
            "console": "integratedTerminal"
        },
        {
            "name": "Python: Django",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/manage.py",
            "console": "integratedTerminal",
            "args": [
                "runserver",
                "--noreload",
                "--nothreading"
            ],
            "django": true
        },
        {
            "name": "Python: Current File (External Terminal)",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "externalTerminal"
        }
    ]
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
FROM tiangolo/uwsgi-nginx-flask:python3.7

# Install dependencies before copying the sources so this layer stays cached
# when only application code changes.
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt

# main.py imports both urlchecker and sitemapper, so all three modules must
# be present in /app or the application fails at import time.
COPY ./main.py ./urlchecker.py ./sitemapper.py /app/
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,40 @@
|
|||
from flask import Flask, request
|
||||
import os
|
||||
import urlchecker
|
||||
import sitemapper
|
||||
import _pickle as cPickle
|
||||
import json
|
||||
#----------------------------------------------------------------------------#
|
||||
# App Config.
|
||||
#----------------------------------------------------------------------------#
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
#----------------------------------------------------------------------------#
|
||||
# Controllers.
|
||||
#----------------------------------------------------------------------------#
|
||||
|
||||
@app.route('/test/')
def index():
    """Crawl the site given in the ``url`` query parameter and return its link map.

    Returns:
        The crawler's ``sites`` mapping on success, or a plain-text message
        with HTTP status 400 when ``url`` is missing or is not an http(s) URL.

    Side effects:
        Prints the requested URL and the result, and overwrites
        ``your_file.txt`` with one crawled page URL per line.
    """
    url = request.args.get("url")

    # The target comes straight from the query string.  Reject anything that
    # is not an explicit http(s) URL up front instead of letting the crawler
    # fail with a server error.
    # NOTE(review): the server still fetches arbitrary user-supplied hosts;
    # consider an allow-list if this endpoint is reachable by untrusted users.
    if not url or not url.lower().startswith(("http://", "https://")):
        return "Query parameter 'url' must be an http(s) URL.", 400

    print(url)
    obj = sitemapper.url(url)
    obj.run_check()
    print(obj.sites)

    # Iterating the mapping yields its keys, i.e. the crawled page URLs.
    with open('your_file.txt', 'w', encoding='utf-8') as f:
        f.writelines("%s\n" % item for item in obj.sites)

    return obj.sites
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Direct execution: serve on every interface.  The port is read from the
    # PORT environment variable and falls back to 80 when it is not set.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 80)))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
Flask-WTF
requests
# sitemapper.py imports HTMLSession from requests_html
requests-html
|
||||
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
from urllib.parse import urljoin
|
||||
import requests
|
||||
import re
|
||||
from requests_html import HTMLSession
|
||||
|
||||
|
||||
class url:
    """Crawl a website recursively and record the links found on each page.

    ``run_check`` fills ``sites`` with one entry per crawled page: the key is
    the page URL after redirects, the value is the list of link URLs found on
    that page, each resolved through its own redirects.
    """

    # Class-level defaults, kept so attribute access on the class itself keeps
    # working.  ``__init__`` rebinds the containers on every instance so that
    # separate crawls never share results.
    url = ""                 # the url of the website to be checked
    sites = dict()           # dict with all sites and the urls on those sites
    does_work = []           # list of all previously positively tested urls
    does_not_work = dict()   # dict of all non-working urls and the site that linked there
    # Not passed to any request made by this class.
    header_values = {
        'Connection:' : 'Keep-alive',
        'name' : 'Michael Foord',
        'location' : 'Northampton',
        'language' : 'English',
        'User-Agent': 'Mozilla 4/0'}
    timeout = 10             # seconds to wait for each HTTP request

    def __init__(self, url):
        """Store the start URL and create empty per-instance result containers."""
        self.url = url
        self.sites = {}
        self.does_work = []
        self.does_not_work = {}

    def _in_scope(self, candidate):
        """Return True when *candidate* is on the host of the start URL.

        The start URL's host must be contained in the candidate's host.  If
        the start URL has no host part, its last path segment is used as a
        plain substring test instead.
        """
        from urllib.parse import urlparse

        start_host = urlparse(self.url).netloc.lower()
        if start_host:
            return start_host in urlparse(candidate).netloc.lower()
        return self.url.rsplit('/', 1)[-1] in candidate

    def run_check(self, root=None):  # root is the url of the current Site
        """Crawl *root* (default: the start URL) and, recursively, its links.

        Pages whose final URL contains ``"Spezial"``, is already recorded in
        ``sites``, or lies outside the start host are skipped.  A link that
        cannot be fetched is stored in ``does_not_work`` (link -> page) and
        left out of the page's link list.

        Raises:
            requests.RequestException: only when the start page itself
                (``root`` omitted) cannot be fetched.
        """
        is_start_page = root is None
        if is_start_page:
            root = self.url

        # One session per page: it is closed again before recursing so open
        # sessions do not pile up with the recursion depth.
        with HTMLSession() as session:
            try:
                response = session.get(root, timeout=self.timeout)
            except requests.RequestException:
                if is_start_page:
                    raise
                return

            root = response.url  # final URL after redirects
            if "Spezial" in root:
                return
            if root in self.sites or not self._in_scope(root):
                return

            nlinks = []
            for link in response.html.absolute_links:
                try:
                    resolved = session.get(link, timeout=self.timeout).url
                except (requests.RequestException, ValueError):
                    # A single unreachable or non-HTTP link (e.g. mailto:)
                    # must not discard the whole page.
                    self.does_not_work[link] = root
                    continue
                nlinks.append(resolved.replace("/./", "/").replace("/../", "/"))

        self.sites[root] = nlinks
        print(root)

        for each_link in nlinks:
            self.run_check(each_link)
|
||||
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
import urllib.request,urllib.parse,urllib.error
|
||||
from urllib.error import HTTPError
|
||||
from urllib.error import URLError
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
import re
|
||||
|
||||
|
||||
|
||||
class url:
    """Crawl a website with ``urllib`` and test every link found on it.

    ``run_check`` fills three containers:

    * ``sites``          page URL -> list of absolute link URLs on that page
    * ``does_work``      links that could be opened
    * ``does_not_work``  broken link -> URL of the page that linked to it
    """

    # Class-level defaults, kept so attribute access on the class itself keeps
    # working.  ``__init__`` rebinds the containers on every instance so that
    # separate crawls never share results.
    url = ""                 # the url of the website to be checked
    sites = dict()           # dict with all sites and the urls on those sites
    does_work = []           # list of all previously positively tested urls
    does_not_work = dict()   # dict of all non-working urls and the site that linked there
    # Sent as HTTP request headers, so every key must be a legal header name
    # (no trailing colon).
    header_values = {
        'Connection': 'Keep-alive',
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'English',
        'User-Agent': 'Mozilla 4/0'}

    def __init__(self, url):
        """Resolve *url* through its redirects and store it as the start URL."""
        self.sites = {}
        self.does_work = []
        self.does_not_work = {}
        with urllib.request.urlopen(url) as response:
            self.url = response.geturl()

    def _open(self, link):
        """Open *link* with a GET request that carries ``header_values``.

        The dict is passed via ``headers=``; as the second positional argument
        of ``Request`` it would be sent as a POST body instead.
        """
        request = urllib.request.Request(link, headers=self.header_values)
        return urllib.request.urlopen(request)

    def make_url(self, link, start):
        """Return *link* resolved against the base URL *start*."""
        return urljoin(start, link)

    def test_url(self, link, root):
        """Return True when *link* can be opened, caching the verdict.

        A link already present in ``sites`` or ``does_work`` counts as working
        and one in ``does_not_work`` as broken, without a new request.  A
        fresh failure is recorded as ``does_not_work[link] = root``.
        """
        if link in self.sites or link in self.does_work:
            return True
        if link in self.does_not_work:
            return False

        try:
            # Only reachability matters here; close the response right away.
            with self._open(link):
                pass
        except (urllib.error.HTTPError, urllib.error.URLError, ValueError):
            self.does_not_work[link] = root
            return False

        self.does_work.append(link)
        return True

    def get_actual_urls(self, links, root):
        """Make every entry of *links* absolute relative to *root* and test it.

        ``urljoin`` leaves absolute URLs untouched and gives scheme-relative
        ``//host/path`` links the scheme of *root*.

        Returns:
            The list of absolute URLs, in the order of *links*.
        """
        temp_links = [urljoin(root, each_link) for each_link in links]

        for each_temp_link in temp_links:
            self.test_url(each_temp_link, root)

        return temp_links

    def run_check(self, root=None):  # root is the url of the current Site
        """Crawl *root* (default: the start URL) and, recursively, its links.

        A page is skipped when it is already in ``sites``, when the start URL
        up to its last ``/`` is not contained in it, or when it cannot be
        opened.
        """
        if root is None:
            root = self.url

        if (root in self.sites
                or self.url.rsplit('/', 1)[0] not in root
                or not self.test_url(root, root)):
            return

        try:
            with self._open(root) as http_response:
                root = http_response.geturl()
                response_data = http_response.read()
                charset = http_response.headers.get_content_charset() or "utf-8"
        except (urllib.error.HTTPError, urllib.error.URLError, ValueError):
            # The page answered during test_url but failed now; skip it
            # rather than abort the whole crawl.
            return

        # The redirect target may already have been crawled under its final
        # URL.  Without this check a page that links to a URL redirecting
        # back to it would recurse forever.
        if root in self.sites:
            return

        # Decode the body instead of scanning the repr of the raw bytes.
        try:
            page_text = response_data.decode(charset, errors="replace")
        except LookupError:  # server announced an unknown charset
            page_text = response_data.decode("utf-8", errors="replace")

        links = re.findall(r'href="(.*?)"', page_text)

        links = self.get_actual_urls(links, root)
        print(root, links)

        self.sites[root] = links
        for each_link in links:
            self.run_check(each_link)
|
||||
|
||||
#
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
http://www.andreasgehrke.com/prof/moti/moti.html
|
||||
http://www.andreasgehrke.com/idee/idee_18/idee_18_01.html
|
||||
https://www.andreasgehrke.com/proj_21/proj_21_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_12/real_proj_12_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_01.html#i01
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_10/real_proj_10_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_09/real_proj_09_01.html
|
||||
https://www.andreasgehrke.com/proj/proj.html
|
||||
http://www.andreasgehrke.com/../proj/proj.html
|
||||
http://www.andreasgehrke.com/leis/leis.html
|
||||
https://www.andreasgehrke.com/idee/idee_03/idee_03_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_16/idee_16_01.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_07/real_proj_07_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_02/idee_02_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_11.html#r11
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_09/real_proj_09_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_20/idee_20.html
|
||||
http://www.andreasgehrke.com/proj_21/proj_21_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_08.html#i08
|
||||
https://www.andreasgehrke.com/idee/idee_11/idee_11_01.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_06/real_proj_06_01.html
|
||||
http://www.andreasgehrke.com/proj.html
|
||||
http://www.andreasgehrke.com/moti/moti.html
|
||||
https://www.andreasgehrke.com/idee/idee_18/idee_18_01.html
|
||||
http://www.andreasgehrke.com/ver/ver.html
|
||||
https://www.andreasgehrke.com/idee/idee_01/idee_02_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_13.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_06/real_proj_06_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_17/idee_17_01.html
|
||||
http://www.andreasgehrke.com/proj/proj.html
|
||||
https://www.andreasgehrke.com/../proj/proj.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_01/real_proj_01_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_16/idee_16_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_01/idee_02_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_04/real_proj_04_01.html
|
||||
https://www.andreasgehrke.com/leis/leis.html
|
||||
https://www.andreasgehrke.com/ver/ver.html
|
||||
https://www.andreasgehrke.com/idee/idee_04/idee_04_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_20/idee_20.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_13.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_08/real_proj_08_01.html
|
||||
https://www.andreasgehrke.com/
|
||||
https://www.andreasgehrke.com/idee/idee_06.html#i06
|
||||
http://www.andreasgehrke.com/../index.html
|
||||
http://www.andreasgehrke.com/index.html
|
||||
http://www.andreasgehrke.com/proj_23/proj_23_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_04/idee_04_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_19/idee_19_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_01/real_proj_01_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_10/real_proj_10_01.html
|
||||
https://www.andreasgehrke.com/pers/pers.html
|
||||
http://www.andreasgehrke.com/idee/idee_07.html#i07
|
||||
https://www.andreasgehrke.com/proj_23/proj_23_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_17/idee_17_01.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_04/real_proj_04_01.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_08/real_proj_08_01.html
|
||||
https://www.andreasgehrke.com/proj_22/proj_22_01.html
|
||||
https://www.andreasgehrke.com/moti/moti.html
|
||||
http://www.andreasgehrke.com/idee/idee_01.html#i01
|
||||
https://www.andreasgehrke.com/idee/idee_07.html#i07
|
||||
http://www.andreasgehrke.com/idee/idee_03/idee_03_01.html
|
||||
http://www.andreasgehrke.com/idee/idee_08.html#i08
|
||||
http://www.andreasgehrke.com/idee/idee_19/idee_19_01.html
|
||||
http://www.andreasgehrke.com/ref/ref.html
|
||||
https://www.andreasgehrke.com/prof/moti/moti.html
|
||||
http://www.andreasgehrke.com/idee/idee_06.html#i06
|
||||
http://www.andreasgehrke.com/idee/idee_11/idee_11_01.html
|
||||
https://www.andreasgehrke.com/idee/idee_05.html#i05
|
||||
https://www.andreasgehrke.com/../kont/buer/buer.html
|
||||
https://www.andreasgehrke.com/idee/idee_09/idee_09_01.html
|
||||
https://www.andreasgehrke.com/index.html
|
||||
http://www.andreasgehrke.com/idee/idee_09/idee_09_01.html
|
||||
https://www.andreasgehrke.com/ref/ref.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_11.html#r11
|
||||
https://www.andreasgehrke.com/idee/idee_02/idee_02_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_05/real_proj_05_01.html
|
||||
https://www.andreasgehrke.com/real_proj/real_proj_07/real_proj_07_01.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_12/real_proj_12_01.html
|
||||
http://www.andreasgehrke.com/proj_22/proj_22_01.html
|
||||
https://www.andreasgehrke.com/proj.html
|
||||
http://www.andreasgehrke.com/real_proj/real_proj_05/real_proj_05_01.html
|
||||
http://www.andreasgehrke.com/pers/pers.html
|
||||
http://www.andreasgehrke.com/
|
||||
https://www.andreasgehrke.com/../index.html
|
||||
http://www.andreasgehrke.com/../kont/buer/buer.html
|
||||
http://www.andreasgehrke.com/idee/idee_05.html#i05
|
||||
Loading…
Reference in New Issue