Alexa-news-stentiment-evalu.../reader/tests.py

12 lines
388 B
Python
Raw Normal View History

2019-04-22 15:06:20 +00:00
import urllib.request,urllib.parse,urllib.error
from lxml import html
import requests
import re
2019-04-24 16:29:24 +00:00
# Search-results page on golem.de; the query string asks for "gaming" (q)
# with l=10.
searchURL = "https://suche.golem.de/search.php?l=10&q=gaming"

# requests applies no timeout by default, so a stalled connection would block
# this script forever; bound the wait explicitly (seconds).
site = requests.get(searchURL, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and reporting
# misleading counts further down.
site.raise_for_status()
2019-04-22 15:06:20 +00:00
# Parse the body of the HTTP response held in `site` into an lxml HTML
# element tree so it can be queried with XPath.
tree = html.fromstring(site.content)
2019-04-24 16:29:24 +00:00
# Every href attribute found under the <header> of each entry in the
# "list-articles" ordered list.
links = tree.xpath('//ol[@class="list-articles"]/li/header//@href')
# Text nodes of the <span> elements whose class attribute is exactly
# "dh2 head2".
articles = tree.xpath('//span[@class="dh2 head2"]/text()')

# Report how many matches each query produced: articles first, then links.
article_count = len(articles)
link_count = len(links)
print(f"{article_count} {link_count}")