2019-04-22 15:06:20 +00:00
|
|
|
import urllib.request,urllib.parse,urllib.error
|
|
|
|
|
from lxml import html
|
|
|
|
|
import requests
|
|
|
|
|
import re
|
|
|
|
|
|
2019-04-24 16:29:24 +00:00
|
|
|
# Search-results URL scraped by this script (query "gaming"; l=10 is
# presumably a result limit -- TODO confirm against the site's search page).
searchURL = "https://suche.golem.de/search.php?l=10&q=gaming"

# Fetch the results page. requests applies no timeout by default, so an
# unresponsive server would otherwise hang the script indefinitely.
site = requests.get(searchURL, timeout=30)
# Fail loudly on HTTP 4xx/5xx instead of parsing an error page and printing
# counts that look valid but are not.
site.raise_for_status()

# Parse the raw response bytes into an lxml HTML element tree.
tree = html.fromstring(site.content)

# Text nodes of every <span class="dh2 head2"> (presumably the article
# headlines -- verify against the page markup).
articles = tree.xpath('//span[@class="dh2 head2"]/text()')
# Every href attribute found beneath the <header> of each <li> inside
# <ol class="list-articles">.
links = tree.xpath('//ol[@class="list-articles"]/li/header//@href')

# Print how many items each XPath query matched.
print(len(articles), len(links))
|