bmc/fetcher.py

40 lines
712 B
Python
Raw Normal View History

2014-04-23 22:27:55 +02:00
#!/usr/bin/python2 -u
# coding=utf8
2013-01-08 07:27:46 +01:00
"""
Fetches papers.
"""
2014-04-26 18:43:25 +02:00
from __future__ import print_function
import sys
import requesocks as requests
import params
2013-01-22 02:11:12 +01:00
2014-04-26 18:43:25 +02:00
def warning(*objs):
    """
    Print a warning message on standard error.

    Every positional argument is printed after a "WARNING: " prefix,
    separated by single spaces and followed by a newline.
    """
    message = ("WARNING: ",) + objs
    print(*message, file=sys.stderr)
def download_url(url):
    """
    Download the PDF located at `url`, trying each configured proxy in turn.

    Every entry of `params.proxies` is used for both the "http" and the
    "https" scheme. The first response that has status code 200 and a
    content-type header mentioning "pdf" wins.

    Returns the raw body of that response, or False if no proxy yielded
    a PDF (including when `params.proxies` is empty).
    """
    for proxy in params.proxies:
        r_proxy = {
            "http": proxy,
            "https": proxy,
        }

        try:
            r = requests.get(url, proxies=r_proxy)

            # A response without a content-type header is simply "not a
            # PDF"; it must not be reported as a proxy failure.
            content_type = r.headers.get('content-type', '')
            if r.status_code != 200 or 'pdf' not in content_type:
                continue

            return r.content
        except Exception:
            # Best effort: any failure with this proxy moves on to the next
            # one. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate to the caller.
            warning("Proxy "+proxy+" not available.")
            continue

    return False