2014-04-23 22:27:55 +02:00
|
|
|
#!/usr/bin/python2 -u
|
2013-05-11 16:10:48 +02:00
|
|
|
# coding=utf8
|
2014-04-26 11:52:19 +02:00
|
|
|
|
2013-01-08 07:27:46 +01:00
|
|
|
"""
|
|
|
|
Fetches papers.
|
|
|
|
"""
|
2013-05-11 16:10:48 +02:00
|
|
|
|
2014-04-26 18:43:25 +02:00
|
|
|
from __future__ import print_function
|
|
|
|
import sys
|
2014-04-26 11:52:19 +02:00
|
|
|
import requesocks as requests
|
|
|
|
import params
|
2013-01-22 02:11:12 +01:00
|
|
|
|
2014-04-26 18:43:25 +02:00
|
|
|
|
|
|
|
def warning(*objs):
    """
    Print a warning message to standard error.

    The given objects are printed after a "WARNING: " prefix, formatted and
    space-separated exactly as print() would format them.
    """
    prefixed = ("WARNING: ",) + objs
    print(*prefixed, file=sys.stderr)
|
|
|
|
|
|
|
|
|
2014-04-26 11:52:19 +02:00
|
|
|
def download_url(url):
    """
    Download the PDF at `url`, trying each proxy of params.proxies in turn.

    Returns the raw body of the first response that has status code 200 and
    a content type containing 'pdf'. Returns False when no proxy yields
    such a response.
    """
    for proxy in params.proxies:
        # Route both plain HTTP and HTTPS traffic through the same proxy.
        r_proxy = {
            "http": proxy,
            "https": proxy,
        }

        try:
            r = requests.get(url, proxies=r_proxy)

            # .get() instead of indexing: a response without a Content-Type
            # header is simply "not a PDF", not a proxy failure.
            content_type = r.headers.get('content-type') or ''
            if r.status_code != 200 or 'pdf' not in content_type:
                continue

            return r.content
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit still abort the download
            # instead of being reported as a dead proxy.
            warning("Proxy %s not available." % (proxy,))
            continue

    # Every proxy either failed or returned something that is not a PDF.
    return False
|