02e679bc72
You should pass the URL of the PDF file to the script, along with the `download` parameter. It will try the proxies listed in the `params.py` file until it finds one that allows it to fetch the PDF file. TODO: Use pdfparanoia to remove watermarks.
26 lines
437 B
Python
Executable File
26 lines
437 B
Python
Executable File
#!/usr/bin/python2 -u
|
|
# coding=utf8
|
|
|
|
"""
|
|
Fetches papers.
|
|
"""
|
|
|
|
import requesocks as requests
|
|
import params
|
|
|
|
def download_url(url):
    """
    Fetch `url` through each proxy in `params.proxies` until one yields a PDF.

    Proxies are tried in order. A proxy is skipped when the request raises,
    when the response status is not 200, or when the Content-Type header is
    missing or does not contain 'pdf'.

    Returns the body of the first acceptable response (`r.content`), or False
    if no proxy produced one.
    """
    for proxy in params.proxies:
        # Use the same proxy for both plain HTTP and HTTPS requests.
        r_proxy = {
            "http": proxy,
            "https": proxy,
        }

        try:
            r = requests.get(url, proxies=r_proxy)
        except (requests.exceptions.RequestException, IOError):
            # A failing proxy must not abort the search: move on to the
            # next one. IOError also covers socket-level errors.
            continue

        if r.status_code != 200:
            continue

        # The header may be absent; treat that the same as "not a PDF"
        # instead of raising KeyError.
        content_type = r.headers.get('content-type') or ''
        if 'pdf' not in content_type:
            continue

        return r.content

    return False
|