Rewrite to use PySocks
This commit is contained in:
parent
1a03ab6d70
commit
5f908a6d7b
@ -12,9 +12,17 @@
|
|||||||
|
|
||||||
import isbnlib
|
import isbnlib
|
||||||
import re
|
import re
|
||||||
import requesocks as requests # Requesocks is requests with SOCKS support
|
import socket
|
||||||
|
import socks
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
try:
|
||||||
|
# For Python 3.0 and later
|
||||||
|
from urllib.request import urlopen
|
||||||
|
from urllib.error import URLError
|
||||||
|
except ImportError:
|
||||||
|
# Fall back to Python 2's urllib2
|
||||||
|
from urllib2 import urlopen, URLError
|
||||||
import arxiv2bib as arxiv_metadata
|
import arxiv2bib as arxiv_metadata
|
||||||
import tools
|
import tools
|
||||||
from bibtexparser.bparser import BibTexParser
|
from bibtexparser.bparser import BibTexParser
|
||||||
@ -32,16 +40,31 @@ def download(url):
|
|||||||
false if it could not be downloaded.
|
false if it could not be downloaded.
|
||||||
"""
|
"""
|
||||||
for proxy in config.get("proxies"):
|
for proxy in config.get("proxies"):
|
||||||
r_proxy = {
|
if proxy.startswith('socks'):
|
||||||
"http": proxy,
|
if proxy[5] == '4':
|
||||||
"https": proxy,
|
proxy_type = socks.SOCKS4
|
||||||
}
|
else:
|
||||||
|
proxy_type = socks.SOCKS5
|
||||||
|
proxy = proxy[proxy.find('://')+3:]
|
||||||
try:
|
try:
|
||||||
r = requests.get(url, proxies=r_proxy)
|
proxy, port = proxy.split(':')
|
||||||
size = int(r.headers['Content-Length'].strip())
|
except ValueError:
|
||||||
|
port = None
|
||||||
|
socks.set_default_proxy(proxy_type, proxy, port)
|
||||||
|
else: # TODO : Reset if proxy is empty
|
||||||
|
try:
|
||||||
|
proxy, port = proxy.split(':')
|
||||||
|
except ValueError:
|
||||||
|
port = None
|
||||||
|
socks.set_default_proxy(socks.HTTP, proxy, port)
|
||||||
|
socket.socket = socks.socksocket
|
||||||
|
try:
|
||||||
|
r = urlopen(url)
|
||||||
|
size = int(r.headers.getheader('content-length').strip())
|
||||||
dl = ""
|
dl = ""
|
||||||
dl_size = 0
|
dl_size = 0
|
||||||
for buf in r.iter_content(1024):
|
while True:
|
||||||
|
buf = r.read(1024)
|
||||||
if buf:
|
if buf:
|
||||||
dl += buf
|
dl += buf
|
||||||
dl_size += len(buf)
|
dl_size += len(buf)
|
||||||
@ -49,20 +72,22 @@ def download(url):
|
|||||||
sys.stdout.write("\r[%s%s]" % ('='*done, ' '*(50-done)))
|
sys.stdout.write("\r[%s%s]" % ('='*done, ' '*(50-done)))
|
||||||
sys.stdout.write(" "+str(int(float(done)/52*100))+"%")
|
sys.stdout.write(" "+str(int(float(done)/52*100))+"%")
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
else:
|
||||||
|
break
|
||||||
contenttype = False
|
contenttype = False
|
||||||
if 'pdf' in r.headers['content-type']:
|
if 'pdf' in r.headers.getheader('content-type'):
|
||||||
contenttype = 'pdf'
|
contenttype = 'pdf'
|
||||||
elif 'djvu' in r.headers['content-type']:
|
elif 'djvu' in r.headers.getheader('content-type'):
|
||||||
contenttype = 'djvu'
|
contenttype = 'djvu'
|
||||||
|
|
||||||
if r.status_code != 200 or contenttype is False:
|
if r.getcode() != 200 or contenttype is False:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return dl, contenttype
|
return dl, contenttype
|
||||||
except ValueError:
|
except ValueError:
|
||||||
tools.warning("Invalid URL")
|
tools.warning("Invalid URL")
|
||||||
return False, None
|
return False, None
|
||||||
except requests.exceptions.RequestException:
|
except URLError:
|
||||||
tools.warning("Unable to get "+url+" using proxy "+proxy+". It " +
|
tools.warning("Unable to get "+url+" using proxy "+proxy+". It " +
|
||||||
"may not be available.")
|
"may not be available.")
|
||||||
continue
|
continue
|
||||||
|
@ -168,7 +168,7 @@ class SearchQueryParser:
|
|||||||
return self._methods[argument.getName()](argument)
|
return self._methods[argument.getName()](argument)
|
||||||
|
|
||||||
def Parse(self, query):
|
def Parse(self, query):
|
||||||
#print self._parser(query)[0]
|
#print(self._parser(query)[0])
|
||||||
return self.evaluate(self._parser(query)[0])
|
return self.evaluate(self._parser(query)[0])
|
||||||
|
|
||||||
def GetWord(self, word):
|
def GetWord(self, word):
|
||||||
@ -278,21 +278,21 @@ class ParserTest(SearchQueryParser):
|
|||||||
def Test(self):
|
def Test(self):
|
||||||
all_ok = True
|
all_ok = True
|
||||||
for item in self.tests.keys():
|
for item in self.tests.keys():
|
||||||
print item
|
print(item)
|
||||||
r = self.Parse(item)
|
r = self.Parse(item)
|
||||||
e = self.tests[item]
|
e = self.tests[item]
|
||||||
print 'Result: %s' % r
|
print('Result: %s' % r)
|
||||||
print 'Expect: %s' % e
|
print('Expect: %s' % e)
|
||||||
if e == r:
|
if e == r:
|
||||||
print 'Test OK'
|
print('Test OK')
|
||||||
else:
|
else:
|
||||||
all_ok = False
|
all_ok = False
|
||||||
print '>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<'
|
print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<')
|
||||||
print ''
|
print('')
|
||||||
return all_ok
|
return all_ok
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
if ParserTest().Test():
|
if ParserTest().Test():
|
||||||
print 'All tests OK'
|
print('All tests OK')
|
||||||
else:
|
else:
|
||||||
print 'One or more tests FAILED'
|
print('One or more tests FAILED')
|
||||||
|
0
libbmc/tearpages.py
Executable file → Normal file
0
libbmc/tearpages.py
Executable file → Normal file
Loading…
Reference in New Issue
Block a user