From 5f908a6d7beeea868fdbf4aa345bd4c058c54662 Mon Sep 17 00:00:00 2001 From: Phyks Date: Sat, 2 Aug 2014 23:34:34 +0200 Subject: [PATCH] Rewrite to use PySocks --- libbmc/fetcher.py | 49 ++++++++++++++++++++++++++++++++++----------- libbmc/search.py | 18 ++++++++--------- libbmc/tearpages.py | 0 3 files changed, 46 insertions(+), 21 deletions(-) mode change 100755 => 100644 libbmc/tearpages.py diff --git a/libbmc/fetcher.py b/libbmc/fetcher.py index 2961492..19e2566 100644 --- a/libbmc/fetcher.py +++ b/libbmc/fetcher.py @@ -12,9 +12,17 @@ import isbnlib import re -import requesocks as requests # Requesocks is requests with SOCKS support +import socket +import socks import subprocess import sys +try: + # For Python 3.0 and later + from urllib.request import urlopen + from urllib.error import URLError +except ImportError: + # Fall back to Python 2's urllib2 + from urllib2 import urlopen, URLError import arxiv2bib as arxiv_metadata import tools from bibtexparser.bparser import BibTexParser @@ -32,16 +40,31 @@ def download(url): false if it could not be downloaded. """ for proxy in config.get("proxies"): - r_proxy = { - "http": proxy, - "https": proxy, - } + if proxy.startswith('socks'): + if proxy[5] == '4': + proxy_type = socks.SOCKS4 + else: + proxy_type = socks.SOCKS5 + proxy = proxy[proxy.find('://')+3:] + try: + proxy, port = proxy.split(':') + except ValueError: + port = None + socks.set_default_proxy(proxy_type, proxy, port) + else: # TODO : Reset if proxy is empty + try: + proxy, port = proxy.split(':') + except ValueError: + port = None + socks.set_default_proxy(socks.HTTP, proxy, port) + socket.socket = socks.socksocket try: - r = requests.get(url, proxies=r_proxy) - size = int(r.headers['Content-Length'].strip()) + r = urlopen(url) + size = int(r.headers.getheader('content-length').strip()) dl = "" dl_size = 0 - for buf in r.iter_content(1024): + while True: + buf = r.read(1024) if buf: dl += buf dl_size += len(buf) @@ -49,20 +72,22 @@ def download(url): sys.stdout.write("\r[%s%s]" % ('='*done, ' '*(50-done))) sys.stdout.write(" "+str(int(float(done)/52*100))+"%") sys.stdout.flush() + else: + break contenttype = False - if 'pdf' in r.headers['content-type']: + if 'pdf' in r.headers.getheader('content-type'): contenttype = 'pdf' - elif 'djvu' in r.headers['content-type']: + elif 'djvu' in r.headers.getheader('content-type'): contenttype = 'djvu' - if r.status_code != 200 or contenttype is False: + if r.getcode() != 200 or contenttype is False: continue return dl, contenttype except ValueError: tools.warning("Invalid URL") return False, None - except requests.exceptions.RequestException: + except URLError: tools.warning("Unable to get "+url+" using proxy "+proxy+". It " + "may not be available.") continue diff --git a/libbmc/search.py b/libbmc/search.py index 4a7d34f..eb132f4 100644 --- a/libbmc/search.py +++ b/libbmc/search.py @@ -168,7 +168,7 @@ class SearchQueryParser: return self._methods[argument.getName()](argument) def Parse(self, query): - #print self._parser(query)[0] + #print(self._parser(query)[0]) return self.evaluate(self._parser(query)[0]) def GetWord(self, word): @@ -278,21 +278,21 @@ class ParserTest(SearchQueryParser): def Test(self): all_ok = True for item in self.tests.keys(): - print item + print(item) r = self.Parse(item) e = self.tests[item] - print 'Result: %s' % r - print 'Expect: %s' % e + print('Result: %s' % r) + print('Expect: %s' % e) if e == r: - print 'Test OK' + print('Test OK') else: all_ok = False - print '>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<' - print '' + print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<') + print('') return all_ok if __name__=='__main__': if ParserTest().Test(): - print 'All tests OK' + print('All tests OK') else: - print 'One or more tests FAILED' + print('One or more tests FAILED') diff --git a/libbmc/tearpages.py b/libbmc/tearpages.py old mode 100755 new mode 100644