From c48a377f449c681ecc83ae5e1f4f6dfc1a914dde Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Fri, 8 Feb 2013 04:16:10 -0600 Subject: [PATCH] better support for IEEE Xplore --- modules/papers.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/papers.py b/modules/papers.py index 2915644..4de51d1 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -203,6 +203,18 @@ def download_url(url): else: content = new_content response = new_response + elif "ieeexplore.ieee.org" in url: + try: + pdf_url = [url for url in tree.xpath("//frame/@src") if "pdf" in url][0] + new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/2.0"}) + new_content = new_response.content + if "pdf" in new_response.headers["content-type"]: + extension = ".pdf" + except Exception: + pass + else: + content = new_content + response = new_response elif "h1 class=\"articleTitle" in content: try: title = tree.xpath("//h1[@class='articleTitle']")[0].text