diff --git a/modules/papers.py b/modules/papers.py index 09bba89..2a11146 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -164,6 +164,7 @@ def download_url(url): citation_pdf_url = find_citation_pdf_url(tree, url) citation_title = find_citation_title(tree) + # wow, this seriously needs to be cleaned up if citation_pdf_url and citation_title: citation_title = citation_title.encode("ascii", "ignore") response = requests.get(citation_pdf_url, headers={"User-Agent": "pdf-defense-force"}) @@ -185,6 +186,19 @@ def download_url(url): else: content = new_content response = new_response + elif "apl.aip.org" in url: + try: + title = tree.xpath("//title/text()")[0].split(" | ")[0] + pdf_url = [link for link in tree.xpath("//a/@href") if "getpdf" in link][0] + new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/1.0"}) + new_content = new_response.content + if "pdf" in new_response.headers["content-type"]: + extension = ".pdf" + except Exception: + pass + else: + content = new_content + response = new_response elif "h1 class=\"articleTitle" in content: try: title = tree.xpath("//h1[@class='articleTitle']")[0].text