fix sciencedirect.com parsing
This commit is contained in:
parent
cf7c1b78e1
commit
a89129b424
@@ -169,7 +169,10 @@ def download_url(url):
|
|||||||
extension = ".pdf"
|
extension = ".pdf"
|
||||||
title = citation_title
|
title = citation_title
|
||||||
else:
|
else:
|
||||||
if "h1 class=\"articleTitle" in content:
|
if "sciencedirect.com" in url:
|
||||||
|
title = tree.xpath("//h1[@class='svTitle']")[0].text
|
||||||
|
pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0]
|
||||||
|
elif "h1 class=\"articleTitle" in content:
|
||||||
try:
|
try:
|
||||||
title = tree.xpath("//h1[@class='articleTitle']")[0].text
|
title = tree.xpath("//h1[@class='articleTitle']")[0].text
|
||||||
title = title.encode("ascii", "ignore")
|
title = title.encode("ascii", "ignore")
|
||||||
|
Loading…
Reference in New Issue
Block a user