Functions to handle arXiv metadata
This commit is contained in:
parent
980e678883
commit
289c7dece4
@ -107,7 +107,7 @@ Here are some sources of inspirations for this project :
|
||||
|
||||
A list of ideas and TODOs. Don't hesitate to give feedback on the ones you really want or to propose your own.
|
||||
|
||||
20. No DOI for arXiv / HAL
|
||||
20. No DOI for HAL
|
||||
30. Parameter to disable remote search
|
||||
40. Open file
|
||||
45. Doc / Man
|
||||
@ -119,3 +119,9 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
||||
## Issues ?
|
||||
|
||||
* Remove the watermarks on PDF files => done. Okular shows some warnings on the generated PDFs, but they seem fine; this appears to be a bug in Okular.
|
||||
|
||||
|
||||
## Thanks
|
||||
|
||||
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module
|
||||
* François Boulogne for his [python-bibtexparser](https://github.com/sciunto/python-bibtexparser) Python module and for integrating newly requested features
|
||||
|
22
fetcher.py
22
fetcher.py
@ -5,6 +5,7 @@ import isbntools
|
||||
import re
|
||||
import requesocks as requests # Requesocks is requests with SOCKS support
|
||||
import subprocess
|
||||
import arxiv2bib as arxiv_metadata
|
||||
import tools
|
||||
import params
|
||||
|
||||
@ -178,7 +179,6 @@ def doi2Bib(doi):
|
||||
|
||||
|
||||
arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
|
||||
arXiv_wo_v_re = re.compile(r'v\d+\Z')
|
||||
|
||||
|
||||
def findArXivId(src):
|
||||
@ -208,21 +208,19 @@ def findArXivId(src):
|
||||
# Error happened
|
||||
tools.warning(err)
|
||||
return False
|
||||
|
||||
cleanID = False
|
||||
if extractID:
|
||||
cleanID = arXiv_wo_v_re.sub('', extractID.group(1))
|
||||
return cleanID
|
||||
|
||||
else:
|
||||
return extractID
|
||||
|
||||
|
||||
def arXiv2Bib(arxiv):
|
||||
"""Returns bibTeX string of metadata for a given arXiv id
|
||||
|
||||
arxiv is an arxiv id
|
||||
From : https://github.com/minad/bibsync/blob/master/lib/bibsync/actions/synchronize_metadata.rb
|
||||
"""
|
||||
arxiv = "oai:arXiv.org:"+arxiv
|
||||
bibtex = ''
|
||||
|
||||
return bibtex
|
||||
bibtex = arxiv_metadata.arxiv2bib([arxiv])
|
||||
for bib in bibtex:
|
||||
if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
|
||||
continue
|
||||
else:
|
||||
return bib.bibtex()
|
||||
return False
|
||||
|
43
main.py
43
main.py
@ -58,37 +58,52 @@ def addFile(src, filetype):
|
||||
"""
|
||||
if filetype == 'article' or filetype is None:
|
||||
doi = fetcher.findDOI(src)
|
||||
if (filetype == 'article' or filetype is None) and doi is False:
|
||||
arxiv = fetcher.findArXivId(src)
|
||||
|
||||
if filetype == 'book' or (filetype is None and doi is False):
|
||||
if filetype == 'book' or (filetype is None and doi is False and arxiv is
|
||||
False):
|
||||
isbn = fetcher.findISBN(src)
|
||||
|
||||
if doi is False and isbn is False:
|
||||
if doi is False and isbn is False and arxiv is False:
|
||||
if filetype is None:
|
||||
tools.warning("Could not determine the DOI or the ISBN for " +
|
||||
src+"."+"Switching to manual entry.")
|
||||
doi_isbn = ''
|
||||
while doi_isbn not in ['doi', 'isbn']:
|
||||
doi_isbn = tools.rawInput("DOI / ISBN ? ").lower()
|
||||
if doi_isbn == 'doi':
|
||||
tools.warning("Could not determine the DOI nor the arXiv id nor " +
|
||||
"the ISBN for "+src+"."+"Switching to manual entry.")
|
||||
doi_arxiv_isbn = ''
|
||||
while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn']:
|
||||
doi_arxiv_isbn = tools.rawInput("DOI / arXiv / ISBN? ").lower()
|
||||
if doi_arxiv_isbn == 'doi':
|
||||
doi = tools.rawInput('DOI? ')
|
||||
elif doi_arxiv_isbn == 'arxiv':
|
||||
arxiv = tools.rawInput('arXiv id? ')
|
||||
else:
|
||||
isbn = tools.rawInput('ISBN? ')
|
||||
elif filetype == 'article':
|
||||
tools.warning("Could not determine the DOI for "+src +
|
||||
", switching to manual entry.")
|
||||
tools.warning("Could not determine the DOI nor the arXiv id for " +
|
||||
src+", switching to manual entry.")
|
||||
doi_arxiv = ''
|
||||
while doi_arxiv not in ['doi', 'arxiv']:
|
||||
doi_arxiv = tools.rawInput("DOI / arXiv? ").lower()
|
||||
if doi_arxiv == 'doi':
|
||||
doi = tools.rawInput('DOI? ')
|
||||
else:
|
||||
arxiv = tools.rawInput('arXiv id? ')
|
||||
elif filetype == 'book':
|
||||
tools.warning("Could not determine the ISBN for "+src +
|
||||
", switching to manual entry.")
|
||||
isbn = tools.rawInput('ISBN? ')
|
||||
elif doi is not False:
|
||||
print("DOI for "+src+" is "+doi+".")
|
||||
elif arxiv is not False:
|
||||
print("ArXiv id for "+src+" is "+arxiv+".")
|
||||
elif isbn is not False:
|
||||
print("ISBN for "+src+" is "+isbn+".")
|
||||
|
||||
if doi is not False and doi != '':
|
||||
# Add extra \n for bibtexparser
|
||||
bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
|
||||
elif arxiv is not False and arxiv != '':
|
||||
bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
|
||||
elif isbn is not False and isbn != '':
|
||||
# Idem
|
||||
bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
|
||||
@ -163,6 +178,14 @@ def resync():
|
||||
"DOI, continue anyway " +
|
||||
"? [y/N]")
|
||||
confirm = (confirm.lower() == 'y')
|
||||
if 'Eprint' in entry.keys():
|
||||
arxiv = fetcher.findArXivId(filename)
|
||||
if arxiv is not False and arxiv != entry['Eprint']:
|
||||
confirm = tools.rawInput("Found arXiv id does " +
|
||||
"not match bibtex " +
|
||||
"entry arxiv id, " +
|
||||
"continue anyway ? [y/N]")
|
||||
confirm = (confirm.lower() == 'y')
|
||||
elif 'isbn' in entry.keys():
|
||||
isbn = fetcher.findISBN(filename)
|
||||
if isbn is not False and isbn != entry['isbn']:
|
||||
|
Loading…
Reference in New Issue
Block a user