Functions to handle arXiv metadata
This commit is contained in:
parent
980e678883
commit
289c7dece4
@ -107,7 +107,7 @@ Here are some sources of inspirations for this project :
|
|||||||
|
|
||||||
A list of ideas and TODOs. Don't hesitate to give feedback on the ones you really want or to propose your own.
|
A list of ideas and TODOs. Don't hesitate to give feedback on the ones you really want or to propose your own.
|
||||||
|
|
||||||
20. No DOI for arXiv / HAL
|
20. No DOI for HAL
|
||||||
30. Parameter to disable remote search
|
30. Parameter to disable remote search
|
||||||
40. Open file
|
40. Open file
|
||||||
45. Doc / Man
|
45. Doc / Man
|
||||||
@ -119,3 +119,9 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
|||||||
## Issues ?
|
## Issues ?
|
||||||
|
|
||||||
* Remove the watermarks on PDF files => done. Okular shows some warnings on the generated PDFs, but they seem fine; this appears to be a bug in Okular.
|
* Remove the watermarks on PDF files => done. Okular shows some warnings on the generated PDFs, but they seem fine; this appears to be a bug in Okular.
|
||||||
|
|
||||||
|
|
||||||
|
## Thanks
|
||||||
|
|
||||||
|
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module
|
||||||
|
* François Boulogne for his [python-bibtexparser](https://github.com/sciunto/python-bibtexparser) Python module and his integration of newly requested features
|
||||||
|
22
fetcher.py
22
fetcher.py
@ -5,6 +5,7 @@ import isbntools
|
|||||||
import re
|
import re
|
||||||
import requesocks as requests # Requesocks is requests with SOCKS support
|
import requesocks as requests # Requesocks is requests with SOCKS support
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import arxiv2bib as arxiv_metadata
|
||||||
import tools
|
import tools
|
||||||
import params
|
import params
|
||||||
|
|
||||||
@ -178,7 +179,6 @@ def doi2Bib(doi):
|
|||||||
|
|
||||||
|
|
||||||
arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
|
arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
|
||||||
arXiv_wo_v_re = re.compile(r'v\d+\Z')
|
|
||||||
|
|
||||||
|
|
||||||
def findArXivId(src):
|
def findArXivId(src):
|
||||||
@ -208,21 +208,19 @@ def findArXivId(src):
|
|||||||
# Error happened
|
# Error happened
|
||||||
tools.warning(err)
|
tools.warning(err)
|
||||||
return False
|
return False
|
||||||
|
else:
|
||||||
cleanID = False
|
return extractID
|
||||||
if extractID:
|
|
||||||
cleanID = arXiv_wo_v_re.sub('', extractID.group(1))
|
|
||||||
return cleanID
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def arXiv2Bib(arxiv):
|
def arXiv2Bib(arxiv):
|
||||||
"""Returns bibTeX string of metadata for a given arXiv id
|
"""Returns bibTeX string of metadata for a given arXiv id
|
||||||
|
|
||||||
arxiv is an arxiv id
|
arxiv is an arxiv id
|
||||||
From : https://github.com/minad/bibsync/blob/master/lib/bibsync/actions/synchronize_metadata.rb
|
|
||||||
"""
|
"""
|
||||||
arxiv = "oai:arXiv.org:"+arxiv
|
bibtex = arxiv_metadata.arxiv2bib([arxiv])
|
||||||
bibtex = ''
|
for bib in bibtex:
|
||||||
|
if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
|
||||||
return bibtex
|
continue
|
||||||
|
else:
|
||||||
|
return bib.bibtex()
|
||||||
|
return False
|
||||||
|
79
main.py
79
main.py
@ -18,7 +18,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
|
|||||||
|
|
||||||
|
|
||||||
def checkBibtex(filename, bibtex):
|
def checkBibtex(filename, bibtex):
|
||||||
print("The bibtex entry found for "+filename+" is :")
|
print("The bibtex entry found for "+filename+" is:")
|
||||||
|
|
||||||
bibtex = BibTexParser(bibtex, customization=homogeneize_latex_encoding)
|
bibtex = BibTexParser(bibtex, customization=homogeneize_latex_encoding)
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
@ -29,7 +29,7 @@ def checkBibtex(filename, bibtex):
|
|||||||
else:
|
else:
|
||||||
bibtex_string = ''
|
bibtex_string = ''
|
||||||
print(bibtex_string)
|
print(bibtex_string)
|
||||||
check = tools.rawInput("Is it correct ? [Y/n] ")
|
check = tools.rawInput("Is it correct? [Y/n] ")
|
||||||
|
|
||||||
while check.lower() == 'n':
|
while check.lower() == 'n':
|
||||||
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
|
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
|
||||||
@ -46,9 +46,9 @@ def checkBibtex(filename, bibtex):
|
|||||||
bibtex_string = backend.parsed2Bibtex(bibtex)
|
bibtex_string = backend.parsed2Bibtex(bibtex)
|
||||||
else:
|
else:
|
||||||
bibtex_string = ''
|
bibtex_string = ''
|
||||||
print("\nThe bibtex entry for "+filename+" is :")
|
print("\nThe bibtex entry for "+filename+" is:")
|
||||||
print(bibtex_string)
|
print(bibtex_string)
|
||||||
check = tools.rawInput("Is it correct ? [Y/n] ")
|
check = tools.rawInput("Is it correct? [Y/n] ")
|
||||||
return bibtex
|
return bibtex
|
||||||
|
|
||||||
|
|
||||||
@ -58,37 +58,52 @@ def addFile(src, filetype):
|
|||||||
"""
|
"""
|
||||||
if filetype == 'article' or filetype is None:
|
if filetype == 'article' or filetype is None:
|
||||||
doi = fetcher.findDOI(src)
|
doi = fetcher.findDOI(src)
|
||||||
|
if (filetype == 'article' or filetype is None) and doi is False:
|
||||||
|
arxiv = fetcher.findArXivId(src)
|
||||||
|
|
||||||
if filetype == 'book' or (filetype is None and doi is False):
|
if filetype == 'book' or (filetype is None and doi is False and arxiv is
|
||||||
|
False):
|
||||||
isbn = fetcher.findISBN(src)
|
isbn = fetcher.findISBN(src)
|
||||||
|
|
||||||
if doi is False and isbn is False:
|
if doi is False and isbn is False and arxiv is False:
|
||||||
if filetype is None:
|
if filetype is None:
|
||||||
tools.warning("Could not determine the DOI or the ISBN for " +
|
tools.warning("Could not determine the DOI nor the arXiv id nor " +
|
||||||
src+"."+"Switching to manual entry.")
|
"the ISBN for "+src+"."+"Switching to manual entry.")
|
||||||
doi_isbn = ''
|
doi_arxiv_isbn = ''
|
||||||
while doi_isbn not in ['doi', 'isbn']:
|
while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn']:
|
||||||
doi_isbn = tools.rawInput("DOI / ISBN ? ").lower()
|
doi_arxiv_isbn = tools.rawInput("DOI / arXiv / ISBN? ").lower()
|
||||||
if doi_isbn == 'doi':
|
if doi_arxiv_isbn == 'doi':
|
||||||
doi = tools.rawInput('DOI ? ')
|
doi = tools.rawInput('DOI? ')
|
||||||
|
elif doi_arxiv_isbn == 'arxiv':
|
||||||
|
arxiv = tools.rawInput('arXiv id? ')
|
||||||
else:
|
else:
|
||||||
isbn = tools.rawInput('ISBN ? ')
|
isbn = tools.rawInput('ISBN? ')
|
||||||
elif filetype == 'article':
|
elif filetype == 'article':
|
||||||
tools.warning("Could not determine the DOI for "+src +
|
tools.warning("Could not determine the DOI nor the arXiv id for " +
|
||||||
", switching to manual entry.")
|
src+", switching to manual entry.")
|
||||||
doi = tools.rawInput('DOI ? ')
|
doi_arxiv = ''
|
||||||
|
while doi_arxiv not in ['doi', 'arxiv']:
|
||||||
|
doi_arxiv = tools.rawInput("DOI / arXiv? ").lower()
|
||||||
|
if doi_arxiv == 'doi':
|
||||||
|
doi = tools.rawInput('DOI? ')
|
||||||
|
else:
|
||||||
|
arxiv = tools.rawInput('arXiv id? ')
|
||||||
elif filetype == 'book':
|
elif filetype == 'book':
|
||||||
tools.warning("Could not determine the ISBN for "+src +
|
tools.warning("Could not determine the ISBN for "+src +
|
||||||
", switching to manual entry.")
|
", switching to manual entry.")
|
||||||
isbn = tools.rawInput('ISBN ? ')
|
isbn = tools.rawInput('ISBN? ')
|
||||||
elif doi is not False:
|
elif doi is not False:
|
||||||
print("DOI for "+src+" is "+doi+".")
|
print("DOI for "+src+" is "+doi+".")
|
||||||
|
elif arxiv is not False:
|
||||||
|
print("ArXiv id for "+src+" is "+arxiv+".")
|
||||||
elif isbn is not False:
|
elif isbn is not False:
|
||||||
print("ISBN for "+src+" is "+isbn+".")
|
print("ISBN for "+src+" is "+isbn+".")
|
||||||
|
|
||||||
if doi is not False and doi != '':
|
if doi is not False and doi != '':
|
||||||
# Add extra \n for bibtexparser
|
# Add extra \n for bibtexparser
|
||||||
bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
|
bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
|
||||||
|
elif arxiv is not False and arxiv != '':
|
||||||
|
bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
|
||||||
elif isbn is not False and isbn != '':
|
elif isbn is not False and isbn != '':
|
||||||
# Idem
|
# Idem
|
||||||
bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
|
bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
|
||||||
@ -103,7 +118,7 @@ def addFile(src, filetype):
|
|||||||
tools.warning("file "+new_name+" already exists.")
|
tools.warning("file "+new_name+" already exists.")
|
||||||
default_rename = new_name.replace(tools.getExtension(new_name),
|
default_rename = new_name.replace(tools.getExtension(new_name),
|
||||||
" (2)"+tools.getExtension(new_name))
|
" (2)"+tools.getExtension(new_name))
|
||||||
rename = tools.rawInput("New name ["+default_rename+"] ? ")
|
rename = tools.rawInput("New name ["+default_rename+"]? ")
|
||||||
if rename == '':
|
if rename == '':
|
||||||
new_name = default_rename
|
new_name = default_rename
|
||||||
else:
|
else:
|
||||||
@ -150,7 +165,7 @@ def resync():
|
|||||||
while not confirm:
|
while not confirm:
|
||||||
filename = tools.rawInput("File to import for this entry " +
|
filename = tools.rawInput("File to import for this entry " +
|
||||||
"(leave empty to delete the " +
|
"(leave empty to delete the " +
|
||||||
"entry) ? ")
|
"entry)? ")
|
||||||
if filename == '':
|
if filename == '':
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
@ -163,6 +178,14 @@ def resync():
|
|||||||
"DOI, continue anyway " +
|
"DOI, continue anyway " +
|
||||||
"? [y/N]")
|
"? [y/N]")
|
||||||
confirm = (confirm.lower() == 'y')
|
confirm = (confirm.lower() == 'y')
|
||||||
|
if 'Eprint' in entry.keys():
|
||||||
|
arxiv = fetcher.findArXivId(filename)
|
||||||
|
if arxiv is not False and arxiv != entry['Eprint']:
|
||||||
|
confirm = tools.rawInput("Found arXiv id does " +
|
||||||
|
"not match bibtex " +
|
||||||
|
"entry arxiv id, " +
|
||||||
|
"continue anyway ? [y/N]")
|
||||||
|
confirm = (confirm.lower() == 'y')
|
||||||
elif 'isbn' in entry.keys():
|
elif 'isbn' in entry.keys():
|
||||||
isbn = fetcher.findISBN(filename)
|
isbn = fetcher.findISBN(filename)
|
||||||
if isbn is not False and isbn != entry['isbn']:
|
if isbn is not False and isbn != entry['isbn']:
|
||||||
@ -187,7 +210,7 @@ def resync():
|
|||||||
print("Found file without any associated entry in index.")
|
print("Found file without any associated entry in index.")
|
||||||
action = ''
|
action = ''
|
||||||
while action.lower() not in ['import', 'delete']:
|
while action.lower() not in ['import', 'delete']:
|
||||||
action = tools.rawInput("What to do ? [import / delete] ")
|
action = tools.rawInput("What to do? [import / delete] ")
|
||||||
action = action.lower()
|
action = action.lower()
|
||||||
if action == 'import':
|
if action == 'import':
|
||||||
tmp = tempfile.NamedTemporaryFile()
|
tmp = tempfile.NamedTemporaryFile()
|
||||||
@ -209,11 +232,11 @@ def resync():
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
sys.exit("Usage : TODO")
|
sys.exit("Usage: TODO")
|
||||||
|
|
||||||
if sys.argv[1] == 'download':
|
if sys.argv[1] == 'download':
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv) < 3:
|
||||||
sys.exit("Usage : " + sys.argv[0] +
|
sys.exit("Usage: " + sys.argv[0] +
|
||||||
" download FILE [article|book]")
|
" download FILE [article|book]")
|
||||||
|
|
||||||
filetype = None
|
filetype = None
|
||||||
@ -227,7 +250,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
if sys.argv[1] == 'import':
|
if sys.argv[1] == 'import':
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv) < 3:
|
||||||
sys.exit("Usage : " + sys.argv[0] +
|
sys.exit("Usage: " + sys.argv[0] +
|
||||||
" import FILE [article|book]")
|
" import FILE [article|book]")
|
||||||
|
|
||||||
filetype = None
|
filetype = None
|
||||||
@ -241,10 +264,10 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
elif sys.argv[1] == 'delete':
|
elif sys.argv[1] == 'delete':
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv) < 3:
|
||||||
sys.exit("Usage : " + sys.argv[0] + " delete FILE|ID")
|
sys.exit("Usage: " + sys.argv[0] + " delete FILE|ID")
|
||||||
|
|
||||||
confirm = tools.rawInput("Are you sure you want to delete " +
|
confirm = tools.rawInput("Are you sure you want to delete " +
|
||||||
sys.argv[2]+" ? [y/N] ")
|
sys.argv[2]+"? [y/N] ")
|
||||||
|
|
||||||
if confirm.lower() == 'y':
|
if confirm.lower() == 'y':
|
||||||
if not backend.deleteId(sys.argv[2]):
|
if not backend.deleteId(sys.argv[2]):
|
||||||
@ -263,8 +286,8 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
elif sys.argv[1] == 'resync':
|
elif sys.argv[1] == 'resync':
|
||||||
if len(sys.argv) > 2 and sys.argv[2] == 'help':
|
if len(sys.argv) > 2 and sys.argv[2] == 'help':
|
||||||
sys.exit("Usage : " + sys.argv[0] + " resync")
|
sys.exit("Usage: " + sys.argv[0] + " resync")
|
||||||
confirm = tools.rawInput("Resync files and bibtex index ? [y/N] ")
|
confirm = tools.rawInput("Resync files and bibtex index? [y/N] ")
|
||||||
if confirm.lower() == 'y':
|
if confirm.lower() == 'y':
|
||||||
resync()
|
resync()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user