From 289c7dece4aca5c59a76793d6e16087099f2b680 Mon Sep 17 00:00:00 2001 From: Phyks Date: Fri, 2 May 2014 00:07:49 +0200 Subject: [PATCH] Functions to handle arXiv metadata --- README.md | 8 +++++- fetcher.py | 22 +++++++-------- main.py | 79 +++++++++++++++++++++++++++++++++++------------------- 3 files changed, 68 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 0f194e9..dd50cb7 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Here are some sources of inspirations for this project : A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your owns. -20. No DOI for arXiv / HAL +20. No DOI for HAL 30. Parameter to disable remote search 40. Open file 45. Doc / Man @@ -119,3 +119,9 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really ## Issues ? * Remove the watermarks on pdf files => done, some warning in okular on generated pdf, but seems ok. Seems to be a bug in Okular. + + +## Thanks + +* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module +* François Boulogne for his [python-bibtexparser](https://github.com/sciunto/python-bibtexparser) python module and his integration of new requested features diff --git a/fetcher.py b/fetcher.py index 649512e..df67f14 100644 --- a/fetcher.py +++ b/fetcher.py @@ -5,6 +5,7 @@ import isbntools import re import requesocks as requests # Requesocks is requests with SOCKS support import subprocess +import arxiv2bib as arxiv_metadata import tools import params @@ -178,7 +179,6 @@ def doi2Bib(doi): arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)') -arXiv_wo_v_re = re.compile(r'v\d+\Z') def findArXivId(src): @@ -208,21 +208,19 @@ def findArXivId(src): # Error happened tools.warning(err) return False - - cleanID = False - if extractID: - cleanID = arXiv_wo_v_re.sub('', extractID.group(1)) - return cleanID - + else: + return extractID def arXiv2Bib(arxiv): """Returns bibTeX string of metadata for a given arXiv id arxiv is an arxiv id - From : https://github.com/minad/bibsync/blob/master/lib/bibsync/actions/synchronize_metadata.rb """ - arxiv = "oai:arXiv.org:"+arxiv - bibtex = '' - - return bibtex + bibtex = arxiv_metadata.arxiv2bib([arxiv]) + for bib in bibtex: + if isinstance(bib, arxiv_metadata.ReferenceErrorInfo): + continue + else: + return bib.bibtex() + return False diff --git a/main.py b/main.py index d4f1f90..89cdd53 100755 --- a/main.py +++ b/main.py @@ -18,7 +18,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim' def checkBibtex(filename, bibtex): - print("The bibtex entry found for "+filename+" is :") + print("The bibtex entry found for "+filename+" is:") bibtex = BibTexParser(bibtex, customization=homogeneize_latex_encoding) bibtex = bibtex.get_entry_dict() @@ -29,7 +29,7 @@ def checkBibtex(filename, bibtex): else: bibtex_string = '' print(bibtex_string) - check = tools.rawInput("Is it correct ? [Y/n] ") + check = tools.rawInput("Is it correct? [Y/n] ") while check.lower() == 'n': with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile: @@ -46,9 +46,9 @@ def checkBibtex(filename, bibtex): bibtex_string = backend.parsed2Bibtex(bibtex) else: bibtex_string = '' - print("\nThe bibtex entry for "+filename+" is :") + print("\nThe bibtex entry for "+filename+" is:") print(bibtex_string) - check = tools.rawInput("Is it correct ? [Y/n] ") + check = tools.rawInput("Is it correct? [Y/n] ") return bibtex @@ -58,37 +58,52 @@ def addFile(src, filetype): """ if filetype == 'article' or filetype is None: doi = fetcher.findDOI(src) + if (filetype == 'article' or filetype is None) and doi is False: + arxiv = fetcher.findArXivId(src) - if filetype == 'book' or (filetype is None and doi is False): + if filetype == 'book' or (filetype is None and doi is False and arxiv is + False): isbn = fetcher.findISBN(src) - if doi is False and isbn is False: + if doi is False and isbn is False and arxiv is False: if filetype is None: - tools.warning("Could not determine the DOI or the ISBN for " + - src+"."+"Switching to manual entry.") - doi_isbn = '' - while doi_isbn not in ['doi', 'isbn']: - doi_isbn = tools.rawInput("DOI / ISBN ? ").lower() - if doi_isbn == 'doi': - doi = tools.rawInput('DOI ? ') + tools.warning("Could not determine the DOI nor the arXiv id nor " + + "the ISBN for "+src+"."+"Switching to manual entry.") + doi_arxiv_isbn = '' + while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn']: + doi_arxiv_isbn = tools.rawInput("DOI / arXiv / ISBN? ").lower() + if doi_arxiv_isbn == 'doi': + doi = tools.rawInput('DOI? ') + elif doi_arxiv_isbn == 'arxiv': + arxiv = tools.rawInput('arXiv id? ') else: - isbn = tools.rawInput('ISBN ? ') + isbn = tools.rawInput('ISBN? ') elif filetype == 'article': - tools.warning("Could not determine the DOI for "+src + - ", switching to manual entry.") - doi = tools.rawInput('DOI ? ') + tools.warning("Could not determine the DOI nor the arXiv id for " + + src+", switching to manual entry.") + doi_arxiv = '' + while doi_arxiv not in ['doi', 'arxiv']: + doi_arxiv = tools.rawInput("DOI / arXiv? ").lower() + if doi_arxiv == 'doi': + doi = tools.rawInput('DOI? ') + else: + arxiv = tools.rawInput('arXiv id? ') elif filetype == 'book': tools.warning("Could not determine the ISBN for "+src + ", switching to manual entry.") - isbn = tools.rawInput('ISBN ? ') + isbn = tools.rawInput('ISBN? ') elif doi is not False: print("DOI for "+src+" is "+doi+".") + elif arxiv is not False: + print("ArXiv id for "+src+" is "+arxiv+".") elif isbn is not False: print("ISBN for "+src+" is "+isbn+".") if doi is not False and doi != '': # Add extra \n for bibtexparser bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n" + elif arxiv is not False and arxiv != '': + bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n" elif isbn is not False and isbn != '': # Idem bibtex = fetcher.isbn2Bib(isbn).strip()+"\n" @@ -103,7 +118,7 @@ def addFile(src, filetype): tools.warning("file "+new_name+" already exists.") default_rename = new_name.replace(tools.getExtension(new_name), " (2)"+tools.getExtension(new_name)) - rename = tools.rawInput("New name ["+default_rename+"] ? ") + rename = tools.rawInput("New name ["+default_rename+"]? ") if rename == '': new_name = default_rename else: @@ -150,7 +165,7 @@ def resync(): while not confirm: filename = tools.rawInput("File to import for this entry " + "(leave empty to delete the " + - "entry) ? ") + "entry)? ") if filename == '': break else: @@ -163,6 +178,14 @@ def resync(): "DOI, continue anyway " + "? [y/N]") confirm = (confirm.lower() == 'y') + if 'Eprint' in entry.keys(): + arxiv = fetcher.findArXivId(filename) + if arxiv is not False and arxiv != entry['Eprint']: + confirm = tools.rawInput("Found arXiv id does " + + "not match bibtex " + + "entry arxiv id, " + + "continue anyway ? [y/N]") + confirm = (confirm.lower() == 'y') elif 'isbn' in entry.keys(): isbn = fetcher.findISBN(filename) if isbn is not False and isbn != entry['isbn']: @@ -187,7 +210,7 @@ def resync(): print("Found file without any associated entry in index.") action = '' while action.lower() not in ['import', 'delete']: - action = tools.rawInput("What to do ? [import / delete] ") + action = tools.rawInput("What to do? [import / delete] ") action = action.lower() if action == 'import': tmp = tempfile.NamedTemporaryFile() @@ -209,11 +232,11 @@ def resync(): if __name__ == '__main__': try: if len(sys.argv) < 2: - sys.exit("Usage : TODO") + sys.exit("Usage: TODO") if sys.argv[1] == 'download': if len(sys.argv) < 3: - sys.exit("Usage : " + sys.argv[0] + + sys.exit("Usage: " + sys.argv[0] + " download FILE [article|book]") filetype = None @@ -227,7 +250,7 @@ if __name__ == '__main__': if sys.argv[1] == 'import': if len(sys.argv) < 3: - sys.exit("Usage : " + sys.argv[0] + + sys.exit("Usage: " + sys.argv[0] + " import FILE [article|book]") filetype = None @@ -241,10 +264,10 @@ if __name__ == '__main__': elif sys.argv[1] == 'delete': if len(sys.argv) < 3: - sys.exit("Usage : " + sys.argv[0] + " delete FILE|ID") + sys.exit("Usage: " + sys.argv[0] + " delete FILE|ID") confirm = tools.rawInput("Are you sure you want to delete " + - sys.argv[2]+" ? [y/N] ") + sys.argv[2]+"? [y/N] ") if confirm.lower() == 'y': if not backend.deleteId(sys.argv[2]): @@ -263,8 +286,8 @@ if __name__ == '__main__': elif sys.argv[1] == 'resync': if len(sys.argv) > 2 and sys.argv[2] == 'help': - sys.exit("Usage : " + sys.argv[0] + " resync") - confirm = tools.rawInput("Resync files and bibtex index ? [y/N] ") + sys.exit("Usage: " + sys.argv[0] + " resync") + confirm = tools.rawInput("Resync files and bibtex index? [y/N] ") if confirm.lower() == 'y': resync()