Functions to handle arXiv metadata

2014-05-02 00:07:49 +02:00 · 2014-05-02 00:07:49 +02:00 · 289c7dece4
commit 289c7dece4
parent 980e678883
3 changed files with 68 additions and 41 deletions
--- a/README.md
+++ b/README.md
@ -107,7 +107,7 @@ Here are some sources of inspirations for this project :
 A list of ideas and TODOs. Don't hesitate to give feedback on the ones you really want, or to propose your own.
-20. No DOI for arXiv / HAL
+20. No DOI for HAL
 30. Parameter to disable remote search
 40. Open file
 45. Doc / Man
@ -119,3 +119,9 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
 ## Issues ?
 * Remove the watermarks on PDF files => done. Okular shows some warnings on the generated PDF, but the output seems OK; this appears to be a bug in Okular.
 ## Thanks
 * Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) Python module
 * François Boulogne for his [python-bibtexparser](https://github.com/sciunto/python-bibtexparser) Python module and for integrating newly requested features
--- a/fetcher.py
+++ b/fetcher.py
@ -5,6 +5,7 @@ import isbntools
 import re
 import requesocks as requests  # Requesocks is requests with SOCKS support
 import subprocess
 import arxiv2bib as arxiv_metadata
 import tools
 import params
@ -178,7 +179,6 @@ def doi2Bib(doi):
 arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
 arXiv_wo_v_re = re.compile(r'v\d+\Z')
 def findArXivId(src):
@ -208,21 +208,19 @@ def findArXivId(src):
        # Error happened
        tools.warning(err)
        return False
-
+    else:
-    cleanID = False
+        return extractID
    if extractID:
        cleanID = arXiv_wo_v_re.sub('', extractID.group(1))
    return cleanID
 def arXiv2Bib(arxiv):
    """Returns bibTeX string of metadata for a given arXiv id
    arxiv is an arxiv id
    From : https://github.com/minad/bibsync/blob/master/lib/bibsync/actions/synchronize_metadata.rb
    """
-    arxiv = "oai:arXiv.org:"+arxiv
+    bibtex = arxiv_metadata.arxiv2bib([arxiv])
-    bibtex = ''
+    for bib in bibtex:
-
+        if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
-    return bibtex
+            continue
        else:
            return bib.bibtex()
    return False
--- a/main.py
+++ b/main.py
@ -18,7 +18,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
 def checkBibtex(filename, bibtex):
-    print("The bibtex entry found for "+filename+" is :")
+    print("The bibtex entry found for "+filename+" is:")
    bibtex = BibTexParser(bibtex, customization=homogeneize_latex_encoding)
    bibtex = bibtex.get_entry_dict()
@ -29,7 +29,7 @@ def checkBibtex(filename, bibtex):
    else:
        bibtex_string = ''
    print(bibtex_string)
-    check = tools.rawInput("Is it correct ? [Y/n] ")
+    check = tools.rawInput("Is it correct? [Y/n] ")
    while check.lower() == 'n':
        with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
@ -46,9 +46,9 @@ def checkBibtex(filename, bibtex):
            bibtex_string = backend.parsed2Bibtex(bibtex)
        else:
            bibtex_string = ''
-        print("\nThe bibtex entry for "+filename+" is :")
+        print("\nThe bibtex entry for "+filename+" is:")
        print(bibtex_string)
-        check = tools.rawInput("Is it correct ? [Y/n] ")
+        check = tools.rawInput("Is it correct? [Y/n] ")
    return bibtex
@ -58,37 +58,52 @@ def addFile(src, filetype):
    """
    if filetype == 'article' or filetype is None:
        doi = fetcher.findDOI(src)
    if (filetype == 'article' or filetype is None) and doi is False:
        arxiv = fetcher.findArXivId(src)
-    if filetype == 'book' or (filetype is None and doi is False):
+    if filetype == 'book' or (filetype is None and doi is False and arxiv is
                              False):
        isbn = fetcher.findISBN(src)
-    if doi is False and isbn is False:
+    if doi is False and isbn is False and arxiv is False:
        if filetype is None:
-            tools.warning("Could not determine the DOI or the ISBN for " +
+            tools.warning("Could not determine the DOI nor the arXiv id nor " +
-                          src+"."+"Switching to manual entry.")
+                          "the ISBN for "+src+"."+"Switching to manual entry.")
-            doi_isbn = ''
+            doi_arxiv_isbn = ''
-            while doi_isbn not in ['doi', 'isbn']:
+            while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn']:
-                doi_isbn = tools.rawInput("DOI / ISBN ? ").lower()
+                doi_arxiv_isbn = tools.rawInput("DOI / arXiv / ISBN? ").lower()
-            if doi_isbn == 'doi':
+            if doi_arxiv_isbn == 'doi':
-                doi = tools.rawInput('DOI ? ')
+                doi = tools.rawInput('DOI? ')
            elif doi_arxiv_isbn == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            else:
-                isbn = tools.rawInput('ISBN ? ')
+                isbn = tools.rawInput('ISBN? ')
        elif filetype == 'article':
-            tools.warning("Could not determine the DOI for "+src +
+            tools.warning("Could not determine the DOI nor the arXiv id for " +
-                          ", switching to manual entry.")
+                          src+", switching to manual entry.")
-            doi = tools.rawInput('DOI ? ')
+            doi_arxiv = ''
            while doi_arxiv not in ['doi', 'arxiv']:
                doi_arxiv = tools.rawInput("DOI / arXiv? ").lower()
            if doi_arxiv == 'doi':
                doi = tools.rawInput('DOI? ')
            else:
                arxiv = tools.rawInput('arXiv id? ')
        elif filetype == 'book':
            tools.warning("Could not determine the ISBN for "+src +
                          ", switching to manual entry.")
-            isbn = tools.rawInput('ISBN ? ')
+            isbn = tools.rawInput('ISBN? ')
    elif doi is not False:
        print("DOI for "+src+" is "+doi+".")
    elif arxiv is not False:
        print("ArXiv id for "+src+" is "+arxiv+".")
    elif isbn is not False:
        print("ISBN for "+src+" is "+isbn+".")
    if doi is not False and doi != '':
        # Add extra \n for bibtexparser
        bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
    elif arxiv is not False and arxiv != '':
        bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
    elif isbn is not False and isbn != '':
        # Idem
        bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
@ -103,7 +118,7 @@ def addFile(src, filetype):
        tools.warning("file "+new_name+" already exists.")
        default_rename = new_name.replace(tools.getExtension(new_name),
                                          " (2)"+tools.getExtension(new_name))
-        rename = tools.rawInput("New name ["+default_rename+"] ? ")
+        rename = tools.rawInput("New name ["+default_rename+"]? ")
        if rename == '':
            new_name = default_rename
        else:
@ -150,7 +165,7 @@ def resync():
            while not confirm:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
-                                          "entry) ? ")
+                                          "entry)? ")
                if filename == '':
                    break
                else:
@ -163,6 +178,14 @@ def resync():
                                                     "DOI, continue anyway " +
                                                     "? [y/N]")
                            confirm = (confirm.lower() == 'y')
                    if 'Eprint' in entry.keys():
                        arxiv = fetcher.findArXivId(filename)
                        if arxiv is not False and arxiv != entry['Eprint']:
                            confirm = tools.rawInput("Found arXiv id does " +
                                                     "not match bibtex " +
                                                     "entry arxiv id, " +
                                                     "continue anyway ? [y/N]")
                            confirm = (confirm.lower() == 'y')
                    elif 'isbn' in entry.keys():
                        isbn = fetcher.findISBN(filename)
                        if isbn is not False and isbn != entry['isbn']:
@ -187,7 +210,7 @@ def resync():
            print("Found file without any associated entry in index.")
            action = ''
            while action.lower() not in ['import', 'delete']:
-                action = tools.rawInput("What to do ? [import / delete] ")
+                action = tools.rawInput("What to do? [import / delete] ")
                action = action.lower()
            if action == 'import':
                tmp = tempfile.NamedTemporaryFile()
@ -209,11 +232,11 @@ def resync():
 if __name__ == '__main__':
    try:
        if len(sys.argv) < 2:
-            sys.exit("Usage : TODO")
+            sys.exit("Usage: TODO")
        if sys.argv[1] == 'download':
            if len(sys.argv) < 3:
-                sys.exit("Usage : " + sys.argv[0] +
+                sys.exit("Usage: " + sys.argv[0] +
                         " download FILE [article|book]")
            filetype = None
@ -227,7 +250,7 @@ if __name__ == '__main__':
        if sys.argv[1] == 'import':
            if len(sys.argv) < 3:
-                sys.exit("Usage : " + sys.argv[0] +
+                sys.exit("Usage: " + sys.argv[0] +
                         " import FILE [article|book]")
            filetype = None
@ -241,10 +264,10 @@ if __name__ == '__main__':
        elif sys.argv[1] == 'delete':
            if len(sys.argv) < 3:
-                sys.exit("Usage : " + sys.argv[0] + " delete FILE|ID")
+                sys.exit("Usage: " + sys.argv[0] + " delete FILE|ID")
            confirm = tools.rawInput("Are you sure you want to delete " +
-                                     sys.argv[2]+" ? [y/N] ")
+                                     sys.argv[2]+"? [y/N] ")
            if confirm.lower() == 'y':
                if not backend.deleteId(sys.argv[2]):
@ -263,8 +286,8 @@ if __name__ == '__main__':
        elif sys.argv[1] == 'resync':
            if len(sys.argv) > 2 and sys.argv[2] == 'help':
-                sys.exit("Usage : " + sys.argv[0] + " resync")
+                sys.exit("Usage: " + sys.argv[0] + " resync")
-            confirm = tools.rawInput("Resync files and bibtex index ? [y/N] ")
+            confirm = tools.rawInput("Resync files and bibtex index? [y/N] ")
            if confirm.lower() == 'y':
                resync()