Import / Download / Delete working
All bugs in the import / download / delete functions should now be fixed.

* Some problems with UTF-8 and homogeneize_latex_encoding in python-bibtexparser are bypassed for now; they will be cleaned up properly once the latest version is available in pip.
* Tweaked the ISBN regex, which was not case-insensitive and did not handle space-separated numbers.
* The file entry in the bibtex fetched from arXiv is now deleted to avoid confusion.
This commit is contained in:
parent 204357222c
commit b4f0e7c0eb
README.md (11 lines changed)
@@ -31,6 +31,13 @@ Should be almost working and usable now, although still to be considered as **ex
 **Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code.
 
+* Import
+    * working : all (file / tags / bibtex modification / bibtex retrieval / remove watermark pages)
+* Download
+    * working : all
+* Delete
+    * working : all (by file and by id)
+
 ## Installation
 
 * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC`
@@ -123,8 +130,8 @@ Tree à la docear ?
 ## Issues ?
 
 * Multiplication of {{}} => solved in bibtexparser
-* UTF-8 and bibtexparser => solved upstream
-* delete / edit => problem with filename encoding
+* UTF-8 and bibtexparser => solved upstream in bibtexparser
+===> TODO : update bibtexparser when available in pip
 
 ## Thanks
 
backend.py (18 lines changed)
@@ -54,16 +54,6 @@ def getNewName(src, bibtex, tag=''):
     return new_name
 
 
-def parsed2Bibtex(parsed):
-    """Convert a single bibtex entry dict to bibtex string"""
-    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
-
-    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
-        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
-    bibtex += "}\n\n"
-    return bibtex
-
-
 def bibtexAppend(data):
     """Append data to the main bibtex file
 
@@ -71,7 +61,7 @@ def bibtexAppend(data):
     """
     try:
         with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh:
-            fh.write(parsed2Bibtex(data)+"\n")
+            fh.write(tools.parsed2Bibtex(data)+"\n")
     except:
         tools.warning("Unable to open index file.")
         return False
@@ -100,7 +90,7 @@ def bibtexRewrite(data):
     """
     bibtex = ''
     for entry in data.keys():
-        bibtex += parsed2Bibtex(data[entry])+"\n"
+        bibtex += tools.parsed2Bibtex(data[entry])+"\n"
     try:
         with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh:
            fh.write(bibtex)
@@ -113,7 +103,7 @@ def deleteId(ident):
    """Delete a file based on its id in the bibtex file"""
    try:
        with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
        bibtex = bibtex.get_entry_dict()
    except:
        tools.warning("Unable to open index file.")
@@ -148,7 +138,7 @@ def deleteFile(filename):
    """Delete a file based on its filename"""
    try:
        with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
        bibtex = bibtex.get_entry_dict()
    except:
        tools.warning("Unable to open index file.")
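The deleteId / deleteFile changes above keep the same parse / filter / rewrite cycle around index.bib. The snippet below is only a simplified, hypothetical sketch of that cycle (not the actual deleteId code), assuming the `BibTexParser(string).get_entry_dict()` API used throughout this codebase:

```python
from bibtexparser.bparser import BibTexParser

import tools  # this repository's tools module (parsed2Bibtex, warning, ...)


def delete_entry_sketch(index_path, ident):
    """Illustrative only: drop one entry from the index and rewrite it."""
    with open(index_path, 'r', encoding='utf-8') as fh:
        # Same parsing pattern as deleteId / deleteFile above
        entries = BibTexParser(fh.read()).get_entry_dict()

    if ident not in entries:
        return False
    entries.pop(ident)

    # Re-serialize every remaining entry, as bibtexRewrite does
    bibtex = ''
    for key in entries:
        bibtex += tools.parsed2Bibtex(entries[key]) + "\n"
    with open(index_path, 'w', encoding='utf-8') as fh:
        fh.write(bibtex)
    return True
```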
fetcher.py (32 lines changed)
@@ -8,6 +8,8 @@ import subprocess
 import arxiv2bib as arxiv_metadata
 import tools
 import params
+from bibtexparser.bparser import BibTexParser
+from isbntools.dev.fmt import fmtbib
 
 
 def download(url):
@@ -41,7 +43,8 @@ def download(url):
     return False
 
 
-isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])")
+isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
+                     re.IGNORECASE)
 
 
 def findISBN(src):
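As a quick, purely illustrative check of the regex tweak mentioned in the commit message (made-up ISBN, not a real one):

```python
import re

old_isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])")
new_isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
                         re.IGNORECASE)

# Uppercase "ISBN" with space-separated groups, as often found in PDF text
text = "ISBN 978 2 12 345678 9"

print(old_isbn_re.search(text))           # None: case-sensitive, and the last separator had to be '-'
print(new_isbn_re.search(text).group(1))  # '978 2 12 345678 9'
```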
@@ -84,17 +87,13 @@ def findISBN(src):
 
 def isbn2Bib(isbn):
     """Tries to get bibtex entry from an ISBN number"""
-    try:
-        # Default merges results from worldcat.org and google books
-        return isbntools.dev.fmt.fmtbib('bibtex',
-                                        isbntools.meta(isbn, 'default'))
-    except:
-        return ''
+    # Default merges results from worldcat.org and google books
+    return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
 
 
-doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]')
-doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+')
-doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}')
+doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
+doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE)
+doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}', re.IGNORECASE)
 clean_doi_re = re.compile('^/')
 clean_doi_fabse_re = re.compile('^10.1096')
 clean_doi_jcb_re = re.compile('^10.1083')
@@ -183,7 +182,7 @@ def doi2Bib(doi):
     return ''
 
 
-arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
+arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE)
 
 
 def findArXivId(src):
@@ -215,8 +214,10 @@ def findArXivId(src):
         # Error happened
         tools.warning(err)
         return False
-    else:
+    elif extractID is not None:
         return extractID.group(1)
+    else:
+        return False
 
 
 def arXiv2Bib(arxiv):
@@ -229,7 +230,14 @@ def arXiv2Bib(arxiv):
         if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
             continue
         else:
-            return bib.bibtex()
+            fetched_bibtex = BibTexParser(bib.bibtex())
+            fetched_bibtex = fetched_bibtex.get_entry_dict()
+            fetched_bibtex = fetched_bibtex[fetched_bibtex.keys()[0]]
+            try:
+                del(fetched_bibtex['file'])
+            except:
+                pass
+            return tools.parsed2Bibtex(fetched_bibtex)
     return False
 
 
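The new arXiv2Bib body implements the last point of the commit message: the fetched entry is parsed, its file field is dropped, and the entry is re-serialized. Below is a minimal sketch of that idea on a made-up entry dict (shaped like what get_entry_dict() returns), not the fetcher's actual output:

```python
import tools  # this repository's tools module

# Hypothetical parsed arXiv entry; field names and values are illustrative
entry = {
    'type': 'article',
    'id': '1234.5678v1',
    'author': 'Doe, John',
    'file': '1234.5678v1.pdf',
    'title': 'An example preprint',
    'year': '2014',
}

entry.pop('file', None)            # same effect as the try/del/except block above
print(tools.parsed2Bibtex(entry))  # clean bibtex string, without the file field
```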
main.py (12 lines changed)
@@ -27,7 +27,7 @@ def checkBibtex(filename, bibtex):
     if len(bibtex) > 0:
         bibtex_name = bibtex.keys()[0]
         bibtex = bibtex[bibtex_name]
-        bibtex_string = backend.parsed2Bibtex(bibtex)
+        bibtex_string = tools.parsed2Bibtex(bibtex)
     else:
         bibtex_string = ''
     print(bibtex_string)
@@ -54,7 +54,7 @@ def checkBibtex(filename, bibtex):
     if len(bibtex) > 0:
         bibtex_name = bibtex.keys()[0]
         bibtex = bibtex[bibtex_name]
-        bibtex_string = backend.parsed2Bibtex(bibtex)
+        bibtex_string = tools.parsed2Bibtex(bibtex)
     else:
         bibtex_string = ''
     print("\nThe bibtex entry for "+filename+" is:")
@@ -80,17 +80,17 @@ def addFile(src, filetype, manual):
     if not manual:
         if filetype == 'article' or filetype is None:
             doi = fetcher.findDOI(src)
-        if (filetype == 'article' or filetype is None) and doi is False:
+        if doi is False and (filetype == 'article' or filetype is None):
             arxiv = fetcher.findArXivId(src)
 
-        if filetype == 'book' or (filetype is None and doi is False and
-                                  arxiv is False):
+        if filetype == 'book' or (doi is False and arxiv is False and
+                                  filetype is None):
             isbn = fetcher.findISBN(src)
 
     if doi is False and isbn is False and arxiv is False:
         if filetype is None:
             tools.warning("Could not determine the DOI nor the arXiv id nor " +
-                          "the ISBN for "+src+"."+"Switching to manual entry.")
+                          "the ISBN for "+src+". Switching to manual entry.")
             doi_arxiv_isbn = ''
             while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']:
                 doi_arxiv_isbn = tools.rawInput("DOI / arXiv " +
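The reordered conditions in addFile amount to a fallback cascade: DOI first, then arXiv id, then ISBN, and finally manual entry. A hypothetical, stripped-down version of that flow (the real code also honours the filetype argument and prompts the user):

```python
import fetcher  # this repository's fetcher module


def guess_identifier(src):
    """Illustrative only: try each detector in turn, as addFile now does."""
    doi = fetcher.findDOI(src)
    if doi is not False:
        return ('doi', doi)

    arxiv = fetcher.findArXivId(src)
    if arxiv is not False:
        return ('arxiv', arxiv)

    isbn = fetcher.findISBN(src)
    if isbn is not False:
        return ('isbn', isbn)

    return ('manual', None)
```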
tools.py (10 lines changed)
@@ -24,6 +24,16 @@ def slugify(value):
     return _slugify_hyphenate_re.sub('_', value)
 
 
+def parsed2Bibtex(parsed):
+    """Convert a single bibtex entry dict to bibtex string"""
+    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
+
+    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
+        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
+    bibtex += "}\n\n"
+    return bibtex
+
+
 def getExtension(filename):
     """Get the extension of filename"""
     return filename[filename.rfind('.'):]
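For reference, this is what the relocated helper produces for a minimal entry dict (illustrative values); fields other than type and id come out sorted and tab-indented:

```python
import tools

entry = {
    'type': 'article',
    'id': 'doe2014',
    'author': 'Doe, John',
    'title': 'An example title',
    'year': '2014',
}

print(tools.parsed2Bibtex(entry))
# @article{doe2014,
#     author={Doe, John},
#     title={An example title},
#     year={2014},
# }
```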