From b4f0e7c0eb5d568420938c1033932ccd82365a4b Mon Sep 17 00:00:00 2001
From: Phyks <webmaster@phyks.me>
Date: Sun, 11 May 2014 19:29:42 +0200
Subject: [PATCH] Import / Download / Delete working. All bugs should be fixed
 for the import / download / delete functions.

* Some problems with utf-8 and homogeneize_latex_encoding in
python-bibtexparser are worked around for now and will be cleaned up
properly once the latest version is available on pip.
* Tweaked the ISBN regex, which was not case-insensitive and did not
handle space-separated numbers.
* File entry in arXiv bibtex is now deleted to avoid confusion.
---
 README.md  | 11 +++++++++--
 backend.py | 18 ++++--------------
 fetcher.py | 34 +++++++++++++++++++++-------------
 main.py    | 12 ++++++------
 tools.py   | 10 ++++++++++
 5 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index fb31e11..be2386b 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,13 @@ Should be almost working and usable now, although still to be considered as **ex
 **Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code.
 
 
+* Import
+    * working : all (file / tags / bibtex modification / bibtex retrieval / remove watermark pages)
+* Download
+    * working : all
+* Delete
+    * working : all (by file and by id)
+
 ## Installation
 
 * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC`
@@ -123,8 +130,8 @@ Tree à la docear ?
 ## Issues ?
 
 * Multiplication of {{}} => solved in bibtexparser
-* UTF-8 and bibtexparser => solved upstream
-* delete / edit => problem with filename encoding
+* UTF-8 and bibtexparser => solved upstream in bibtexparser
+===> TODO : update bibtexparser when available in pip
 
 ## Thanks
 
diff --git a/backend.py b/backend.py
index 35d2399..8faa6b5 100644
--- a/backend.py
+++ b/backend.py
@@ -54,16 +54,6 @@ def getNewName(src, bibtex, tag=''):
     return new_name
 
 
-def parsed2Bibtex(parsed):
-    """Convert a single bibtex entry dict to bibtex string"""
-    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
-
-    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
-        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
-    bibtex += "}\n\n"
-    return bibtex
-
-
 def bibtexAppend(data):
     """Append data to the main bibtex file
 
@@ -71,7 +61,7 @@ def bibtexAppend(data):
     """
     try:
         with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh:
-            fh.write(parsed2Bibtex(data)+"\n")
+            fh.write(tools.parsed2Bibtex(data)+"\n")
     except:
         tools.warning("Unable to open index file.")
         return False
@@ -100,7 +90,7 @@ def bibtexRewrite(data):
     """
     bibtex = ''
     for entry in data.keys():
-        bibtex += parsed2Bibtex(data[entry])+"\n"
+        bibtex += tools.parsed2Bibtex(data[entry])+"\n"
     try:
         with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh:
             fh.write(bibtex)
@@ -113,7 +103,7 @@ def deleteId(ident):
     """Delete a file based on its id in the bibtex file"""
     try:
         with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
         bibtex = bibtex.get_entry_dict()
     except:
         tools.warning("Unable to open index file.")
@@ -148,7 +138,7 @@ def deleteFile(filename):
     """Delete a file based on its filename"""
     try:
         with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
         bibtex = bibtex.get_entry_dict()
     except:
         tools.warning("Unable to open index file.")
diff --git a/fetcher.py b/fetcher.py
index fee038f..28fe539 100644
--- a/fetcher.py
+++ b/fetcher.py
@@ -8,6 +8,8 @@ import subprocess
 import arxiv2bib as arxiv_metadata
 import tools
 import params
+from bibtexparser.bparser import BibTexParser
+from isbntools.dev.fmt import fmtbib
 
 
 def download(url):
@@ -41,7 +43,8 @@ def download(url):
     return False
 
 
-isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])")
+isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
+                    re.IGNORECASE)
 
 
 def findISBN(src):
@@ -84,17 +87,13 @@ def findISBN(src):
 
 def isbn2Bib(isbn):
     """Tries to get bibtex entry from an ISBN number"""
-    try:
-        # Default merges results from worldcat.org and google books
-        return isbntools.dev.fmt.fmtbib('bibtex',
-                                        isbntools.meta(isbn, 'default'))
-    except:
-        return ''
+    # Default merges results from worldcat.org and google books
+    return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
 
 
-doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]')
-doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+')
-doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}')
+doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
+doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE)
+doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}', re.IGNORECASE)
 clean_doi_re = re.compile('^/')
 clean_doi_fabse_re = re.compile('^10.1096')
 clean_doi_jcb_re = re.compile('^10.1083')
@@ -183,7 +182,7 @@ def doi2Bib(doi):
         return ''
 
 
-arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
+arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE)
 
 
 def findArXivId(src):
@@ -215,8 +214,10 @@ def findArXivId(src):
         # Error happened
         tools.warning(err)
         return False
-    else:
+    elif extractID is not None:
         return extractID.group(1)
+    else:
+        return False
 
 
 def arXiv2Bib(arxiv):
@@ -229,7 +230,14 @@ def arXiv2Bib(arxiv):
         if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
             continue
         else:
-            return bib.bibtex()
+            fetched_bibtex = BibTexParser(bib.bibtex())
+            fetched_bibtex = fetched_bibtex.get_entry_dict()
+            fetched_bibtex = fetched_bibtex[fetched_bibtex.keys()[0]]
+            try:
+                del(fetched_bibtex['file'])
+            except:
+                pass
+            return tools.parsed2Bibtex(fetched_bibtex)
     return False
 
 
diff --git a/main.py b/main.py
index 56367a1..9d21053 100755
--- a/main.py
+++ b/main.py
@@ -27,7 +27,7 @@ def checkBibtex(filename, bibtex):
     if len(bibtex) > 0:
         bibtex_name = bibtex.keys()[0]
         bibtex = bibtex[bibtex_name]
-        bibtex_string = backend.parsed2Bibtex(bibtex)
+        bibtex_string = tools.parsed2Bibtex(bibtex)
     else:
         bibtex_string = ''
     print(bibtex_string)
@@ -54,7 +54,7 @@ def checkBibtex(filename, bibtex):
             if len(bibtex) > 0:
                 bibtex_name = bibtex.keys()[0]
                 bibtex = bibtex[bibtex_name]
-                bibtex_string = backend.parsed2Bibtex(bibtex)
+                bibtex_string = tools.parsed2Bibtex(bibtex)
             else:
                 bibtex_string = ''
             print("\nThe bibtex entry for "+filename+" is:")
@@ -80,17 +80,17 @@ def addFile(src, filetype, manual):
     if not manual:
         if filetype == 'article' or filetype is None:
             doi = fetcher.findDOI(src)
-        if (filetype == 'article' or filetype is None) and doi is False:
+        if doi is False and (filetype == 'article' or filetype is None):
             arxiv = fetcher.findArXivId(src)
 
-        if filetype == 'book' or (filetype is None and doi is False and
-                                  arxiv is False):
+        if filetype == 'book' or (doi is False and arxiv is False and
+                                  filetype is None):
             isbn = fetcher.findISBN(src)
 
     if doi is False and isbn is False and arxiv is False:
         if filetype is None:
             tools.warning("Could not determine the DOI nor the arXiv id nor " +
-                          "the ISBN for "+src+"."+"Switching to manual entry.")
+                          "the ISBN for "+src+". Switching to manual entry.")
             doi_arxiv_isbn = ''
             while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']:
                 doi_arxiv_isbn = tools.rawInput("DOI / arXiv " +
diff --git a/tools.py b/tools.py
index 7141011..59cd931 100644
--- a/tools.py
+++ b/tools.py
@@ -24,6 +24,16 @@ def slugify(value):
     return _slugify_hyphenate_re.sub('_', value)
 
 
+def parsed2Bibtex(parsed):
+    """Convert a single bibtex entry dict to bibtex string"""
+    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
+
+    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
+        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
+    bibtex += "}\n\n"
+    return bibtex
+
+
 def getExtension(filename):
     """Get the extension of filename"""
     return filename[filename.rfind('.'):]