From b4f0e7c0eb5d568420938c1033932ccd82365a4b Mon Sep 17 00:00:00 2001 From: Phyks Date: Sun, 11 May 2014 19:29:42 +0200 Subject: [PATCH] Import / Download / Delete working All bugs should be fixed for the import / download / delete functions. * Some problems with utf-8 and homogeneize_latex_encoding in python-bibtexparser are bypassed and will be cleaned up in a better way once the latest version is available in pip. * Tweaked regex for isbn, which was not case insensitive and did not handle space-separated numbers. * File entry in arXiv bibtex is now deleted to avoid confusion. --- README.md | 11 +++++++++-- backend.py | 18 ++++-------------- fetcher.py | 34 +++++++++++++++++++++------------- main.py | 12 ++++++------ tools.py | 10 ++++++++++ 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index fb31e11..be2386b 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ Should be almost working and usable now, although still to be considered as **ex **Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code. +* Import + * working : all (file / tags / bibtex modification / bibtex retrieval / remove watermark pages) +* Download + * working : all +* Delete + * working : all (by file and by id) + ## Installation * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC` @@ -123,8 +130,8 @@ Tree à la docear ? ## Issues ? 
* Multiplication of {{}} => solved in bibtexparser -* UTF-8 and bibtexparser => solved upstream -* delete / edit => problem with filename encoding +* UTF-8 and bibtexparser => solved upstream in bibtexparser +===> TODO : update bibtexparser when available in pip ## Thanks diff --git a/backend.py b/backend.py index 35d2399..8faa6b5 100644 --- a/backend.py +++ b/backend.py @@ -54,16 +54,6 @@ def getNewName(src, bibtex, tag=''): return new_name -def parsed2Bibtex(parsed): - """Convert a single bibtex entry dict to bibtex string""" - bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n" - - for field in [i for i in sorted(parsed) if i not in ['type', 'id']]: - bibtex += "\t"+field+"={"+parsed[field]+"},\n" - bibtex += "}\n\n" - return bibtex - - def bibtexAppend(data): """Append data to the main bibtex file @@ -71,7 +61,7 @@ def bibtexAppend(data): """ try: with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh: - fh.write(parsed2Bibtex(data)+"\n") + fh.write(tools.parsed2Bibtex(data)+"\n") except: tools.warning("Unable to open index file.") return False @@ -100,7 +90,7 @@ def bibtexRewrite(data): """ bibtex = '' for entry in data.keys(): - bibtex += parsed2Bibtex(data[entry])+"\n" + bibtex += tools.parsed2Bibtex(data[entry])+"\n" try: with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh: fh.write(bibtex) @@ -113,7 +103,7 @@ def deleteId(ident): """Delete a file based on its id in the bibtex file""" try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - bibtex = BibTexParser(fh.read()) + bibtex = BibTexParser(fh.read().decode('utf-8')) bibtex = bibtex.get_entry_dict() except: tools.warning("Unable to open index file.") @@ -148,7 +138,7 @@ def deleteFile(filename): """Delete a file based on its filename""" try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - bibtex = BibTexParser(fh.read()) + bibtex = BibTexParser(fh.read().decode('utf-8')) bibtex = bibtex.get_entry_dict() except: tools.warning("Unable to open 
index file.") diff --git a/fetcher.py b/fetcher.py index fee038f..28fe539 100644 --- a/fetcher.py +++ b/fetcher.py @@ -8,6 +8,8 @@ import subprocess import arxiv2bib as arxiv_metadata import tools import params +from bibtexparser.bparser import BibTexParser +from isbntools.dev.fmt import fmtbib def download(url): @@ -41,7 +43,8 @@ def download(url): return False -isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])") +isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])", + re.IGNORECASE) def findISBN(src): @@ -84,17 +87,13 @@ def findISBN(src): def isbn2Bib(isbn): """Tries to get bibtex entry from an ISBN number""" - try: - # Default merges results from worldcat.org and google books - return isbntools.dev.fmt.fmtbib('bibtex', - isbntools.meta(isbn, 'default')) - except: - return '' + # Default merges results from worldcat.org and google books + return fmtbib('bibtex', isbntools.meta(isbn, 'default')) -doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]') -doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+') -doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}') +doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE) +doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE) +doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}', re.IGNORECASE) clean_doi_re = re.compile('^/') clean_doi_fabse_re = re.compile('^10.1096') clean_doi_jcb_re = re.compile('^10.1083') @@ -183,7 +182,7 @@ def doi2Bib(doi): return '' -arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)') +arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE) def findArXivId(src): @@ -215,8 +214,10 @@ def findArXivId(src): # Error happened tools.warning(err) return False - else: + elif extractID is not None: return extractID.group(1) + else: + return False def arXiv2Bib(arxiv): @@ -229,7 +230,14 @@ def arXiv2Bib(arxiv): if isinstance(bib, arxiv_metadata.ReferenceErrorInfo): continue else: - return bib.bibtex() + 
fetched_bibtex = BibTexParser(bib.bibtex()) + fetched_bibtex = fetched_bibtex.get_entry_dict() + fetched_bibtex = fetched_bibtex[fetched_bibtex.keys()[0]] + try: + del(fetched_bibtex['file']) + except: + pass + return tools.parsed2Bibtex(fetched_bibtex) return False diff --git a/main.py b/main.py index 56367a1..9d21053 100755 --- a/main.py +++ b/main.py @@ -27,7 +27,7 @@ def checkBibtex(filename, bibtex): if len(bibtex) > 0: bibtex_name = bibtex.keys()[0] bibtex = bibtex[bibtex_name] - bibtex_string = backend.parsed2Bibtex(bibtex) + bibtex_string = tools.parsed2Bibtex(bibtex) else: bibtex_string = '' print(bibtex_string) @@ -54,7 +54,7 @@ def checkBibtex(filename, bibtex): if len(bibtex) > 0: bibtex_name = bibtex.keys()[0] bibtex = bibtex[bibtex_name] - bibtex_string = backend.parsed2Bibtex(bibtex) + bibtex_string = tools.parsed2Bibtex(bibtex) else: bibtex_string = '' print("\nThe bibtex entry for "+filename+" is:") @@ -80,17 +80,17 @@ def addFile(src, filetype, manual): if not manual: if filetype == 'article' or filetype is None: doi = fetcher.findDOI(src) - if (filetype == 'article' or filetype is None) and doi is False: + if doi is False and (filetype == 'article' or filetype is None): arxiv = fetcher.findArXivId(src) - if filetype == 'book' or (filetype is None and doi is False and - arxiv is False): + if filetype == 'book' or (doi is False and arxiv is False and + filetype is None): isbn = fetcher.findISBN(src) if doi is False and isbn is False and arxiv is False: if filetype is None: tools.warning("Could not determine the DOI nor the arXiv id nor " + - "the ISBN for "+src+"."+"Switching to manual entry.") + "the ISBN for "+src+". 
Switching to manual entry.") doi_arxiv_isbn = '' while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']: doi_arxiv_isbn = tools.rawInput("DOI / arXiv " + diff --git a/tools.py b/tools.py index 7141011..59cd931 100644 --- a/tools.py +++ b/tools.py @@ -24,6 +24,16 @@ def slugify(value): return _slugify_hyphenate_re.sub('_', value) +def parsed2Bibtex(parsed): + """Convert a single bibtex entry dict to bibtex string""" + bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n" + + for field in [i for i in sorted(parsed) if i not in ['type', 'id']]: + bibtex += "\t"+field+"={"+parsed[field]+"},\n" + bibtex += "}\n\n" + return bibtex + + def getExtension(filename): """Get the extension of filename""" return filename[filename.rfind('.'):]