Import / Download / Delete working

All bugs should be fixed for the import / download / delete functions.

* Some problems with UTF-8 and homogeneize_latex_encoding in
python-bibtexparser are worked around for now and will be cleaned up
properly once the latest version is available on pip.
* Tweaked the ISBN regex, which was not case-insensitive and did not
handle space-separated numbers.
* File entry in arXiv bibtex is now deleted to avoid confusion.
This commit is contained in:
Phyks 2014-05-11 19:29:42 +02:00
parent 204357222c
commit b4f0e7c0eb
5 changed files with 50 additions and 35 deletions

View File

@ -31,6 +31,13 @@ Should be almost working and usable now, although still to be considered as **ex
**Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code. **Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code.
* Import
* working : all (file / tags / bibtex modification / bibtex retrieval / remove watermark pages)
* Download
* working : all
* Delete
* working : all (by file and by id)
## Installation ## Installation
* Clone this git repository where you want : `git clone https://github.com/Phyks/BMC` * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC`
@ -123,8 +130,8 @@ Tree à la docear ?
## Issues ? ## Issues ?
* Multiplication of {{}} => solved in bibtexparser * Multiplication of {{}} => solved in bibtexparser
* UTF-8 and bibtexparser => solved upstream * UTF-8 and bibtexparser => solved upstream in bibtexparser
* delete / edit => problem with filename encoding ===> TODO : update bibtexparser when available in pip
## Thanks ## Thanks

View File

@ -54,16 +54,6 @@ def getNewName(src, bibtex, tag=''):
return new_name return new_name
def parsed2Bibtex(parsed):
"""Convert a single bibtex entry dict to bibtex string"""
bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
bibtex += "\t"+field+"={"+parsed[field]+"},\n"
bibtex += "}\n\n"
return bibtex
def bibtexAppend(data): def bibtexAppend(data):
"""Append data to the main bibtex file """Append data to the main bibtex file
@ -71,7 +61,7 @@ def bibtexAppend(data):
""" """
try: try:
with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh:
fh.write(parsed2Bibtex(data)+"\n") fh.write(tools.parsed2Bibtex(data)+"\n")
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
return False return False
@ -100,7 +90,7 @@ def bibtexRewrite(data):
""" """
bibtex = '' bibtex = ''
for entry in data.keys(): for entry in data.keys():
bibtex += parsed2Bibtex(data[entry])+"\n" bibtex += tools.parsed2Bibtex(data[entry])+"\n"
try: try:
with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh:
fh.write(bibtex) fh.write(bibtex)
@ -113,7 +103,7 @@ def deleteId(ident):
"""Delete a file based on its id in the bibtex file""" """Delete a file based on its id in the bibtex file"""
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read()) bibtex = BibTexParser(fh.read().decode('utf-8'))
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
@ -148,7 +138,7 @@ def deleteFile(filename):
"""Delete a file based on its filename""" """Delete a file based on its filename"""
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read()) bibtex = BibTexParser(fh.read().decode('utf-8'))
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")

View File

@ -8,6 +8,8 @@ import subprocess
import arxiv2bib as arxiv_metadata import arxiv2bib as arxiv_metadata
import tools import tools
import params import params
from bibtexparser.bparser import BibTexParser
from isbntools.dev.fmt import fmtbib
def download(url): def download(url):
@ -41,7 +43,8 @@ def download(url):
return False return False
isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])") isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
re.IGNORECASE)
def findISBN(src): def findISBN(src):
@ -84,17 +87,13 @@ def findISBN(src):
def isbn2Bib(isbn): def isbn2Bib(isbn):
"""Tries to get bibtex entry from an ISBN number""" """Tries to get bibtex entry from an ISBN number"""
try: # Default merges results from worldcat.org and google books
# Default merges results from worldcat.org and google books return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
return isbntools.dev.fmt.fmtbib('bibtex',
isbntools.meta(isbn, 'default'))
except:
return ''
doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]') doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+') doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE)
doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}') doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}', re.IGNORECASE)
clean_doi_re = re.compile('^/') clean_doi_re = re.compile('^/')
clean_doi_fabse_re = re.compile('^10.1096') clean_doi_fabse_re = re.compile('^10.1096')
clean_doi_jcb_re = re.compile('^10.1083') clean_doi_jcb_re = re.compile('^10.1083')
@ -183,7 +182,7 @@ def doi2Bib(doi):
return '' return ''
arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)') arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE)
def findArXivId(src): def findArXivId(src):
@ -215,8 +214,10 @@ def findArXivId(src):
# Error happened # Error happened
tools.warning(err) tools.warning(err)
return False return False
else: elif extractID is not None:
return extractID.group(1) return extractID.group(1)
else:
return False
def arXiv2Bib(arxiv): def arXiv2Bib(arxiv):
@ -229,7 +230,14 @@ def arXiv2Bib(arxiv):
if isinstance(bib, arxiv_metadata.ReferenceErrorInfo): if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
continue continue
else: else:
return bib.bibtex() fetched_bibtex = BibTexParser(bib.bibtex())
fetched_bibtex = fetched_bibtex.get_entry_dict()
fetched_bibtex = fetched_bibtex[fetched_bibtex.keys()[0]]
try:
del(fetched_bibtex['file'])
except:
pass
return tools.parsed2Bibtex(fetched_bibtex)
return False return False

12
main.py
View File

@ -27,7 +27,7 @@ def checkBibtex(filename, bibtex):
if len(bibtex) > 0: if len(bibtex) > 0:
bibtex_name = bibtex.keys()[0] bibtex_name = bibtex.keys()[0]
bibtex = bibtex[bibtex_name] bibtex = bibtex[bibtex_name]
bibtex_string = backend.parsed2Bibtex(bibtex) bibtex_string = tools.parsed2Bibtex(bibtex)
else: else:
bibtex_string = '' bibtex_string = ''
print(bibtex_string) print(bibtex_string)
@ -54,7 +54,7 @@ def checkBibtex(filename, bibtex):
if len(bibtex) > 0: if len(bibtex) > 0:
bibtex_name = bibtex.keys()[0] bibtex_name = bibtex.keys()[0]
bibtex = bibtex[bibtex_name] bibtex = bibtex[bibtex_name]
bibtex_string = backend.parsed2Bibtex(bibtex) bibtex_string = tools.parsed2Bibtex(bibtex)
else: else:
bibtex_string = '' bibtex_string = ''
print("\nThe bibtex entry for "+filename+" is:") print("\nThe bibtex entry for "+filename+" is:")
@ -80,17 +80,17 @@ def addFile(src, filetype, manual):
if not manual: if not manual:
if filetype == 'article' or filetype is None: if filetype == 'article' or filetype is None:
doi = fetcher.findDOI(src) doi = fetcher.findDOI(src)
if (filetype == 'article' or filetype is None) and doi is False: if doi is False and (filetype == 'article' or filetype is None):
arxiv = fetcher.findArXivId(src) arxiv = fetcher.findArXivId(src)
if filetype == 'book' or (filetype is None and doi is False and if filetype == 'book' or (doi is False and arxiv is False and
arxiv is False): filetype is None):
isbn = fetcher.findISBN(src) isbn = fetcher.findISBN(src)
if doi is False and isbn is False and arxiv is False: if doi is False and isbn is False and arxiv is False:
if filetype is None: if filetype is None:
tools.warning("Could not determine the DOI nor the arXiv id nor " + tools.warning("Could not determine the DOI nor the arXiv id nor " +
"the ISBN for "+src+"."+"Switching to manual entry.") "the ISBN for "+src+". Switching to manual entry.")
doi_arxiv_isbn = '' doi_arxiv_isbn = ''
while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']: while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']:
doi_arxiv_isbn = tools.rawInput("DOI / arXiv " + doi_arxiv_isbn = tools.rawInput("DOI / arXiv " +

View File

@ -24,6 +24,16 @@ def slugify(value):
return _slugify_hyphenate_re.sub('_', value) return _slugify_hyphenate_re.sub('_', value)
def parsed2Bibtex(parsed):
    """Convert a single bibtex entry dict to a bibtex string.

    `parsed` is a dict describing one entry. It must contain the keys
    'type' (entry type, e.g. "article") and 'id' (citation key); every
    other key is emitted as a bibtex field. All values are concatenated
    with strings, so they must be strings themselves.

    Returns the entry as text: an "@type{id," header line, one
    tab-indented "field={value}," line per field in sorted key order,
    then a closing "}" followed by a blank line.

    Raises KeyError if 'type' or 'id' is missing.
    """
    header = '@' + parsed['type'] + '{' + parsed['id'] + ",\n"
    # Sorted field order keeps the generated entry deterministic.
    fields = [
        "\t" + field + "={" + parsed[field] + "},\n"
        for field in sorted(parsed)
        if field not in ('type', 'id')
    ]
    # Join once instead of growing the string with += inside a loop.
    return header + ''.join(fields) + "}\n\n"
def getExtension(filename): def getExtension(filename):
"""Get the extension of filename""" """Get the extension of filename"""
return filename[filename.rfind('.'):] return filename[filename.rfind('.'):]