Import / Download / Delete working
All bugs in the import / download / delete functions should now be fixed.

* Some problems with UTF-8 and homogeneize_latex_encoding in python-bibtexparser are bypassed for now; they will be cleaned up properly once the latest version is available in pip.
* Tweaked the ISBN regex, which was not case-insensitive and did not handle space-separated numbers.
* The file entry in the bibtex fetched from arXiv is now deleted to avoid confusion.
This commit is contained in:
parent 204357222c
commit b4f0e7c0eb
README.md (11 lines changed)
@@ -31,6 +31,13 @@ Should be almost working and usable now, although still to be considered as **ex
 **Important note :** I use it for personal use, but I don't read articles from many journals. If you find any file which is not working, please fill an issue or send me an e-mail with the relevant information. There are alternative ways to get the metadata for example, and I didn't know really which one was the best one as writing this code.
 
+* Import
+    * working : all (file / tags / bibtex modification / bibtex retrieval / remove watermark pages)
+* Download
+    * working : all
+* Delete
+    * working : all (by file and by id)
+
 ## Installation
 
 * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC`
@@ -123,8 +130,8 @@ Tree à la docear ?
 ## Issues ?
 
 * Multiplication of {{}} => solved in bibtexparser
-* UTF-8 and bibtexparser => solved upstream
-* delete / edit => problem with filename encoding
+* UTF-8 and bibtexparser => solved upstream in bibtexparser
+===> TODO : update bibtexparser when available in pip
 
 ## Thanks
 
backend.py (18 lines changed)
@@ -54,16 +54,6 @@ def getNewName(src, bibtex, tag=''):
     return new_name
 
 
-def parsed2Bibtex(parsed):
-    """Convert a single bibtex entry dict to bibtex string"""
-    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
-
-    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
-        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
-    bibtex += "}\n\n"
-    return bibtex
-
-
 def bibtexAppend(data):
     """Append data to the main bibtex file
 
@@ -71,7 +61,7 @@ def bibtexAppend(data):
     """
     try:
         with open(params.folder+'index.bib', 'a', encoding='utf-8') as fh:
-            fh.write(parsed2Bibtex(data)+"\n")
+            fh.write(tools.parsed2Bibtex(data)+"\n")
     except:
         tools.warning("Unable to open index file.")
         return False
@@ -100,7 +90,7 @@ def bibtexRewrite(data):
     """
     bibtex = ''
     for entry in data.keys():
-        bibtex += parsed2Bibtex(data[entry])+"\n"
+        bibtex += tools.parsed2Bibtex(data[entry])+"\n"
     try:
         with open(params.folder+'index.bib', 'w', encoding='utf-8') as fh:
            fh.write(bibtex)
@@ -113,7 +103,7 @@ def deleteId(ident):
    """Delete a file based on its id in the bibtex file"""
    try:
        with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
        bibtex = bibtex.get_entry_dict()
    except:
        tools.warning("Unable to open index file.")
@@ -148,7 +138,7 @@ def deleteFile(filename):
    """Delete a file based on its filename"""
    try:
        with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
-            bibtex = BibTexParser(fh.read())
+            bibtex = BibTexParser(fh.read().decode('utf-8'))
        bibtex = bibtex.get_entry_dict()
    except:
        tools.warning("Unable to open index file.")
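The deleteId / deleteFile changes above keep the same parse / filter / rewrite cycle around index.bib. The snippet below is only a simplified, hypothetical sketch of that cycle (not the actual deleteId code), assuming the `BibTexParser(string).get_entry_dict()` API used throughout this codebase:

```python
from bibtexparser.bparser import BibTexParser

import tools  # this repository's tools module (parsed2Bibtex, warning, ...)


def delete_entry_sketch(index_path, ident):
    """Illustrative only: drop one entry from the index and rewrite it."""
    with open(index_path, 'r', encoding='utf-8') as fh:
        # Same parsing pattern as deleteId / deleteFile above
        entries = BibTexParser(fh.read()).get_entry_dict()

    if ident not in entries:
        return False
    entries.pop(ident)

    # Re-serialize every remaining entry, as bibtexRewrite does
    bibtex = ''
    for key in entries:
        bibtex += tools.parsed2Bibtex(entries[key]) + "\n"
    with open(index_path, 'w', encoding='utf-8') as fh:
        fh.write(bibtex)
    return True
```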
fetcher.py (32 lines changed)
@@ -8,6 +8,8 @@ import subprocess
 import arxiv2bib as arxiv_metadata
 import tools
 import params
+from bibtexparser.bparser import BibTexParser
+from isbntools.dev.fmt import fmtbib
 
 
 def download(url):
@@ -41,7 +43,8 @@ def download(url):
     return False
 
 
-isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])")
+isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
+                     re.IGNORECASE)
 
 
 def findISBN(src):
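As a quick, purely illustrative check of the regex tweak mentioned in the commit message (made-up ISBN, not a real one):

```python
import re

old_isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[-][0-9])")
new_isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
                         re.IGNORECASE)

# Uppercase "ISBN" with space-separated groups, as often found in PDF text
text = "ISBN 978 2 12 345678 9"

print(old_isbn_re.search(text))           # None: case-sensitive, and the last separator had to be '-'
print(new_isbn_re.search(text).group(1))  # '978 2 12 345678 9'
```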
@@ -84,17 +87,13 @@ def findISBN(src):
 
 def isbn2Bib(isbn):
     """Tries to get bibtex entry from an ISBN number"""
-    try:
-        # Default merges results from worldcat.org and google books
-        return isbntools.dev.fmt.fmtbib('bibtex',
-                                        isbntools.meta(isbn, 'default'))
-    except:
-        return ''
+    # Default merges results from worldcat.org and google books
+    return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
 
 
-doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]')
-doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+')
-doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}')
+doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
+doi_pnas_re = re.compile('(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE)
+doi_jsb_re = re.compile('10\.1083/jcb\.\d{9}', re.IGNORECASE)
 clean_doi_re = re.compile('^/')
 clean_doi_fabse_re = re.compile('^10.1096')
 clean_doi_jcb_re = re.compile('^10.1083')
@@ -183,7 +182,7 @@ def doi2Bib(doi):
     return ''
 
 
-arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
+arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE)
 
 
 def findArXivId(src):
@@ -215,8 +214,10 @@ def findArXivId(src):
         # Error happened
         tools.warning(err)
         return False
-    else:
+    elif extractID is not None:
         return extractID.group(1)
+    else:
+        return False
 
 
 def arXiv2Bib(arxiv):
@@ -229,7 +230,14 @@ def arXiv2Bib(arxiv):
         if isinstance(bib, arxiv_metadata.ReferenceErrorInfo):
             continue
         else:
-            return bib.bibtex()
+            fetched_bibtex = BibTexParser(bib.bibtex())
+            fetched_bibtex = fetched_bibtex.get_entry_dict()
+            fetched_bibtex = fetched_bibtex[fetched_bibtex.keys()[0]]
+            try:
+                del(fetched_bibtex['file'])
+            except:
+                pass
+            return tools.parsed2Bibtex(fetched_bibtex)
     return False
 
 
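The new arXiv2Bib body implements the last point of the commit message: the fetched entry is parsed, its file field is dropped, and the entry is re-serialized. Below is a minimal sketch of that idea on a made-up entry dict (shaped like what get_entry_dict() returns), not the fetcher's actual output:

```python
import tools  # this repository's tools module

# Hypothetical parsed arXiv entry; field names and values are illustrative
entry = {
    'type': 'article',
    'id': '1234.5678v1',
    'author': 'Doe, John',
    'file': '1234.5678v1.pdf',
    'title': 'An example preprint',
    'year': '2014',
}

entry.pop('file', None)            # same effect as the try/del/except block above
print(tools.parsed2Bibtex(entry))  # clean bibtex string, without the file field
```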
main.py (12 lines changed)
@@ -27,7 +27,7 @@ def checkBibtex(filename, bibtex):
     if len(bibtex) > 0:
         bibtex_name = bibtex.keys()[0]
         bibtex = bibtex[bibtex_name]
-        bibtex_string = backend.parsed2Bibtex(bibtex)
+        bibtex_string = tools.parsed2Bibtex(bibtex)
     else:
         bibtex_string = ''
     print(bibtex_string)
@@ -54,7 +54,7 @@ def checkBibtex(filename, bibtex):
     if len(bibtex) > 0:
         bibtex_name = bibtex.keys()[0]
         bibtex = bibtex[bibtex_name]
-        bibtex_string = backend.parsed2Bibtex(bibtex)
+        bibtex_string = tools.parsed2Bibtex(bibtex)
     else:
         bibtex_string = ''
     print("\nThe bibtex entry for "+filename+" is:")
@@ -80,17 +80,17 @@ def addFile(src, filetype, manual):
     if not manual:
         if filetype == 'article' or filetype is None:
             doi = fetcher.findDOI(src)
-        if (filetype == 'article' or filetype is None) and doi is False:
+        if doi is False and (filetype == 'article' or filetype is None):
             arxiv = fetcher.findArXivId(src)
 
-        if filetype == 'book' or (filetype is None and doi is False and
-                                  arxiv is False):
+        if filetype == 'book' or (doi is False and arxiv is False and
+                                  filetype is None):
             isbn = fetcher.findISBN(src)
 
     if doi is False and isbn is False and arxiv is False:
         if filetype is None:
             tools.warning("Could not determine the DOI nor the arXiv id nor " +
-                          "the ISBN for "+src+"."+"Switching to manual entry.")
+                          "the ISBN for "+src+". Switching to manual entry.")
             doi_arxiv_isbn = ''
             while doi_arxiv_isbn not in ['doi', 'arxiv', 'isbn', 'manual']:
                 doi_arxiv_isbn = tools.rawInput("DOI / arXiv " +
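The reordered conditions in addFile amount to a fallback cascade: DOI first, then arXiv id, then ISBN, and finally manual entry. A hypothetical, stripped-down version of that flow (the real code also honours the filetype argument and prompts the user):

```python
import fetcher  # this repository's fetcher module


def guess_identifier(src):
    """Illustrative only: try each detector in turn, as addFile now does."""
    doi = fetcher.findDOI(src)
    if doi is not False:
        return ('doi', doi)

    arxiv = fetcher.findArXivId(src)
    if arxiv is not False:
        return ('arxiv', arxiv)

    isbn = fetcher.findISBN(src)
    if isbn is not False:
        return ('isbn', isbn)

    return ('manual', None)
```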
tools.py (10 lines changed)
@@ -24,6 +24,16 @@ def slugify(value):
     return _slugify_hyphenate_re.sub('_', value)
 
 
+def parsed2Bibtex(parsed):
+    """Convert a single bibtex entry dict to bibtex string"""
+    bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
+
+    for field in [i for i in sorted(parsed) if i not in ['type', 'id']]:
+        bibtex += "\t"+field+"={"+parsed[field]+"},\n"
+    bibtex += "}\n\n"
+    return bibtex
+
+
 def getExtension(filename):
     """Get the extension of filename"""
     return filename[filename.rfind('.'):]
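For reference, this is what the relocated helper produces for a minimal entry dict (illustrative values); fields other than type and id come out sorted and tab-indented:

```python
import tools

entry = {
    'type': 'article',
    'id': 'doe2014',
    'author': 'Doe, John',
    'title': 'An example title',
    'year': '2014',
}

print(tools.parsed2Bibtex(entry))
# @article{doe2014,
#     author={Doe, John},
#     title={An example title},
#     year={2014},
# }
```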