bmc/main.py
2014-04-26 18:40:32 +02:00

426 lines
13 KiB
Python
Executable File

#!/usr/bin/env python2
# -*- coding: utf8 -*-
from __future__ import print_function
import fetcher
import sys
import shutil
import tempfile
import requests
import subprocess
import re
import os
from isbntools import meta
from isbntools.dev.fmt import fmtbib
try:
from cStringIO import StringIO
except:
from StringIO import StringIO
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogeneize_latex_encoding
from termios import tcflush, TCIOFLUSH
import params
def rawInput(string):
    """
    Prompt the user with `string` and return the line typed in reply.

    The terminal queues attached to stdin are flushed before prompting, so
    that anything typed ahead of the prompt is not taken as the answer.
    """
    tcflush(sys.stdin, TCIOFLUSH)
    answer = raw_input(string)
    return answer
def warning(*objs):
    """
    Print the given objects on stderr, prefixed with "WARNING: ".
    """
    message = ("WARNING: ",) + objs
    print(*message, file=sys.stderr)
def parsed2Bibtex(parsed):
    """
    Serialize one parsed bibtex entry (a dict) as a bibtex string.

    The 'type' and 'id' keys form the entry header; every other key is
    written as a tab-indented `key={value},` line, in sorted key order.
    """
    header = '@' + parsed['type'] + '{' + parsed['id'] + ",\n"
    field_names = [name for name in sorted(parsed)
                   if name not in ('type', 'id')]
    body = ''.join("\t" + name + "={" + parsed[name] + "},\n"
                   for name in field_names)
    return header + body + "}\n"
def bibtexAppend(data):
    """
    Add one entry at the end of the main bibtex index file.

    data is a single bibtex entry dict, in the form produced by
    bibtexparser.
    """
    index_path = params.folder+'index.bib'
    with open(index_path, 'a') as index:
        entry = parsed2Bibtex(data)
        index.write(entry+"\n")
def bibtexRewrite(data):
    """
    Overwrite the bibtex index file with the given entries.

    data maps each entry id to its bibtex entry dict; entries are written
    in the dict's iteration order, each followed by a blank line.
    """
    serialized = ''.join(parsed2Bibtex(data[key])+"\n" for key in data)
    with open(params.folder+'index.bib', 'w') as index:
        index.write(serialized)
def replaceAll(text, dic):
    """
    Apply every replacement listed in dic to text.

    dic maps each substring to search for to its replacement. The
    replacements are applied one after the other, in the dict's iteration
    order, so they should not depend on each other.

    Returns the resulting string.
    """
    # items() rather than iteritems(): same result on Python 2, and the
    # function keeps working on Python 3.
    for old, new in dic.items():
        text = text.replace(old, new)
    return text
def findISBN(src):
    """
    Search the text of a PDF or DjVu file for an ISBN.

    src is the path of the file. Its text is extracted with `pdftotext`
    (for .pdf) or `djvutxt` (for .djvu).

    Returns the ISBN with hyphens and spaces removed, or False when the
    extension is not supported, when the extraction tool wrote anything on
    stderr, or when no ISBN was found in the text.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        # Unsupported extension: there is no extractor to run. This used to
        # fall through to an unbound variable and raise a NameError.
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    extracted, errors = totext.communicate()
    # Any output on stderr is treated as a failed extraction. Test the
    # value itself: `is not ""` compared object identity, not content.
    if errors:
        return False

    extractISBN = re.search(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
                            extracted.lower().replace('Œ', '-'))
    if not extractISBN:
        return False
    return extractISBN.group(1).replace('-', '').replace(' ', '')
def isbn2Bib(isbn):
    """
    Return a bibTeX string for the given ISBN.

    The metadata is looked up with isbntools' `meta` using the 'default'
    service, then formatted as 'bibtex' with `fmtbib`.
    """
    metadata = meta(isbn, 'default')
    return fmtbib('bibtex', metadata)
def findDOI(src):
    """
    Search the text of a PDF or DjVu file for a DOI.

    src is the path of the file. Its text is extracted with `pdftotext`
    (for .pdf) or `djvutxt` (for .djvu), then searched with a generic DOI
    pattern followed by a few publisher-specific fallbacks.

    Returns the cleaned DOI string, or False when the extension is not
    supported, when the extraction tool wrote anything on stderr, or when
    no DOI was found in the text.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        # Unsupported extension: there is no extractor to run. This used to
        # fall through to an unbound variable and raise a NameError.
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    extracted, errors = totext.communicate()
    # Any output on stderr is treated as a failed extraction. Test the
    # value itself: `is not ""` compared object identity, not content.
    if errors:
        return False

    lowered = extracted.lower()
    extractDOI = re.search(r'(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]',
                           lowered.replace('&#338;', '-'))
    if not extractDOI:
        # PNAS fix
        extractDOI = re.search(r'(?<=doi).?10.1073/pnas\.\d+',
                               lowered.replace('pnas', '/pnas'))
    if not extractDOI:
        # JCB fix
        extractDOI = re.search(r'10\.1083/jcb\.\d{9}', lowered)
    if not extractDOI:
        return False

    cleanDOI = extractDOI.group(0).replace(':', '').replace(' ', '')
    if cleanDOI.startswith('/'):
        cleanDOI = cleanDOI[1:]

    # FASEB J fix
    if re.search('^10.1096', cleanDOI):
        cleanDOI = cleanDOI[:20]

    # Second JCB fix
    if re.search('^10.1083', cleanDOI):
        cleanDOI = cleanDOI[:21]

    if len(cleanDOI) > 40:
        # The match most likely swallowed text following the DOI. Map the
        # part after the 8-character prefix to a digits/letters skeleton
        # and cut at the first letter that comes after a digit.
        cleanDOItemp = re.sub(r'\d\.\d', '000', cleanDOI)
        reps = {'.': 'A', '-': '0'}
        cleanDOItemp = replaceAll(cleanDOItemp[8:], reps)
        digitStart = False
        cut = 0
        for cut, char in enumerate(cleanDOItemp):
            if char.isdigit():
                digitStart = True
            if char.isalpha() and digitStart:
                break
        # NOTE(review): when the loop ends without a break, `cut` is the
        # last index, so the final character is dropped. This matches the
        # previous behaviour; confirm it is intended.
        cleanDOI = cleanDOI[0:(8+cut)]

    return cleanDOI
def doi2Bib(doi):
    """
    Fetch the bibTeX metadata of a DOI and return it as a string.

    The DOI is requested from dx.doi.org with an "accept" header asking
    for the "application/x-bibtex" representation.
    From : https://gist.github.com/jrsmith3/5513926
    """
    response = requests.get("http://dx.doi.org/" + doi,
                            headers={"accept": "application/x-bibtex"})
    return response.text
_slugify_strip_re = re.compile(r'[^\w\s-]')
_slugify_hyphenate_re = re.compile(r'[\s]+')
def _slugify(value):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
From Django's "django/template/defaultfilters.py".
"""
import unicodedata
if not isinstance(value, unicode):
value = unicode(value)
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
value = unicode(_slugify_strip_re.sub('', value).strip())
return _slugify_hyphenate_re.sub('_', value)
def getExtension(filename):
    """
    Get the extension of the filename, leading dot included.

    Returns an empty string when the last path component has no extension.
    The previous `rfind('.')` slice returned the last character of the name
    in that case ("README" gave "E"), and was fooled by dots in directory
    names ("/tmp/dir.d/file" gave ".d/file").
    """
    return os.path.splitext(filename)[1]
def checkBibtex(filename, bibtex):
    """
    Show the bibtex entry found for filename and let the user correct it.

    bibtex is a bibtex string; its first parsed entry is displayed. As long
    as the user answers 'n' to the confirmation prompt, every field is
    offered for editing (an empty answer keeps the current value) and new
    fields can be added.

    Returns the entry dict, edits included.
    """
    print("The bibtex entry found for "+filename+" is :")

    parser = BibTexParser(StringIO(bibtex),
                          customization=homogeneize_latex_encoding)
    entries = parser.get_entry_dict()
    entry = entries[entries.keys()[0]]
    print(parsed2Bibtex(entry))

    answer = rawInput("Is it correct ? [Y/n] ")
    while answer.lower() == 'n':
        # Ask for type and id first, then the other fields in sorted order
        others = [name for name in sorted(entry)
                  if name not in ['id', 'type']]
        for name in [u'type', u'id'] + others:
            typed = rawInput(name.capitalize()+" ? ["+entry[name]+"] ")
            if typed != '':
                entry[name] = typed

        extra = rawInput("Add a new field (leave empty to skip) ? ")
        while extra != '':
            entry[extra] = rawInput("Value for field "+extra+" ? ")
            extra = rawInput("Add a new field (leave empty to skip) ? ")

        print("\nThe bibtex entry for "+filename+" is :")
        print(parsed2Bibtex(entry))
        answer = rawInput("Is it correct ? [Y/n] ")
    return entry
def addFile(src, filetype):
    """
    Add a file to the library.

    src is the path of the file to import. filetype is 'article', 'book',
    or None to try a DOI first and then an ISBN. When the identifier cannot
    be found in the file, the user is asked for it.

    The bibtex entry fetched for the identifier is shown to the user for
    confirmation, the file is copied into the library folder under a name
    built from the entry, and the entry is appended to the index.

    Returns the path of the copy. Exits the program when the copy fails.
    """
    # Start from "not found" for both identifiers: depending on filetype
    # only one of the two lookups runs, and the tests below read both.
    # Without this, 'book' imports and 'article' imports with no DOI found
    # died on an unbound local variable.
    doi = False
    isbn = False

    if filetype == 'article' or filetype is None:
        doi = findDOI(src)

    if filetype == 'book' or (filetype is None and doi is False):
        isbn = findISBN(src)

    if doi is False and isbn is False:
        if filetype is None:
            warning("Could not determine the DOI or the ISBN for "+src+"." +
                    " Switching to manual entry.")
            doi_isbn = ''
            while doi_isbn not in ['doi', 'isbn']:
                doi_isbn = rawInput("DOI / ISBN ? ").lower()
            if doi_isbn == 'doi':
                doi = rawInput('DOI ? ')
            else:
                isbn = rawInput('ISBN ? ')
        elif filetype == 'article':
            warning("Could not determine the DOI for "+src +
                    ", switching to manual entry.")
            doi = rawInput('DOI ? ')
        elif filetype == 'book':
            warning("Could not determine the ISBN for "+src +
                    ", switching to manual entry.")
            isbn = rawInput('ISBN ? ')
    elif doi is not False:
        print("DOI for "+src+" is "+doi+".")
    elif isbn is not False:
        print("ISBN for "+src+" is "+isbn+".")

    if doi is not False:
        # Add extra \n for bibtexparser
        bibtex = doi2Bib(doi).strip().replace(',', ",\n")+"\n"
    else:
        # Idem
        bibtex = isbn2Bib(isbn).strip()+"\n"
    bibtex = checkBibtex(src, bibtex)

    authors = re.split(' and ', bibtex['author'])
    # Keep the part before the first comma of each author
    last_names = [author.split(',')[0].strip() for author in authors]

    if doi is not False:
        new_name = params.format_articles
        new_name = new_name.replace("%j", bibtex['journal'])
    else:
        new_name = params.format_books

    new_name = new_name.replace("%t", bibtex['title'])
    new_name = new_name.replace("%Y", bibtex['year'])
    new_name = new_name.replace("%f", last_names[0])
    new_name = new_name.replace("%l", last_names[-1])
    new_name = new_name.replace("%a", ', '.join(last_names))

    new_name = params.folder+_slugify(new_name)+getExtension(src)

    while os.path.exists(new_name):
        warning("file "+new_name+" already exists.")
        default_rename = new_name.replace(getExtension(new_name),
                                          " (2)"+getExtension(new_name))
        rename = rawInput("New name ["+default_rename+"] ? ")
        if rename == '':
            new_name = default_rename
        else:
            new_name = rename

    bibtex['file'] = new_name

    try:
        shutil.copy2(src, new_name)
    except IOError:
        sys.exit("Unable to move file to library dir " + params.folder+".")

    bibtexAppend(bibtex)
    return new_name
def deleteId(ident):
    """
    Delete a file based on its id in the bibtex file.

    The file recorded in the entry's 'file' field is removed from disk and
    the entry is removed from the index. A file that cannot be removed only
    produces a warning; the entry is dropped from the index anyway.

    Returns False when ident is not in the index, True otherwise.
    """
    with open(params.folder+'index.bib', 'r') as fh:
        bibtex = BibTexParser(fh, customization=homogeneize_latex_encoding)
        bibtex = bibtex.get_entry_dict()

    if ident not in bibtex:
        return False

    # An entry may have no 'file' field. Reading it with [] inside the
    # handler used to raise a second KeyError while reporting the first.
    path = bibtex[ident].get('file', '')
    try:
        os.remove(path)
    except OSError:
        warning("Unable to delete file associated to id "+ident+" : " +
                path)

    del bibtex[ident]
    bibtexRewrite(bibtex)
    return True
def deleteFile(filename):
    """
    Delete a file based on its filename.

    Every index entry whose 'file' field equals filename is removed from
    the index, and the file is removed from disk. A file that cannot be
    removed only produces a warning.

    Returns True when at least one entry matched, False otherwise.
    """
    with open(params.folder+'index.bib', 'r') as fh:
        bibtex = BibTexParser(fh, customization=homogeneize_latex_encoding)
        bibtex = bibtex.get_entry_dict()

    found = False
    # Iterate over a copy of the keys: entries are deleted inside the loop.
    for key in list(bibtex.keys()):
        # Entries without a 'file' field cannot match; skip them instead of
        # raising a KeyError.
        path = bibtex[key].get('file')
        if path != filename:
            continue
        found = True
        try:
            os.remove(path)
        except OSError:
            warning("Unable to delete file associated to id "+key+" : " +
                    path)
        del bibtex[key]

    if found:
        bibtexRewrite(bibtex)
    return found
def downloadFile(url, filetype):
    """
    Download the document at url and add it to the library.

    The content returned by fetcher.download_url is written to a temporary
    .pdf file, which is then imported with addFile. filetype is passed on
    to addFile unchanged.

    Returns the value returned by addFile, or False when the download
    failed.
    """
    dl = fetcher.download_url(url)
    if dl is False:
        warning("Could not fetch "+url)
        return False

    # Write through the temporary file's own (binary) handle and let the
    # `with` block remove it, even when addFile raises or exits.
    with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp:
        tmp.write(dl)
        # Make sure the content is on disk before addFile reads it by name
        tmp.flush()
        return addFile(tmp.name, filetype)
# Command line entry point: dispatch on the first argument.
#   download URL [article|book]   fetch a document and import it
#   import FILE [article|book]    import a local file
#   delete FILE|ID                remove an entry, by bibtex id or filename
#   list / search / rebuild       not implemented yet
if __name__ == '__main__':
    try:
        if len(sys.argv) < 2:
            sys.exit("Usage : TODO")

        if sys.argv[1] == 'download':
            if len(sys.argv) < 3:
                sys.exit("Usage : " + sys.argv[0] +
                         " download FILE [article|book]")

            # Lowercase before testing, so that "Article" is accepted too
            filetype = None
            if len(sys.argv) > 3 and sys.argv[3].lower() in ["article",
                                                             "book"]:
                filetype = sys.argv[3].lower()

            new_name = downloadFile(sys.argv[2], filetype)
            if new_name is not False:
                print(sys.argv[2]+" successfully imported as "+new_name)
            sys.exit()

        if sys.argv[1] == 'import':
            if len(sys.argv) < 3:
                sys.exit("Usage : " + sys.argv[0] +
                         " import FILE [article|book]")

            filetype = None
            if len(sys.argv) > 3 and sys.argv[3].lower() in ["article",
                                                             "book"]:
                filetype = sys.argv[3].lower()

            new_name = addFile(sys.argv[2], filetype)
            if new_name is not False:
                # The imported path is sys.argv[2]; the `src` name used
                # here before does not exist at module level.
                print("File " + sys.argv[2] +
                      " successfully imported as "+new_name+".")
            sys.exit()

        elif sys.argv[1] == 'delete':
            if len(sys.argv) < 3:
                sys.exit("Usage : " + sys.argv[0] + " delete FILE|ID")

            # Try the argument as a bibtex id first, then as a filename
            if not deleteId(sys.argv[2]):
                if not deleteFile(sys.argv[2]):
                    warning("Unable to delete "+sys.argv[2])
                    sys.exit(1)

            print(sys.argv[2]+" successfully deleted.")
            sys.exit()

        elif sys.argv[1] == 'list':
            raise Exception('TODO')

        elif sys.argv[1] == 'search':
            raise Exception('TODO')

        elif sys.argv[1] == 'rebuild':
            raise Exception('TODO')

    except KeyboardInterrupt:
        # Quit quietly on Ctrl-C
        sys.exit()