#!/usr/bin/python2 -u
# coding=utf8
"""
Main app

Command-line entry point of the library manager. Only the `import`
sub-command is implemented: it finds the DOI of a PDF, fetches the matching
BibTeX record and copies the file into the library folder (params.folder).
"""

import sys
import shutil
import requests
import subprocess
import re
try:
    from cStringIO import StringIO
except ImportError:
    # Only a missing cStringIO should trigger the fallback; a bare except
    # would also hide unrelated errors (KeyboardInterrupt, SystemExit, ...).
    from StringIO import StringIO
from bibtexparser.bparser import BibTexParser
import params


def bibtexAppend(data):
    """
    Append data to the main bibtex file

    data is a dict as the one from bibtexparser output. As used by addFile,
    it maps an entry key to a dict of that entry's fields.

    Nothing is written to disk yet (see the TODO below): the text is only
    assembled, and the function returns None.
    """
    lines = []
    # Walk the fields of every entry. Iterating the outer dict directly
    # would yield the entry keys and fail when unpacking them.
    for entry in data.values():
        for field, value in entry.items():
            lines.append("\n" + field + ": " + value + ",")
    bibtex = ''.join(lines)

    # TODO : Write


def replaceAll(text, dic):
    """
    Return text with every key of dic replaced by its associated value

    Replacements are applied one after the other, in the iteration order of
    dic.
    """
    for old, new in dic.items():
        text = text.replace(old, new)
    return text


def PDF2Doi(pdf):
    """
    Try to find the DOI of a PDF file from its text content

    Runs `pdftotext <pdf> -` and searches its standard output for a DOI-like
    pattern, with a few publisher-specific fallbacks.

    Returns the cleaned DOI as a string, or False if pdftotext wrote anything
    to stderr or if no DOI could be found.
    """
    pdftotext = subprocess.Popen(["pdftotext", pdf, "-"],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    extractfull = pdftotext.communicate()
    # Any output on stderr is treated as a failure. Compare by value: `is`
    # tests object identity and only worked by accident of string interning.
    if extractfull[1] != "":
        return False

    extractfull = extractfull[0].lower()
    # 'Œ' is mapped to '-' before matching (presumably a mis-decoded dash in
    # the pdftotext output -- TODO confirm).
    extractDOI = re.search(r'(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]',
                           extractfull.replace('Œ', '-'))
    if not extractDOI:
        # PNAS fix
        extractDOI = re.search(r'(?<=doi).?10.1073/pnas\.\d+',
                               extractfull.replace('pnas', '/pnas'))
    if not extractDOI:
        # JSB fix
        extractDOI = re.search(r'10\.1083/jcb\.\d{9}', extractfull)

    if not extractDOI:
        return False

    cleanDOI = extractDOI.group(0).replace(':', '').replace(' ', '')
    if cleanDOI.startswith('/'):
        cleanDOI = cleanDOI[1:]

    # FABSE J fix
    if cleanDOI.startswith('10.1096'):
        cleanDOI = cleanDOI[:20]

    # Second JCB fix
    if cleanDOI.startswith('10.1083'):
        cleanDOI = cleanDOI[:21]

    if len(cleanDOI) > 40:
        # Overlong match: work on a same-length copy in which "digit.digit"
        # becomes "000", '.' becomes 'A' and '-' becomes '0', then cut the
        # DOI at the first letter found after a digit (skipping the first 8
        # characters).
        cleanDOItemp = re.sub(r'\d\.\d', '000', cleanDOI)
        reps = {'.': 'A', '-': '0'}
        cleanDOItemp = replaceAll(cleanDOItemp[8:], reps)
        digitStart = 0
        for i in range(len(cleanDOItemp)):
            if cleanDOItemp[i].isdigit():
                digitStart = 1
            if cleanDOItemp[i].isalpha() and digitStart:
                break
        # NOTE(review): when the loop never breaks, i is the last index and
        # the final character is dropped -- confirm this is intended.
        cleanDOI = cleanDOI[0:(8+i)]

    return cleanDOI


def doi2Bib(doi):
    """
    Return a bibTeX string of metadata for a given DOI.

    The record is requested from http://dx.doi.org/ with an
    "application/x-bibtex" accept header; the body of the response is
    returned as is, without checking the HTTP status.

    From : https://gist.github.com/jrsmith3/5513926
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    r = requests.get(url, headers=headers)
    return r.text


def addFile(src):
    """
    Add a file to the library

    Determines the DOI of src (asking the user when it cannot be found),
    fetches and parses the BibTeX record, copies the file into params.folder
    and hands the record to bibtexAppend. Exits the program if the copy
    fails.
    """
    # TODO : Handle books + djvu
    if src.endswith(".pdf"):
        doi = PDF2Doi(src)
    elif src.endswith(".djvu"):
        raise Exception("TODO")
    else:
        # No extractor for this extension: fall back to manual entry instead
        # of failing later on an unbound `doi`.
        doi = False

    if doi is False:
        print("Could not determine the DOI for "+src+", switching to manual " +
              "entry.")
        doi = raw_input('DOI ? ')
    else:
        print("DOI for "+src+" is "+doi+".")

    # Put every field on its own line before parsing.
    bibtex = doi2Bib(doi).strip().replace(',', ",\n")
    bibtex = StringIO(bibtex)
    bibtex = BibTexParser(bibtex).get_entry_dict()

    # TODO : Rename
    # NOTE(review): a DOI normally contains '/', so this path points into a
    # sub-directory of params.folder -- confirm when implementing the rename.
    new_name = params.folder+"/"+doi
    bibtex[bibtex.keys()[0]]['file'] = new_name

    try:
        shutil.copy2(src, new_name)
    except IOError:
        sys.exit("Unable to move file to library dir " + params.folder+".")

    bibtexAppend(bibtex)
    print("File " + src + " successfully imported.")


if __name__ == '__main__':
    # Sub-command dispatch; only `import` is implemented so far.
    if len(sys.argv) < 2:
        sys.exit("Usage : TODO")

    if sys.argv[1] == 'download':
        raise Exception('TODO')

    if sys.argv[1] == 'import':
        if len(sys.argv) < 3:
            sys.exit("Usage : " + sys.argv[0] + " import FILE")

        addFile(sys.argv[2])
        sys.exit()

    elif sys.argv[1] == 'list':
        raise Exception('TODO')

    elif sys.argv[1] == 'search':
        raise Exception('TODO')