bmc/main.py

#!/usr/bin/python2 -u
# coding=utf8
"""
Main app
"""
from __future__ import print_function

import sys
import shutil
import requests
import subprocess
import re
import os
try:
    from cStringIO import StringIO
except:
    from StringIO import StringIO
from bibtexparser.bparser import BibTexParser
import params


def warning(*objs):
    """
    Print a warning message on stderr.

    All positional arguments are printed after a "WARNING: " prefix,
    separated by spaces, exactly as print() would do.
    """
    message = ("WARNING: ",) + objs
    print(*message, file=sys.stderr)


def bibtexAppend(data):
    """
    Append data to the main bibtex file

    data is a dict as the one from bibtexparser output, mapping field names
    to their (string) values.

    For now this only builds the text of the fields; writing it to the
    bibtex file is not implemented yet, so nothing is returned.
    """
    # Iterate over (field, value) pairs: looping over the dict itself only
    # yields the keys, which cannot be unpacked into two names.
    bibtex = ''.join("\n" + field + ": " + value + ","
                     for field, value in data.items())

    # NOTE(review): "field: value," is not the usual BibTeX field syntax
    # ("field = {value},") - confirm the intended output format.
    # TODO : Write


def replaceAll(text, dic):
    """
    Replace in text every occurrence of each key of dic by its value.

    Replacements are applied one after the other, in the iteration order of
    dic, so a later replacement sees the result of the earlier ones.

    Returns the resulting string.
    """
    # items() exists on both Python 2 and Python 3 dicts (iteritems() is
    # Python 2 only).
    for old, new in dic.items():
        text = text.replace(old, new)
    return text


def findDOI(src):
    """
    Search the text content of a PDF or DjVu file for a DOI.

    The file is converted to text with the external "pdftotext" (.pdf) or
    "djvutxt" (.djvu) command. The lowercased text is then searched with a
    generic DOI pattern, falling back on PNAS and JCB specific patterns, and
    the match is cleaned up and truncated with publisher specific rules.

    src is the path of the file to scan.

    Returns the DOI as a string, or False if the file type is not supported,
    if the converter could not be started, if it wrote anything on stderr,
    or if no DOI was found.
    """
    lowerSrc = src.lower()
    if lowerSrc.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif lowerSrc.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        # Unsupported file type: there is no converter to run
        return False

    try:
        totext = subprocess.Popen(command,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    except OSError:
        # The converter is not installed or cannot be executed
        warning("Unable to run " + command[0] + ".")
        return False

    extractfull, errors = totext.communicate()
    # Any output on stderr is treated as a failed conversion
    if errors:
        return False

    extractfull = extractfull.lower()
    # The literal 'Œ' is presumably how a dash shows up in some extracted
    # texts; the HTML entity form is kept as well - TODO confirm which one
    # the converters really emit.
    extractDOI = re.search(r'(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]',
                           extractfull.replace('&#338;', '-')
                                      .replace('Œ', '-'))
    if not extractDOI:
        # PNAS fix
        extractDOI = re.search(r'(?<=doi).?10.1073/pnas\.\d+',
                               extractfull.replace('pnas', '/pnas'))
    if not extractDOI:
        # JCB fix
        extractDOI = re.search(r'10\.1083/jcb\.\d{9}', extractfull)
    if not extractDOI:
        return False

    cleanDOI = extractDOI.group(0).replace(':', '').replace(' ', '')
    if cleanDOI.startswith('/'):
        cleanDOI = cleanDOI[1:]

    # FASEB J fix
    if re.search(r'^10.1096', cleanDOI):
        cleanDOI = cleanDOI[:20]

    # Second JCB fix
    if re.search(r'^10.1083', cleanDOI):
        cleanDOI = cleanDOI[:21]

    if len(cleanDOI) > 40:
        # Suspiciously long match: cut it at the first letter found after
        # the first digit of the suffix. A dot between two digits counts as
        # a digit, any other dot counts as a letter and a dash counts as a
        # digit. Every substitution keeps the length unchanged, so offsets
        # in the masked string map directly onto cleanDOI.
        masked = re.sub(r'\d\.\d', '000', cleanDOI)
        masked = replaceAll(masked[8:], {'.': 'A', '-': '0'})
        digitStart = False
        for offset, char in enumerate(masked):
            if char.isdigit():
                digitStart = True
            elif char.isalpha() and digitStart:
                cleanDOI = cleanDOI[:8 + offset]
                break

    return cleanDOI


def doi2Bib(doi):
    """
    Fetch the bibTeX metadata of a DOI and return it as a string.

    The DOI is resolved through dx.doi.org, asking for a bibTeX answer with
    the "accept" header.
    From : https://gist.github.com/jrsmith3/5513926
    """
    response = requests.get("http://dx.doi.org/" + doi,
                            headers={"accept": "application/x-bibtex"})
    return response.text


_slugify_strip_re = re.compile(r'[^\w\s-]')
_slugify_hyphenate_re = re.compile(r'[\s]+')
try:
    _slugify_text_type = unicode
except NameError:
    # Python 3: str is the unicode text type
    _slugify_text_type = str


def _slugify(value):
    """
    Normalizes string to plain ASCII, removes every character which is not
    alphanumeric, an underscore, a whitespace or a hyphen, strips the
    surrounding whitespace and converts each run of whitespace to a single
    underscore. Case is left unchanged.

    value is converted to a unicode string first if it is not one already.

    Returns a unicode string.

    Adapted from Django's "django/template/defaultfilters.py".
    """
    import unicodedata
    if not isinstance(value, _slugify_text_type):
        value = _slugify_text_type(value)
    # NFKD splits accented characters into base character + combining mark,
    # the marks are then dropped by the ASCII encoding. Decoding back keeps
    # a text string for the regular expressions on Python 2 and 3 alike.
    value = unicodedata.normalize('NFKD', value)
    value = value.encode('ascii', 'ignore').decode('ascii')
    value = _slugify_strip_re.sub('', value).strip()
    return _slugify_hyphenate_re.sub('_', value)


def getExtension(filename):
    """
    Get the extension of the filename

    The extension is everything from the last dot of the last path
    component, dot included (".pdf" for "dir/paper.pdf", ".gz" for
    "archive.tar.gz").

    Returns an empty string if the last path component has no dot.
    """
    # Only look at the file name itself, a dot in a directory name is not an
    # extension.
    basename = os.path.basename(filename)
    dot = basename.rfind('.')
    if dot == -1:
        # rfind() returns -1 when there is no dot; slicing from -1 would
        # wrongly return the last character.
        return ''
    return basename[dot:]


def addFile(src):
    """
    Add a file to the library

    The DOI of the file is searched with findDOI() (and asked to the user if
    it cannot be found), the matching bibtex entry is fetched with doi2Bib(),
    the file is copied in params.folder under a name built from the
    params.format mask, and the entry is handed to bibtexAppend().

    Supported tokens in the params.format mask:
        %f  last name of the first author
        %l  last name of the last author
        %a  last names of all the authors, comma separated
        %j  journal
        %Y  year
        %t  title

    src is the path of the file to import.

    Exits the program if no bibtex entry can be parsed for the DOI or if the
    file cannot be copied.
    """
    # TODO : Handle books (ISBN)
    doi = findDOI(src)

    if doi is False:
        warning("Could not determine the DOI for "+src+", switching to manual " +
                "entry.")
        doi = raw_input('DOI ? ')
    else:
        print("DOI for "+src+" is "+doi+".")

    # Put each field on its own line before handing the text to the parser
    bibtex = doi2Bib(doi).strip().replace(',', ",\n")
    entries = BibTexParser(StringIO(bibtex)).get_entry_dict()
    if not entries:
        # e.g. unknown DOI: the answer was not a bibtex entry
        sys.exit("Unable to get a bibtex entry for DOI " + doi + ".")
    # Only the first entry is used
    bibtex = entries[next(iter(entries))]

    authors = re.split(' and ', bibtex['author'])
    # Authors are "Last, First": keep the last names only
    last_names = [author.split(',')[0].strip() for author in authors]

    new_name = params.format
    new_name = new_name.replace("%f", last_names[0])
    new_name = new_name.replace("%l", last_names[-1])
    new_name = new_name.replace("%j", bibtex['journal'])
    new_name = new_name.replace("%Y", bibtex['year'])
    new_name = new_name.replace("%t", bibtex['title'])
    new_name = new_name.replace("%a", ', '.join(last_names))

    new_name = params.folder+_slugify(new_name)+getExtension(src)

    while os.path.exists(new_name):
        warning("Error, file "+new_name+" already exists.")
        # Insert " (2)" right before the extension. Slicing the end of the
        # name (instead of str.replace) leaves other occurrences of the
        # extension text in the path untouched.
        extension = getExtension(new_name)
        stem = new_name[:len(new_name) - len(extension)]
        default_rename = stem + " (2)" + extension
        rename = raw_input("New name ["+default_rename+"] ? ")
        if rename == '':
            new_name = default_rename
        else:
            new_name = rename

    # Record the path only once the final name is known, so that it is still
    # right after a rename.
    bibtex['file'] = new_name

    try:
        shutil.copy2(src, new_name)
    except IOError:
        sys.exit("Unable to move file to library dir " + params.folder+".")

    # TODO
    bibtexAppend(bibtex)
    print("File " + src + " successfully imported.")


if __name__ == '__main__':
    # Command line entry point: the first argument selects the action.
    try:
        if len(sys.argv) < 2:
            sys.exit("Usage : TODO")

        command = sys.argv[1]
        if command in ('download', 'list', 'search'):
            # Not implemented yet
            raise Exception('TODO')
        elif command == 'import':
            if len(sys.argv) < 3:
                sys.exit("Usage : " + sys.argv[0] + " import FILE")

            addFile(sys.argv[2])
            sys.exit()
    except KeyboardInterrupt:
        # Leave quietly on Ctrl+C
        sys.exit()
Started the main code 2014-04-24 00:18:49 +02:00			`#!/usr/bin/python2 -u`
			`# coding=utf8`
			`"""`
			`Main app`
			`"""`
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`from __future__ import print_function`
Started the main code 2014-04-24 00:18:49 +02:00
			`import sys`
			`import shutil`
			`import requests`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`import subprocess`
			`import re`
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`import os`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`try:`
			`from cStringIO import StringIO`
			`except:`
			`from StringIO import StringIO`
Started the main code 2014-04-24 00:18:49 +02:00			`from bibtexparser.bparser import BibTexParser`
			`import params`


Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`def warning(*objs):`
			`"""`
			`Write to stderr`
			`"""`
			`print("WARNING: ", *objs, file=sys.stderr)`


Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`def bibtexAppend(data):`
Started the main code 2014-04-24 00:18:49 +02:00			`"""`
			`Append data to the main bibtex file`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`data is a dict as the one from bibtexparser output`
Started the main code 2014-04-24 00:18:49 +02:00			`"""`
			`bibtex = ''`
			`for field, value in data:`
			`bibtex += "\n" + field + ": " + value + ","`

			`# TODO : Write`


Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`def replaceAll(text, dic):`
			`for i, j in dic.iteritems():`
			`text = text.replace(i, j)`
			`return text`


Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`def findDOI(src):`
			`if src.endswith(".pdf"):`
			`totext = subprocess.Popen(["pdftotext", src, "-"],`
			`stdout=subprocess.PIPE,`
			`stderr=subprocess.PIPE)`
			`elif src.endswith(".djvu"):`
			`totext = subprocess.Popen(["djvutxt", src],`
			`stdout=subprocess.PIPE,`
			`stderr=subprocess.PIPE)`

			`extractfull = totext.communicate()`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`if extractfull[1] is not "":`
			`return False`

			`extractfull = extractfull[0]`
			`extractDOI = re.search('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]',`
			`extractfull.lower().replace('Œ', '-'))`
			`if not extractDOI:`
			`# PNAS fix`
			`extractDOI = re.search('(?<=doi).?10.1073/pnas\.\d+',`
			`extractfull.lower().replace('pnas', '/pnas'))`
			`if not extractDOI:`
			`# JSB fix`
			`extractDOI = re.search('10\.1083/jcb\.\d{9}', extractfull.lower())`

			`cleanDOI = False`
			`if extractDOI:`
			`cleanDOI = extractDOI.group(0).replace(':', '').replace(' ', '')`
			`if re.search('^/', cleanDOI):`
			`cleanDOI = cleanDOI[1:]`

			`# FABSE J fix`
			`if re.search('^10.1096', cleanDOI):`
			`cleanDOI = cleanDOI[:20]`

			`# Second JCB fix`
			`if re.search('^10.1083', cleanDOI):`
			`cleanDOI = cleanDOI[:21]`

			`if len(cleanDOI) > 40:`
			`cleanDOItemp = re.sub(r'\d\.\d', '000', cleanDOI)`
			`reps = {'.': 'A', '-': '0'}`
			`cleanDOItemp = replaceAll(cleanDOItemp[8:], reps)`
			`digitStart = 0`
			`for i in range(len(cleanDOItemp)):`
			`if cleanDOItemp[i].isdigit():`
			`digitStart = 1`
			`if cleanDOItemp[i].isalpha() and digitStart:`
			`break`
			`cleanDOI = cleanDOI[0:(8+i)]`

			`return cleanDOI`


			`def doi2Bib(doi):`
			`"""`
			`Return a bibTeX string of metadata for a given DOI.`
			`From : https://gist.github.com/jrsmith3/5513926`
			`"""`
			`url = "http://dx.doi.org/" + doi`
			`headers = {"accept": "application/x-bibtex"}`
			`r = requests.get(url, headers=headers)`
			`return r.text`


Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`_slugify_strip_re = re.compile(r'[^\w\s-]')`
			`_slugify_hyphenate_re = re.compile(r'[\s]+')`
			`def _slugify(value):`
			`"""`
			`Normalizes string, converts to lowercase, removes non-alpha characters,`
			`and converts spaces to hyphens.`

			`From Django's "django/template/defaultfilters.py".`
			`"""`
			`import unicodedata`
			`if not isinstance(value, unicode):`
			`value = unicode(value)`
			`value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')`
			`value = unicode(_slugify_strip_re.sub('', value).strip())`
			`return _slugify_hyphenate_re.sub('_', value)`


			`def getExtension(filename):`
			`"""`
			`Get the extension of the filename`
			`"""`
			`return filename[filename.rfind('.'):]`


Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`def addFile(src):`
Started the main code 2014-04-24 00:18:49 +02:00			`"""`
			`Add a file to the library`
			`"""`
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`# TODO : Handle books (ISBN)`
			`doi = findDOI(src)`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00
			`if doi is False:`
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`warning("Could not determine the DOI for "+src+", switching to manual " +`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`"entry.")`
			`doi = raw_input('DOI ? ')`
			`else:`
			`print("DOI for "+src+" is "+doi+".")`

			`bibtex = doi2Bib(doi).strip().replace(',', ",\n")`
			`bibtex = StringIO(bibtex)`
			`bibtex = BibTexParser(bibtex).get_entry_dict()`
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`bibtex_name = bibtex.keys()[0]`
			`bibtex = bibtex[bibtex_name]`

			`authors = re.split(' and ', bibtex['author'])`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`new_name = params.format`
			`new_name = new_name.replace("%f", authors[0].split(',')[0].strip())`
			`new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())`
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`new_name = new_name.replace("%j", bibtex['journal'])`
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`new_name = new_name.replace("%Y", bibtex['year'])`
			`new_name = new_name.replace("%t", bibtex['title'])`
			`new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()`
			`for i in authors]))`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`new_name = params.folder+_slugify(new_name)+getExtension(src)`
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`bibtex['file'] = new_name`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`while os.path.exists(new_name):`
			`warning("Error, file "+new_name+" already exists.")`
			`default_rename = new_name.replace(getExtension(new_name),`
			`" (2)"+getExtension(new_name))`
			`rename = raw_input("New name ["+default_rename+"] ? ")`
			`if rename == '':`
			`new_name = default_rename`
			`else:`
			`new_name = rename`


Started the main code 2014-04-24 00:18:49 +02:00			`try:`
			`shutil.copy2(src, new_name)`
			`except IOError:`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`sys.exit("Unable to move file to library dir " + params.folder+".")`
Started the main code 2014-04-24 00:18:49 +02:00
Import updated * Added djvu support * Nice mask for file renaming TODO : Append to bibtex index + test if file already exists 2014-04-24 19:38:52 +02:00			`# TODO`
Working on PDF import * Search the PDF file for DOI, manual fallback if not found * Move the PDF file * Add its Bibtex entry to the general bibtex file TODO : * Better renaming * Adding to bibtex file 2014-04-24 16:18:56 +02:00			`bibtexAppend(bibtex)`
Started the main code 2014-04-24 00:18:49 +02:00			`print("File " + src + " successfully imported.")`


			`if __name__ == '__main__':`
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`try:`
			`if len(sys.argv) < 2:`
			`sys.exit("Usage : TODO")`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`if sys.argv[1] == 'download':`
			`raise Exception('TODO')`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`if sys.argv[1] == 'import':`
			`if len(sys.argv) < 3:`
			`sys.exit("Usage : " + sys.argv[0] + " import FILE")`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`addFile(sys.argv[2])`
			`sys.exit()`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`elif sys.argv[1] == 'list':`
			`raise Exception('TODO')`
Started the main code 2014-04-24 00:18:49 +02:00
Check if file already exists when importing 2014-04-24 21:19:27 +02:00			`elif sys.argv[1] == 'search':`
			`raise Exception('TODO')`
			`except KeyboardInterrupt:`
			`sys.exit()`