Refactor of main script, commands are still to be written

This commit is contained in:
Lucas Verney 2016-01-24 00:17:28 +01:00
parent 96a85feec0
commit 691e752081
29 changed files with 257 additions and 2168 deletions

30
LICENSE
View File

@ -1,9 +1,21 @@
* --------------------------------------------------------------------------------
* "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
* Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice you
* can do whatever you want with this stuff (and you can also do whatever you want
* with this stuff without retaining it, but that's not cool...). If we meet some
* day, and you think this stuff is worth it, you can buy me a <del>beer</del> soda
* in return.
* Phyks
* ---------------------------------------------------------------------------------
The MIT License (MIT)
Copyright (c) 2016 Phyks (Lucas Verney)
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

59
backend/commands.py Normal file
View File

@ -0,0 +1,59 @@
import tempfile
from backend import tools
from libbmc import fetcher
def download(url, filetype, manual, autoconfirm, tag):
    """
    Download a given URL and add it to the library.

    The downloaded payload is stored in a temporary file which is handed
    over to :func:`import_file`. The temporary file is always removed
    afterwards, whether the import succeeded, failed or raised.

    :param url: URL to download.
    :param filetype: paper / book / ``None``.
    :param manual: Passed through unchanged to :func:`import_file` \
            (the ``--manual`` flag, "disable auto-download of bibtex").
    :param autoconfirm: Whether import should be made silent or not.
    :param tag: A tag for this file.
    :returns: The name of the downloaded file once imported, \
            or ``None`` in case of error.
    """
    # Download the paper
    print("Downloading %s" % (url,))
    dl, contenttype = fetcher.download(url)
    if dl is None:
        tools.warning("Could not fetch %s." % (url,))
        return None
    print("Download finished.")

    # Store it to a temporary file. The context manager guarantees cleanup
    # and, unlike a ``try``/``finally`` wrapped around the creation, does not
    # reference an unbound name when the temporary file cannot be created.
    with tempfile.NamedTemporaryFile(suffix='.%s' % (contenttype,)) as tmp:
        # Write through the already opened handle rather than opening the
        # file a second time by name, and flush so that the importer sees
        # the complete content on disk.
        tmp.write(dl)
        tmp.flush()
        # And add it as a normal paper from now on. ``import_file`` already
        # returns ``None`` on error, so its result is forwarded as is.
        return import_file(tmp.name, filetype, manual, autoconfirm, tag)
def import_file(src, filetype, manual, autoconfirm, tag, rename=True):
    """
    Add a file to the library.

    This is currently a placeholder: nothing is imported and ``None`` is
    always returned.

    :param src: The path of the file to import.
    :param filetype: paper / book / ``None``.
    :param manual: Whether BibTeX should be fetched automatically.
    :param autoconfirm: Whether import should be made silent or not.
    :param tag: A tag for this file.
    :param rename: TODO (presumably whether the file should be renamed \
            into the library rather than left in place -- to be confirmed \
            once implemented).
    :returns: The name of the imported file, or ``None`` in case of error.
    """
    # TODO
    return None

View File

@ -1,12 +1,3 @@
from __future__ import unicode_literals
import os
import errno
import imp
import inspect
import json
import sys
import libbmc.tools as tools
# List of available options (in ~/.config/bmc/bmc.json file):
# * folder : folder in which papers are stored
# * proxies : list of proxies to use, e.g. ['', "socks5://localhost:4711"]
@ -23,9 +14,17 @@ import libbmc.tools as tools
# %v = arXiv version (e.g. '-v1') or nothing if not an arXiv paper
# You can add your custom masks to rename files by adding functions in
# ~/.config/masks.py.
# ~/.config/bmc/masks.py.
# E.g. : def f(x): return x.replace('test', 'some_expr')
import os
import errno
import imp
import inspect
import json
import sys
from backend import tools
def make_sure_path_exists(path):
try:
@ -86,7 +85,7 @@ class Config():
folder_exists = make_sure_path_exists(self.get("folder"))
except OSError:
tools.warning("Unable to create paper storage folder.")
sys.exit(1)
raise
self.load_masks()
def save(self):
@ -98,7 +97,7 @@ class Config():
separators=(',', ': ')))
except IOError:
tools.warning("Could not write config file.")
sys.exit(1)
raise
def load_masks(self):
if os.path.isfile(self.config_path + "masks.py"):

8
backend/tools.py Normal file
View File

@ -0,0 +1,8 @@
import sys
def warning(*objs):
    """
    Write warnings to stderr.

    All the given objects are converted to strings, prefixed with
    ``"WARNING: "`` and emitted space-separated on a single line.

    :param objs: Objects to include in the warning message.
    """
    fields = ["WARNING: "]
    fields.extend(str(obj) for obj in objs)
    sys.stderr.write(" ".join(fields) + "\n")

740
bmc.py
View File

@ -1,507 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-
from __future__ import unicode_literals
#!/usr/bin/env python3
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
import bibtexparser
from codecs import open
from libbmc.config import Config
from libbmc import backend
from libbmc import fetcher
from libbmc import tearpages
from libbmc import tools
from backend import commands
from backend import tools
from backend.config import Config
# TODO: Handle config
config = Config()
EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
# Load EDITOR variable
EDITOR = os.environ.get("EDITOR")
def checkBibtex(filename, bibtex_string):
"""
Show a BibTeX entry to the user and let them edit it until it is accepted.

:param filename: Name of the file the entry belongs to (only used in the
messages printed to the user).
:param bibtex_string: The BibTeX entry, as a string.
:returns: The accepted entry, as the dict taken from the parsed BibTeX.
"""
print("The bibtex entry found for "+filename+" is:")
bibtex = bibtexparser.loads(bibtex_string)
bibtex = bibtex.entries_dict
try:
# Only the first parsed entry is considered
bibtex = bibtex[list(bibtex.keys())[0]]
# Check entries are correct
if "title" not in bibtex:
raise AssertionError
if "authors" not in bibtex and "author" not in bibtex:
raise AssertionError
if "year" not in bibtex:
raise AssertionError
# Print the bibtex and confirm
print(tools.parsed2Bibtex(bibtex))
check = tools.rawInput("Is it correct? [Y/n] ")
except KeyboardInterrupt:
sys.exit()
except (IndexError, KeyError, AssertionError):
# An empty or incomplete entry forces a trip through the editor below
print("Missing author, year or title in bibtex.")
check = 'n'
# Remember the file path stored in the entry (if any), to detect a change
# of the ``file`` field made in the editor.
try:
old_filename = bibtex['file']
except KeyError:
old_filename = False
# Edit loop: dump the entry in a temporary file, open it in EDITOR, then
# parse the result again, until the user accepts the entry.
while check.lower() == 'n':
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
tmpfile.write(bibtex_string.encode('utf-8'))
tmpfile.flush()
subprocess.call([EDITOR, tmpfile.name])
tmpfile.seek(0)
# Extra trailing newline added before handing the text to bibtexparser
bibtex = bibtexparser.loads(tmpfile.read().decode('utf-8')+"\n")
bibtex = bibtex.entries_dict
try:
bibtex = bibtex[list(bibtex.keys())[0]]
except (IndexError, KeyError):
tools.warning("Invalid bibtex entry")
bibtex_string = ''
tools.rawInput("Press Enter to go back to editor.")
continue
# NOTE(review): this check uses ``and`` and the key ``authors``, so it only
# rejects entries missing all three fields, whereas the initial check above
# rejects an entry missing any of them -- confirm which one is intended.
if('authors' not in bibtex and 'title' not in bibtex and 'year' not in
bibtex):
tools.warning("Invalid bibtex entry")
bibtex_string = ''
tools.rawInput("Press Enter to go back to editor.")
continue
# An entry which had a ``file`` field must keep one
if old_filename is not False and 'file' not in bibtex:
tools.warning("Invalid bibtex entry. No filename given.")
tools.rawInput("Press Enter to go back to editor.")
check = 'n'
else:
bibtex_string = tools.parsed2Bibtex(bibtex)
print("\nThe bibtex entry for "+filename+" is:")
print(bibtex_string)
check = tools.rawInput("Is it correct? [Y/n] ")
# If the ``file`` field was edited, move the file on disk accordingly
if old_filename is not False and old_filename != bibtex['file']:
try:
print("Moving file to new location…")
shutil.move(old_filename, bibtex['file'])
except shutil.Error:
tools.warning("Unable to move file "+old_filename+" to " +
bibtex['file']+". You should check it manually.")
return bibtex
def addFile(src, filetype, manual, autoconfirm, tag, rename=True):
def parse_args():
"""
Add a file to the library
Build a parser and parse arguments of command line.
:returns: Parsed arguments from the parser.
"""
doi = False
arxiv = False
isbn = False
if not manual:
try:
if filetype == 'article' or filetype is None:
id_type, article_id = fetcher.findArticleID(src)
if id_type == "DOI":
doi = article_id
elif id_type == "arXiv":
arxiv = article_id
if filetype == 'book' or (doi is False and arxiv is False and
filetype is None):
isbn = fetcher.findISBN(src)
except KeyboardInterrupt:
doi = False
arxiv = False
isbn = False
if doi is False and isbn is False and arxiv is False:
if filetype is None:
tools.warning("Could not determine the DOI nor the arXiv id nor " +
"the ISBN for "+src+". Switching to manual entry.")
doi_arxiv_isbn = ''
while(doi_arxiv_isbn not in
['doi', 'arxiv', 'isbn', 'manual', 'skip']):
doi_arxiv_isbn = (tools.rawInput("DOI / arXiv " +
"/ ISBN / manual / skip? ").
lower())
if doi_arxiv_isbn == 'doi':
doi = tools.rawInput('DOI? ')
elif doi_arxiv_isbn == 'arxiv':
arxiv = tools.rawInput('arXiv id? ')
elif doi_arxiv_isbn == 'isbn':
isbn = tools.rawInput('ISBN? ')
elif doi_arxiv_isbn == 'skip':
return False
elif filetype == 'article':
tools.warning("Could not determine the DOI nor the arXiv id for " +
src+", switching to manual entry.")
doi_arxiv = ''
while doi_arxiv not in ['doi', 'arxiv', 'manual', 'skip']:
doi_arxiv = (tools.rawInput("DOI / arXiv / manual / skip? ").
lower())
if doi_arxiv == 'doi':
doi = tools.rawInput('DOI? ')
elif doi_arxiv == 'arxiv':
arxiv = tools.rawInput('arXiv id? ')
elif doi_arxiv == 'skip':
return False
elif filetype == 'book':
isbn_manual = ''
while isbn_manual not in ['isbn', 'manual', 'skip']:
isbn_manual = tools.rawInput("ISBN / manual / skip? ").lower()
if isbn_manual == 'isbn':
isbn = (tools.rawInput('ISBN? ').
replace(' ', '').
replace('-', ''))
elif isbn_manual == 'skip':
return False
elif doi is not False:
print("DOI for "+src+" is "+doi+".")
elif arxiv is not False:
print("ArXiv id for "+src+" is "+arxiv+".")
elif isbn is not False:
print("ISBN for "+src+" is "+isbn+".")
if doi is not False and doi != '':
# Add extra \n for bibtexparser
bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
elif arxiv is not False and arxiv != '':
bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
elif isbn is not False and isbn != '':
# Idem
bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
else:
bibtex = ''
bibtex = bibtexparser.loads(bibtex)
bibtex = bibtex.entries_dict
if len(bibtex) > 0:
bibtex_name = list(bibtex.keys())[0]
bibtex = bibtex[bibtex_name]
bibtex_string = tools.parsed2Bibtex(bibtex)
else:
bibtex_string = ''
if not autoconfirm:
bibtex = checkBibtex(src, bibtex_string)
if not autoconfirm:
tag = tools.rawInput("Tag for this paper (leave empty for default) ? ")
else:
tag = args.tag
bibtex['tag'] = tag
if rename:
new_name = backend.getNewName(src, bibtex, tag)
while os.path.exists(new_name):
tools.warning("file "+new_name+" already exists.")
default_rename = new_name.replace(tools.getExtension(new_name),
" (2)" +
tools.getExtension(new_name))
rename = tools.rawInput("New name ["+default_rename+"]? ")
if rename == '':
new_name = default_rename
else:
new_name = rename
try:
shutil.copy2(src, new_name)
except shutil.Error:
new_name = False
sys.exit("Unable to move file to library dir " +
config.get("folder")+".")
else:
new_name = src
bibtex['file'] = os.path.abspath(new_name)
# Remove first page of IOP papers
try:
if 'IOP' in bibtex['publisher'] and bibtex['ENTRYTYPE'] == 'article':
tearpages.tearpage(new_name)
except (KeyError, shutil.Error, IOError):
pass
backend.bibtexAppend(bibtex)
return new_name
def editEntry(entry, file_id='both'):
"""
Let the user edit the BibTeX entry of an item and update the index.

:param entry: What identifies the item; passed to ``backend.getBibtex``
together with ``file_id``.
:param file_id: ``'file'``, ``'id'`` or ``'both'`` (default). When it is
``'file'``, ``entry`` itself is used as the filename shown to the user.
:returns: ``True`` once the index has been rewritten, ``False`` if the entry
does not exist or the index file could not be opened.
"""
bibtex = backend.getBibtex(entry, file_id)
if bibtex is False:
tools.warning("Entry "+entry+" does not exist.")
return False
if file_id == 'file':
filename = entry
else:
filename = bibtex['file']
new_bibtex = checkBibtex(filename, tools.parsed2Bibtex(bibtex))
# Tag update
if new_bibtex['tag'] != bibtex['tag']:
print("Editing tag, moving file.")
new_name = backend.getNewName(new_bibtex['file'],
new_bibtex,
new_bibtex['tag'])
# Ask for another name as long as the target already exists
while os.path.exists(new_name):
tools.warning("file "+new_name+" already exists.")
default_rename = new_name.replace(tools.getExtension(new_name),
" (2)" +
tools.getExtension(new_name))
rename = tools.rawInput("New name ["+default_rename+"]? ")
if rename == '':
new_name = default_rename
else:
new_name = rename
new_bibtex['file'] = new_name
try:
shutil.move(bibtex['file'], new_bibtex['file'])
except shutil.Error:
tools.warning('Unable to move file '+bibtex['file']+' to ' +
new_bibtex['file'] + ' according to tag edit.')
# Remove the directory of the previous location if it is now empty
try:
if not os.listdir(os.path.dirname(bibtex['file'])):
os.rmdir(os.path.dirname(bibtex['file']))
except OSError:
tools.warning("Unable to delete empty tag dir " +
os.path.dirname(bibtex['file']))
# Reload the whole index, replace the edited entry and write it back
try:
with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
as fh:
index = bibtexparser.load(fh)
index = index.entries_dict
except (TypeError, IOError):
tools.warning("Unable to open index file.")
return False
# NOTE(review): the entry is stored under its (possibly edited) ID; if the ID
# was changed in the editor, the entry under the old ID appears to be left in
# the index -- confirm.
index[new_bibtex['ID']] = new_bibtex
backend.bibtexRewrite(index)
return True
def downloadFile(url, filetype, manual, autoconfirm, tag):
    """
    Download a URL and import the resulting file in the library.

    The payload is written to a temporary file which is passed to
    ``addFile``. The temporary file is removed in every case, including when
    the import fails or raises (it used to be left open in those cases).

    :param url: URL to download.
    :param filetype: Type of the file, forwarded to ``addFile``.
    :param manual: Forwarded to ``addFile``.
    :param autoconfirm: Forwarded to ``addFile``.
    :param tag: Forwarded to ``addFile``.
    :returns: The value returned by ``addFile`` (name of the imported file),
        or ``False`` if the download or the import failed.
    """
    print('Downloading '+url)
    dl, contenttype = fetcher.download(url)
    if dl is False:
        tools.warning("Could not fetch "+url)
        return False
    print('Download finished')

    # The context manager closes (and thereby deletes) the temporary file
    # whatever happens during the import.
    with tempfile.NamedTemporaryFile(suffix='.'+contenttype) as tmp:
        tmp.write(dl)
        # Make sure the whole content is on disk before it is read by name
        tmp.flush()
        # ``addFile`` returns False on failure, which is forwarded as is
        return addFile(tmp.name, filetype, manual, autoconfirm, tag)
def openFile(ident):
    """
    Open the file associated with an identifier of the index.

    The file is opened through ``xdg-open``, without waiting for it.

    :param ident: Identifier of the entry in the BibTeX index.
    :returns: ``True`` if the viewer was started, ``False`` if the index
        could not be read or does not contain ``ident``.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r',
                  encoding='utf-8') as index_fh:
            entries = bibtexparser.load(index_fh).entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    if ident in entries:
        subprocess.Popen(['xdg-open', entries[ident]['file']])
        return True
    return False
def resync():
"""
Interactively reconcile the BibTeX index with the files on disk.

Works on the result of ``backend.diffFilesIndex()``: entries whose ``file``
is empty are index entries without a file, the others are files without an
index entry. Returns ``False`` if the diff could not be computed.
"""
diff = backend.diffFilesIndex()
if diff is False:
return False
for key in diff:
entry = diff[key]
# Case 1: entry of the index without any file on disk
if entry['file'] == '':
print("\nFound entry in index without associated file: " +
entry['ID'])
print("Title:\t"+entry['title'])
loop = True
# NOTE(review): ``confirm`` is not defined in this function (``loop`` is the
# variable set just above and updated below); this looks like it should be
# ``while loop:`` -- confirm.
while confirm:
filename = tools.rawInput("File to import for this entry " +
"(leave empty to delete the " +
"entry)? ")
if filename == '':
break
else:
# Compare the identifiers found in the chosen file with the ones of the
# entry, and ask before going on when they differ.
if 'doi' in list(entry.keys()):
doi = fetcher.findArticleID(filename, only=["DOI"])
if doi is not False and doi != entry['doi']:
loop = tools.rawInput("Found DOI does not " +
"match bibtex entry " +
"DOI, continue anyway " +
"? [y/N]")
loop = (loop.lower() != 'y')
if 'Eprint' in list(entry.keys()):
arxiv = fetcher.findArticleID(filename, only=["arXiv"])
if arxiv is not False and arxiv != entry['Eprint']:
loop = tools.rawInput("Found arXiv id does " +
"not match bibtex " +
"entry arxiv id, " +
"continue anyway ? [y/N]")
loop = (loop.lower() != 'y')
if 'isbn' in list(entry.keys()):
isbn = fetcher.findISBN(filename)
if isbn is not False and isbn != entry['isbn']:
loop = tools.rawInput("Found ISBN does not " +
"match bibtex entry " +
"ISBN, continue anyway " +
"? [y/N]")
loop = (loop.lower() != 'y')
continue
# An empty answer means the entry should be dropped from the index
if filename == '':
backend.deleteId(entry['ID'])
print("Deleted entry \""+entry['ID']+"\".")
else:
new_name = backend.getNewName(filename, entry)
try:
shutil.copy2(filename, new_name)
print("Imported new file "+filename+" for entry " +
entry['ID']+".")
except shutil.Error:
new_name = False
sys.exit("Unable to move file to library dir " +
config.get("folder")+".")
# NOTE(review): the index is updated with ``filename`` (the source path),
# not with ``new_name`` (the copy made in the library) -- confirm.
backend.bibtexEdit(entry['ID'], {'file': filename})
# Case 2: file on disk without any entry in the index
else:
print("Found file without any associated entry in index:")
print(entry['file'])
action = ''
while action.lower() not in ['import', 'delete']:
action = tools.rawInput("What to do? [import / delete] ")
action = action.lower()
if action == 'import':
# Re-import from a temporary copy, the original being removed first
tmp = tempfile.NamedTemporaryFile()
shutil.copy(entry['file'], tmp.name)
filetype = tools.getExtension(entry['file'])
try:
os.remove(entry['file'])
except OSError:
tools.warning("Unable to delete file "+entry['file'])
# NOTE(review): ``addFile`` is declared with three more required parameters
# (manual, autoconfirm, tag), and ``filetype`` is a file extension here
# rather than 'article' / 'book' -- confirm.
if not addFile(tmp.name, filetype):
tools.warning("Unable to reimport file "+entry['file'])
tmp.close()
else:
backend.deleteFile(entry['file'])
print(entry['file'] + " removed from disk and " +
"index.")
# Check for empty tag dirs
for i in os.listdir(config.get("folder")):
# NOTE(review): ``os.path.isdir(i)`` tests the bare name, i.e. relative to
# the current working directory, while the other calls prefix it with the
# library folder -- confirm.
if os.path.isdir(i) and not os.listdir(config.get("folder") + i):
try:
os.rmdir(config.get("folder") + i)
except OSError:
tools.warning("Found empty tag dir "+config.get("folder") + i +
" but could not delete it.")
def update(entry):
    """
    Look for a newer arXiv version of an entry and offer to download it.

    When a new version is found (``backend.updateArXiv``), the user is asked
    whether it should be downloaded and, once it has been imported, whether
    the previous version should be deleted.

    :param entry: What identifies the item to update; it is also passed to
        ``backend.deleteId`` / ``backend.deleteFile`` to remove the previous
        version.
    :returns: ``None``.
    """
    new_version = backend.updateArXiv(entry)
    if new_version is False:
        return

    print("New version found for "+entry)
    print("\t Title: "+new_version['title'])
    confirm = tools.rawInput("Download it ? [Y/n] ")
    if confirm.lower() == 'n':
        return

    url = 'http://arxiv.org/pdf/'+new_version['eprint']
    # ``downloadFile`` requires all of (url, filetype, manual, autoconfirm,
    # tag). BibTeX is fetched automatically (manual=False), the import stays
    # interactive (autoconfirm=False) and no tag is forced.
    new_name = downloadFile(url, 'article', False, False, '')
    if new_name is False:
        tools.warning("An error occurred while downloading "+url)
        # Do not offer to delete the previous version when the new one could
        # not be imported.
        return
    print(new_version['eprint']+" successfully imported as "+new_name)

    confirm = tools.rawInput("Delete previous version ? [y/N] ")
    if confirm.lower() == 'y':
        # Try an id based deletion first, then a filename based one
        if not backend.deleteId(entry) and not backend.deleteFile(entry):
            tools.warning("Unable to remove previous version.")
            return
        print("Previous version successfully deleted.")
def commandline_arg(bytestring):
    """
    Convert a command-line argument to a unicode string.

    :param bytestring: The argument as received from the command line.
    :returns: The argument unchanged on Python 3; on Python 2, the argument
        decoded using the filesystem encoding.
    """
    if sys.version_info < (3, 0):
        # UTF-8 encoding for python2
        return bytestring.decode(sys.getfilesystemencoding())
    return bytestring
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="A bibliography " +
"management tool.")
parser = argparse.ArgumentParser(
description="A bibliography management tool.")
parser.add_argument("-c", "--config", default=None,
help="path to a custom config dir.")
subparsers = parser.add_subparsers(help="sub-command help", dest='parser')
subparsers.required = True # Fix for Python 3.3.5
parser_download = subparsers.add_parser('download', help="download help")
parser_download.add_argument('-t', '--type', default=None,
choices=['article', 'book'],
help="type of the file to download",
type=commandline_arg)
choices=['paper', 'book'],
help="type of the file to download")
parser_download.add_argument('-m', '--manual', default=False,
action='store_true',
help="disable auto-download of bibtex")
parser_download.add_argument('-y', default=False,
help="Confirm all")
parser_download.add_argument('--tag', default='',
help="Tag", type=commandline_arg)
help="Tag")
parser_download.add_argument('--keep', default=False,
help="Do not remove the file")
parser_download.add_argument('url', nargs='+',
help="url of the file to import",
type=commandline_arg)
help="url of the file to import")
parser_download.set_defaults(func='download')
parser_import = subparsers.add_parser('import', help="import help")
parser_import.add_argument('-t', '--type', default=None,
choices=['article', 'book'],
help="type of the file to import",
type=commandline_arg)
choices=['paper', 'book'],
help="type of the file to import")
parser_import.add_argument('-m', '--manual', default=False,
action='store_true',
help="disable auto-download of bibtex")
parser_import.add_argument('-y', default=False,
help="Confirm all")
parser_import.add_argument('--tag', default='', help="Tag",
type=commandline_arg)
parser_import.add_argument('--tag', default='', help="Tag")
parser_import.add_argument('--in-place', default=False,
dest="inplace", action='store_true',
help="Leave the imported file in place",)
parser_import.add_argument('file', nargs='+',
help="path to the file to import",
type=commandline_arg)
help="path to the file to import")
parser_import.add_argument('--skip', nargs='+',
help="path to files to skip", default=[],
type=commandline_arg)
help="path to files to skip", default=[])
parser_import.set_defaults(func='import')
parser_delete = subparsers.add_parser('delete', help="delete help")
parser_delete.add_argument('entries', metavar='entry', nargs='+',
help="a filename or an identifier",
type=commandline_arg)
help="a filename or an identifier")
parser_delete.add_argument('--skip', nargs='+',
help="path to files to skip", default=[],
type=commandline_arg)
help="path to files to skip", default=[])
group = parser_delete.add_mutually_exclusive_group()
group.add_argument('--id', action="store_true", default=False,
help="id based deletion")
@ -514,11 +81,9 @@ if __name__ == '__main__':
parser_edit = subparsers.add_parser('edit', help="edit help")
parser_edit.add_argument('entries', metavar='entry', nargs='+',
help="a filename or an identifier",
type=commandline_arg)
help="a filename or an identifier")
parser_edit.add_argument('--skip', nargs='+',
help="path to files to skip", default=[],
type=commandline_arg)
help="path to files to skip", default=[])
group = parser_edit.add_mutually_exclusive_group()
group.add_argument('--id', action="store_true", default=False,
help="id based deletion")
@ -529,19 +94,14 @@ if __name__ == '__main__':
parser_list = subparsers.add_parser('list', help="list help")
parser_list.set_defaults(func='list')
parser_search = subparsers.add_parser('search', help="search help")
parser_search.set_defaults(func='search')
parser_open = subparsers.add_parser('open', help="open help")
parser_open.add_argument('ids', metavar='id', nargs='+',
help="an identifier",
type=commandline_arg)
help="an identifier")
parser_open.set_defaults(func='open')
parser_export = subparsers.add_parser('export', help="export help")
parser_export.add_argument('ids', metavar='id', nargs='+',
help="an identifier",
type=commandline_arg)
help="an identifier")
parser_export.set_defaults(func='export')
parser_resync = subparsers.add_parser('resync', help="resync help")
@ -549,131 +109,133 @@ if __name__ == '__main__':
parser_update = subparsers.add_parser('update', help="update help")
parser_update.add_argument('--entries', metavar='entry', nargs='+',
help="a filename or an identifier",
type=commandline_arg)
help="a filename or an identifier")
parser_update.set_defaults(func='update')
parser_search = subparsers.add_parser('search', help="search help")
parser_search.add_argument('query', metavar='entry', nargs='+',
help="your query, see README for more info.",
type=commandline_arg)
parser_search.set_defaults(func='search')
return parser.parse_args()
args = parser.parse_args()
try:
if args.func == 'download':
skipped = []
for url in args.url:
new_name = downloadFile(url, args.type, args.manual, args.y,
args.tag)
if new_name is not False:
print(url+" successfully imported as "+new_name)
else:
tools.warning("An error occurred while downloading "+url)
skipped.append(url)
if len(skipped) > 0:
print("\nSkipped files:")
for i in skipped:
print(i)
sys.exit()
if args.func == 'import':
skipped = []
for filename in list(set(args.file) - set(args.skip)):
new_name = addFile(filename, args.type, args.manual, args.y,
args.tag, not args.inplace)
if new_name is not False:
print(filename+" successfully imported as " +
new_name+".")
else:
tools.warning("An error occurred while importing " +
filename)
skipped.append(filename)
if len(skipped) > 0:
print("\nSkipped files:")
for i in skipped:
print(i)
sys.exit()
def main():
"""
Main function.
"""
global config
elif args.func == 'delete':
skipped = []
for filename in list(set(args.entries) - set(args.skip)):
if not args.force:
confirm = tools.rawInput("Are you sure you want to " +
"delete "+filename+" ? [y/N] ")
else:
confirm = 'y'
# Parse arguments
args = parse_args()
if confirm.lower() == 'y':
if args.file or not backend.deleteId(filename, args.keep):
if(args.id or
not backend.deleteFile(filename, args.keep)):
tools.warning("Unable to delete "+filename)
sys.exit(1)
# Load the custom config if needed
if args.config is not None:
config = Config(base_config_path=args.config)
print(filename+" successfully deleted.")
else:
skipped.append(filename)
if len(skipped) > 0:
print("\nSkipped files:")
for i in skipped:
print(i)
sys.exit()
elif args.func == 'edit':
for filename in list(set(args.entries) - set(args.skip)):
if args.file:
file_id = 'file'
elif args.id:
file_id = 'id'
else:
file_id = 'both'
editEntry(filename, file_id)
sys.exit()
elif args.func == 'list':
listPapers = backend.getEntries(full=True)
if not listPapers:
sys.exit()
listPapers = [v["file"] for k, v in listPapers.items()]
listPapers.sort()
for paper in listPapers:
print(paper)
sys.exit()
elif args.func == 'search':
raise Exception('TODO')
elif args.func == 'open':
for filename in args.ids:
if not openFile(filename):
sys.exit("Unable to open file associated " +
"to ident "+filename)
sys.exit()
elif args.func == 'export':
bibtex = ''
for id in args.ids:
bibtex += tools.parsed2Bibtex(backend.getBibtex(id,
clean=True))
print(bibtex.strip())
sys.exit
elif args.func == 'resync':
confirm = tools.rawInput("Resync files and bibtex index? [y/N] ")
if confirm.lower() == 'y':
resync()
sys.exit()
elif args.func == 'update':
if args.entries is None:
entries = backend.getEntries()
# Download command
if args.func == 'download':
skipped = []
for url in args.url:
# Try to download the URL
new_name = commands.download(url, args.type, args.manual, args.y,
args.tag)
if new_name is not None:
print("%s successfully imported as %s." % (url, new_name))
else:
entries = args.entries
for entry in entries:
update(entry)
sys.exit()
tools.warning("An error occurred while downloading %s." %
(url,))
skipped.append(url)
# Output URLs with errors
if len(skipped) > 0:
tools.warning("Skipped URLs:")
for i in skipped:
tools.warning(i)
# Import command
elif args.func == 'import':
skipped = []
# Handle exclusions
files_to_process = list(set(args.file) - set(args.skip))
for filename in files_to_process:
# Try to import the file
new_name = commands.import_file(filename, args.type,
args.manual, args.y,
args.tag, not args.inplace)
if new_name is not None:
print("%s successfully imported as %s." % (filename, new_name))
else:
tools.warning("An error occurred while importing %s." %
(filename,))
skipped.append(filename)
# Output files with errors
if len(skipped) > 0:
tools.warning("Skipped files:")
for i in skipped:
tools.warning(i)
# Delete command
elif args.func == 'delete':
skipped = []
# Handle exclusions
items_to_process = list(set(args.entries) - set(args.skip))
for item in items_to_process:
# Confirm before deletion
if not args.force:
confirm = input("Are you sure you want to delete %s? [y/N] " %
(item,))
else:
confirm = 'y'
# Try to delete the item
if confirm.lower() == 'y':
file_or_id = "id" if args.id else "file" if args.file else None
commands.delete(item, args.keep, file_or_id)
print("%s successfully deleted." % (item,))
else:
skipped.append(item)
# Output items with errors
if len(skipped) > 0:
tools.warning("Skipped items:")
for i in skipped:
tools.warning(i)
# Edit command
elif args.func == 'edit':
# Handle exclusions
items_to_process = list(set(args.entries) - set(args.skip))
for item in items_to_process:
file_or_id = "id" if args.id else "file" if args.file else None
commands.edit(item, file_or_id)
# List command
elif args.func == 'list':
# List all available items
for id, file in commands.list().items():
# And print them as "identifier: file"
print("%s: %s" % (id, file))
# Open command
elif args.func == 'open':
# Open each entry
for id in args.ids:
if commands.open(id) is None:
# And warn the user about missing files or errors
tools.warning("Unable to open file associated with ident %s." %
(id,))
# Export command
elif args.func == 'export':
print(commands.export(args.ids))
# Resync command
elif args.func == 'resync':
confirm = input("Resync files and bibtex index? [y/N] ")
if confirm.lower() == 'y':
commands.resync()
# Update command
elif args.func == 'update':
commands.update(args.entries)
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
sys.exit()

View File

@ -1,2 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

View File

@ -1,336 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import unicode_literals
import os
import re
import libbmc.tools as tools
import libbmc.fetcher as fetcher
import bibtexparser
from libbmc.config import Config
from codecs import open
config = Config()
def getNewName(src, bibtex, tag='', override_format=None):
    """
    Return the formatted name according to config for the given
    bibtex entry

    The format mask comes from the ``format_articles`` / ``format_books``
    config options (or from ``override_format``) and the following
    placeholders are replaced: ``%j`` (journal, articles only), ``%t``
    (title), ``%Y`` (year), ``%f`` / ``%l`` (first / last author), ``%a``
    (all authors) and ``%v`` (arXiv version). The custom masks from the
    ``format_custom`` config option are applied afterwards.

    :param src: Path of the source file; only its extension is used.
    :param bibtex: The bibtex entry, as a dict. ``author``, ``title`` and
        ``ENTRYTYPE`` are required.
    :param tag: Optional tag. When not empty, the file is placed in a
        sub-directory of the library named after the slugified tag, which is
        created if needed.
    :param override_format: Format mask to use instead of the configured one.
    :returns: The new path for the file, inside the library folder.
    """
    authors = re.split(' and ', bibtex['author'])
    # Only the part before the first comma of each author is used
    surnames = [author.split(',')[0].strip() for author in authors]

    # NOTE(review): entry types other than article / book leave ``new_name``
    # unset and fail below; behavior kept as is.
    if bibtex['ENTRYTYPE'] == 'article':
        if override_format is None:
            new_name = config.get("format_articles")
        else:
            new_name = override_format
        try:
            new_name = new_name.replace("%j", bibtex['journal'])
        except KeyError:
            pass
    elif bibtex['ENTRYTYPE'] == 'book':
        if override_format is None:
            new_name = config.get("format_books")
        else:
            new_name = override_format

    new_name = new_name.replace("%t", bibtex['title'])
    try:
        new_name = new_name.replace("%Y", bibtex['year'])
    except KeyError:
        pass
    new_name = new_name.replace("%f", surnames[0])
    new_name = new_name.replace("%l", surnames[-1])
    new_name = new_name.replace("%a", ', '.join(surnames))
    if('archiveprefix' in bibtex and
       'arXiv' in bibtex['archiveprefix']):
        # Version is everything from the last 'v' of the eprint identifier
        version = bibtex['eprint'][bibtex['eprint'].rfind('v'):]
        new_name = new_name.replace("%v", '-' + version)
    else:
        new_name = new_name.replace("%v", '')

    for custom in config.get("format_custom"):
        new_name = custom(new_name)

    folder = config.get("folder")
    filename = tools.slugify(new_name) + tools.getExtension(src)
    if tag == '':
        return folder + filename

    # The directory which is checked / created must be the very one used in
    # the returned path, hence the slugified tag in both places (the raw tag
    # used to be used for the directory and the slugified one for the path).
    tag_dir = folder + tools.slugify(tag)
    if not os.path.isdir(tag_dir):
        try:
            os.mkdir(tag_dir)
        except OSError:
            tools.warning("Unable to create tag dir " + tag_dir + ".")
    return tag_dir + '/' + filename
def bibtexAppend(data):
    """Append data to the main bibtex file

    data is a dict for one entry in bibtex, as the one from bibtexparser output

    Raises IOError if the index file cannot be opened or written; a warning
    is emitted before the error is propagated.
    """
    try:
        with open(config.get("folder")+'index.bib', 'a', encoding='utf-8') \
                as fh:
            fh.write(tools.parsed2Bibtex(data)+"\n")
    except IOError:
        # Warn *before* re-raising: the warning used to sit after the
        # ``raise`` and could never run. The exception still propagates, as
        # callers have always seen it.
        tools.warning("Unable to open index file.")
        raise
def bibtexEdit(ident, modifs):
    """Update ident key in bibtex file, modifications are in modifs dict

    The whole index is loaded, the fields listed in modifs are set on the
    entry ident, and the index is written back with bibtexRewrite.
    Returns False if the index file could not be read.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r',
                  encoding='utf-8') as index_fh:
            entries = bibtexparser.load(index_fh).entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    for field, value in modifs.items():
        entries[ident][field] = value

    bibtexRewrite(entries)
def bibtexRewrite(data):
    """Rewrite the bibtex index file.

    data is a dict of bibtex entry dict.

    Every entry is serialized with tools.parsed2Bibtex and the index file is
    overwritten with the result. Returns False if the index file could not
    be written.
    """
    content = ''.join(tools.parsed2Bibtex(item)+"\n"
                      for item in data.values())
    try:
        with open(config.get("folder")+'index.bib', 'w',
                  encoding='utf-8') as index_fh:
            index_fh.write(content)
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    ident is the identifier of the entry in the index.
    keep, when true, leaves the associated file on disk and only removes the
    entry from the index.

    Returns True once the entry has been removed from the index, False if
    the index could not be read or does not contain ident.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex:
        return False

    # Look the path up once, without raising: the previous error handlers
    # indexed ``bibtex[ident]['file']`` again to build their message, so an
    # entry without a ``file`` field raised KeyError from inside the handler
    # meant to deal with it.
    filename = bibtex[ident].get('file')
    if filename is None:
        tools.warning("No file associated to id " + ident + " in index.")
    else:
        if not keep:
            try:
                os.remove(filename)
            except OSError:
                tools.warning("Unable to delete file associated to id " +
                              ident + " : " + filename)
        # Remove the directory the file was in if it is now empty
        tag_dir = os.path.dirname(filename)
        try:
            if not os.listdir(tag_dir):
                os.rmdir(tag_dir)
        except OSError:
            tools.warning("Unable to delete empty tag dir " + tag_dir)

    # ``ident`` is known to be in the index at this point
    del bibtex[ident]
    bibtexRewrite(bibtex)
    return True
def deleteFile(filename, keep=False):
    """Delete the index entries pointing to a file, and the file itself.

    Every entry whose 'file' field designates the same file as filename
    (compared with os.path.samefile) is removed from the index. Unless keep
    is true, the file is removed from disk as well; the directory holding
    filename is removed when it ends up empty.

    Returns False if the index cannot be read, otherwise whether at least
    one matching entry was found.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    found = False
    # Iterate over a copy of the keys: entries are deleted while looping.
    for key in list(bibtex.keys()):
        try:
            if not os.path.samefile(bibtex[key]['file'], filename):
                continue
        except (KeyError, OSError):
            # Entry without a file, or one of the two paths does not exist
            # (anymore): it cannot match.
            continue

        found = True
        if not keep:
            try:
                os.remove(bibtex[key]['file'])
            except OSError:
                tools.warning("Unable to delete file associated " +
                              "to id " + key+" : "+bibtex[key]['file'])

        try:
            if not os.listdir(os.path.dirname(filename)):
                os.rmdir(os.path.dirname(filename))
        except OSError:
            tools.warning("Unable to delete empty tag dir " +
                          os.path.dirname(filename))

        del bibtex[key]

    if found:
        bibtexRewrite(bibtex)
    elif not keep and os.path.isfile(filename):
        # File unknown to the index: only remove it from disk when the
        # caller did not ask to keep files.
        os.remove(filename)
    return found
def diffFilesIndex():
    """Compute differences between the bibtex index and the PDF/DjVu files.

    Returns False if the index cannot be read, otherwise a dict containing:
        * every bibtex entry of the index, with file='' when its file is
          not among the files listed in the folder (or when the entry has
          no 'file' field at all)
        * one {'file': filename} entry, keyed by filename, for every listed
          file that no index entry refers to
    """
    files = tools.listDir(config.get("folder"))
    files = [i for i in files if tools.getExtension(i) in ['.pdf', '.djvu']]
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index_diff = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    for fields in index_diff.values():
        indexed_file = fields.get('file')
        if indexed_file in files:
            # Each file on disk is matched by at most one entry.
            files.remove(indexed_file)
        else:
            fields['file'] = ''

    # Whatever is left on disk has no entry in the index.
    for filename in files:
        index_diff[filename] = {'file': filename}

    # Return the dict that was actually filled in above.
    return index_diff
def getBibtex(entry, file_id='both', clean=False):
    """Return the bibtex entry corresponding to entry, as a dict.

    entry is either a filename or a bibtex ident
    file_id is 'file', 'id' or 'both', to search by file, by id or by both
    clean removes the fields listed in the "ignore_fields" config option
    from the returned entry

    Returns False if the index cannot be read or no entry matches.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id in ('both', 'id'):
        bibtex_entry = bibtex.get(entry, False)

    # A match by file takes precedence over a match by id.
    if file_id in ('both', 'file') and os.path.isfile(entry):
        for candidate in bibtex.values():
            try:
                same_file = os.path.samefile(candidate['file'], entry)
            except (KeyError, OSError):
                # Entry without a file, or whose file is missing on disk:
                # it cannot match, keep scanning the other entries.
                continue
            if same_file:
                bibtex_entry = candidate
                break

    # Nothing to clean when no entry was found.
    if clean and bibtex_entry is not False:
        for field in config.get("ignore_fields"):
            bibtex_entry.pop(field, None)
    return bibtex_entry
def getEntries(full=False):
    """Return the entries of the bibtex index.

    With full set, the dict of all entries (ident -> entry dict) is
    returned; otherwise only the list of idents.

    Returns False when the index file cannot be read.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    return entries if full else list(entries.keys())
def updateArXiv(entry):
    """Look for new versions of arXiv entry `entry`.

    Returns False if there is no new version, if `entry` is unknown or is
    not an arXiv entry, or if nothing could be fetched for it.
    Returns the new bibtex entry (as a dict) otherwise.
    """
    bibtex = getBibtex(entry)
    # Check arXiv (getBibtex returns False for an unknown entry)
    if(not bibtex or
       'arXiv' not in bibtex.get('archiveprefix', '') or
       'eprint' not in bibtex):
        return False

    arxiv_id = bibtex['eprint']
    arxiv_id_no_v = re.sub(r'v\d+\Z', '', arxiv_id)

    # Collect the eprint ids of every arXiv entry already in the index, so
    # that a version which is already stored is not reported as new.
    ids = {arxiv_id}
    for other in (getEntries(full=True) or {}).values():
        if 'arXiv' not in other.get('archiveprefix', ''):
            continue
        if 'eprint' in other:
            ids.add(other['eprint'])

    last_bibtex = bibtexparser.loads(fetcher.arXiv2Bib(arxiv_id_no_v))
    last_bibtex = last_bibtex.entries_dict
    if not last_bibtex:
        # Nothing could be fetched for this id
        return False
    last_bibtex = last_bibtex[list(last_bibtex.keys())[0]]

    if last_bibtex.get('eprint') in ids or 'eprint' not in last_bibtex:
        return False
    return last_bibtex
def search(query):
    """Perform a search in the bibtex index.

    Param: query is a dict of keys and the query for these keys

    Not implemented yet: always raises NotImplementedError (which is an
    Exception subclass, so existing `except Exception` callers still work).
    """
    raise NotImplementedError('TODO')

View File

@ -1,359 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import isbnlib
import re
import socket
import socks
import subprocess
import sys
try:
# For Python 3.0 and later
from urllib.request import urlopen, Request
from urllib.error import URLError
except ImportError:
# Fall back to Python 2's urllib2
from urllib2 import urlopen, Request, URLError
import arxiv2bib as arxiv_metadata
import libbmc.tools as tools
import bibtexparser
from libbmc.config import Config
# Configuration object shared by the functions of this module (download()
# reads its "proxies" option).
config = Config()
# Original socket class, kept so that download() can restore socket.socket
# when a request must be made without any proxy.
default_socket = socket.socket
# Encoding used to decode the output of the text extraction tools. Falls back
# to UTF-8 when stdout has no usable encoding (missing attribute or None).
# An explicit test is used rather than an assert, which is stripped under -O.
stdout_encoding = getattr(sys.stdout, 'encoding', None)
if stdout_encoding is None:
    stdout_encoding = 'UTF-8'
def _setProxy(proxy):
    """Route new sockets through `proxy`; return the host used in messages.

    proxy is one item of the "proxies" option: '' for a direct connection,
    'socks4://host:port' / 'socks5://host:port' or 'host:port' (HTTP proxy).
    """
    if proxy == '':
        socket.socket = default_socket
        return proxy

    if proxy.startswith('socks'):
        proxy_type = socks.SOCKS4 if proxy[5:6] == '4' else socks.SOCKS5
        proxy = proxy[proxy.find('://')+3:]
    else:
        proxy_type = socks.HTTP

    # The port must be handed over as an integer; keep None when no
    # (usable) port is given.
    port = None
    try:
        host, port_text = proxy.split(':')
        proxy, port = host, int(port_text)
    except ValueError:
        pass
    socks.set_default_proxy(proxy_type, proxy, port)
    socket.socket = socks.socksocket
    return proxy


def _readWithProgress(response, size):
    """Read `response` fully, drawing a progress bar when size is known."""
    chunks = []
    dl_size = 0
    while True:
        buf = response.read(1024)
        if not buf:
            break
        chunks.append(buf)
        dl_size += len(buf)
        if size != 0:
            # Clamp: the announced size may be smaller than the real one
            done = min(50, int(50 * dl_size / size))
            percent = min(100, int(100 * dl_size / size))
            sys.stdout.write("\r[%s%s]" % ('='*done, ' '*(50-done)))
            sys.stdout.write(" "+str(percent)+"%")
            sys.stdout.flush()
    return b"".join(chunks)


def download(url):
    """Download url.

    Check that it is a valid pdf or djvu file (according to the
    Content-Type header of the response). Tries all the available proxies
    sequentially.

    Returns a tuple (raw content of the file, 'pdf' or 'djvu'), or
    (False, None) if it could not be downloaded.

    Note: socket.socket is left pointing at the last proxy that was tried.
    """
    for proxy in config.get("proxies"):
        proxy = _setProxy(proxy)
        try:
            r = urlopen(url)
            try:
                # Header names are compared case-insensitively
                headers = dict((name.lower(), value)
                               for name, value in dict(r.info()).items())
                try:
                    size = int(headers.get('content-length', '0').strip())
                except ValueError:
                    # Unusable Content-Length: download without progress
                    size = 0
                dl = _readWithProgress(r, size)
                status = r.getcode()
            finally:
                r.close()
        except ValueError:
            tools.warning("Invalid URL")
            return False, None
        except (URLError, socket.error):
            if proxy != "":
                proxy_txt = "using proxy "+proxy
            else:
                proxy_txt = "without using any proxy"
            tools.warning("Unable to get "+url+" "+proxy_txt+". It " +
                          "may not be available at the moment.")
            continue

        contenttype_req = headers.get('content-type', '')
        if 'pdf' in contenttype_req:
            contenttype = 'pdf'
        elif 'djvu' in contenttype_req:
            contenttype = 'djvu'
        else:
            contenttype = False

        # Not the expected kind of document: try the next proxy
        if status != 200 or contenttype is False:
            continue

        return dl, contenttype
    return False, None
# Matches "isbn", optionally followed by a whitespace and/or a colon, then
# captures (group 1) the digits of the number, whose groups may be separated
# by a single space or hyphen. Case-insensitive.
# NOTE(review): the last character must be a digit, so an 'X' check digit
# does not look matchable - confirm whether this is intended.
isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
                     re.IGNORECASE)
def findISBN(src):
    """Search for a valid ISBN in src.

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns the ISBN, without separators, or False if not found, if src is
    of another type or if the extraction tool reported an error.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains both pipes until the tool exits, whatever the
    # moment at which it exits.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    extractISBN = isbn_re.search(extractfull.lower().replace('&#338;',
                                                             '-'))
    if not extractISBN:
        return False
    # Clean ISBN is the ISBN number without separators
    return extractISBN.group(1).replace('-', '').replace(' ', '')
def isbn2Bib(isbn):
    """Try to build a bibtex entry from an ISBN number.

    Returns the formatted entry, or '' when isbnlib raises an
    ISBNLibException or a TypeError.
    """
    try:
        to_bibtex = isbnlib.registry.bibformatters['bibtex']
        # 'default' merges results from worldcat.org and google books
        metadata = isbnlib.meta(isbn, 'default')
        return to_bibtex(metadata)
    except (isbnlib.ISBNLibException, TypeError):
        return ''
# Patterns used by findArticleID. They are written as raw strings so that
# the regex escapes (\s, \S, \d, \.) reach the re module untouched.

# Generic DOI, found right after the text "doi"
doi_re = re.compile(r'(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
# PNAS-style DOI, found right after the text "doi"
doi_pnas_re = re.compile(r'(?<=doi).?10.1073/pnas\.\d+', re.IGNORECASE)
# 10.1083/jcb.<9 digits> DOI, anywhere in the text
doi_jsb_re = re.compile(r'10\.1083/jcb\.\d{9}', re.IGNORECASE)
# Cleaning helpers: leading slash, known DOI prefixes, digit.digit sequences
clean_doi_re = re.compile(r'^/')
clean_doi_fabse_re = re.compile(r'^10.1096')
clean_doi_jcb_re = re.compile(r'^10.1083')
clean_doi_len_re = re.compile(r'\d\.\d')
# arXiv identifier (captured in group 1) following "arXiv:"
arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)', re.IGNORECASE)
def findArticleID(src, only=["DOI", "arXiv"]):
    """Search for a valid article ID (DOI or ArXiv) in src.

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.
    only lists the kinds of identifiers to look for ("DOI" and/or "arXiv").

    Returns a tuple (type, first matching ID) or (False, False) if not
    found, if src is of another type or if an error occurred.

    From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/
    and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
    """
    if src.endswith(".pdf"):
        totext = subprocess.Popen(["pdftotext", src, "-"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    elif src.endswith(".djvu"):
        totext = subprocess.Popen(["djvutxt", src],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    else:
        return (False, False)

    extractfull = ''
    extract_type = False
    extractID = None
    # Accumulate the extracted text for as long as the tool is running, and
    # stop it as soon as an identifier has been found.
    while totext.poll() is None:
        extractfull += ' '.join([i.decode(stdout_encoding).strip() for i in totext.stdout.readlines()])
        # Try to extract DOI
        if "DOI" in only:
            extractlower = extractfull.lower().replace('digital object identifier', 'doi')
            extractID = doi_re.search(extractlower.replace('&#338;', '-'))
            if not extractID:
                # PNAS fix
                extractID = doi_pnas_re.search(extractlower.replace('pnas', '/pnas'))
                if not extractID:
                    # JSB fix
                    extractID = doi_jsb_re.search(extractlower)
            if extractID:
                extract_type = "DOI"
                totext.terminate()
        # Try to extract arXiv
        if "arXiv" in only:
            tmp_extractID = arXiv_re.search(extractfull)
            if tmp_extractID:
                if not extractID or extractID.start(0) > tmp_extractID.start(1):
                    # Only use arXiv id if it is before the DOI in the pdf
                    extractID = tmp_extractID
                    extract_type = "arXiv"
                    totext.terminate()
        if extract_type is not False:
            break

    err = totext.communicate()[1]
    # A positive return code is an error reported by the tool itself; the
    # code of a process stopped by terminate() is not positive.
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return (False, False)

    if extractID is not None and extract_type == "DOI":
        # If DOI extracted, clean it and return it
        cleanDOI = False
        # Drop the "doi"/number separators kept by the match
        cleanDOI = extractID.group(0).replace(':', '').replace(' ', '')
        if clean_doi_re.search(cleanDOI):
            cleanDOI = cleanDOI[1:]
        # FABSE J fix
        if clean_doi_fabse_re.search(cleanDOI):
            cleanDOI = cleanDOI[:20]
        # Second JCB fix
        if clean_doi_jcb_re.search(cleanDOI):
            cleanDOI = cleanDOI[:21]
        if len(cleanDOI) > 40:
            # Overlong match: cut it at the first letter that follows a
            # digit, looking past the first 8 characters. The scan runs on a
            # copy in which "digit.digit" sequences, dots and hyphens are
            # replaced beforehand.
            cleanDOItemp = clean_doi_len_re.sub('000', cleanDOI)
            reps = {'.': 'A', '-': '0'}
            cleanDOItemp = tools.replaceAll(cleanDOItemp[8:], reps)
            digitStart = 0
            for i in range(len(cleanDOItemp)):
                if cleanDOItemp[i].isdigit():
                    digitStart = 1
                if cleanDOItemp[i].isalpha() and digitStart:
                    break
            cleanDOI = cleanDOI[0:(8+i)]
        return ("DOI", cleanDOI)
    elif extractID is not None and extract_type == "arXiv":
        # If arXiv id is extracted, return it
        return ("arXiv", extractID.group(1))
    return (False, False)
def doi2Bib(doi):
    """Return a bibTeX string of metadata for a given DOI.

    The entry is requested from dx.doi.org through content negotiation.
    Returns '' if the server cannot be contacted or does not answer with
    a bibtex document.

    From : https://gist.github.com/jrsmith3/5513926
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    req = Request(url, headers=headers)
    try:
        r = urlopen(req)
        # Header names are compared case-insensitively
        response_headers = dict((name.lower(), value)
                                for name, value in dict(r.info()).items())
        # Only the media type matters: ignore parameters such as charset
        media_type = response_headers.get('content-type', '').split(';')[0]
        if media_type.strip().lower() == 'application/x-bibtex':
            return r.read().decode('utf-8')
        return ''
    except Exception:
        # Best effort: any failure is reported as "no entry", but
        # KeyboardInterrupt and SystemExit are left alone.
        tools.warning('Unable to contact remote server to get the bibtex ' +
                      'entry for doi '+doi)
        return ''
def arXiv2Bib(arxiv):
    """Return a bibTeX string of metadata for a given arXiv id.

    arxiv is an arxiv id

    The first reference that is not an error is returned, without its
    'file' field; '' is returned when there is none.
    """
    for reference in arxiv_metadata.arxiv2bib([arxiv]):
        if isinstance(reference, arxiv_metadata.ReferenceErrorInfo):
            continue
        entries = bibtexparser.loads(reference.bibtex()).entries_dict
        first_key = list(entries.keys())[0]
        fields = entries[first_key]
        if 'file' in fields:
            del fields['file']
        return tools.parsed2Bibtex(fields)
    return ''
# Matches "hal-" followed by 8 digits (group 1), then ", version " and the
# version number (group 2).
HAL_re = re.compile(r'(hal-\d{8}), version (\d+)')
def findHALId(src):
    """Search for a valid HAL id in src.

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns a tuple of the HAL id and the version
    or False if not found, if src is of another type or if the extraction
    tool reported an error.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains both pipes until the tool exits, whatever the
    # moment at which it exits.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    extractID = HAL_re.search(extractfull)
    if not extractID:
        # No HAL id in this document
        return False
    return extractID.group(1), extractID.group(2)

View File

@ -1,298 +0,0 @@
# -*- coding: utf8 -*-
"""Search query parser
Modified by Phyks, 2014-05-18. Original source code is here:
http://pyparsing.wikispaces.com/file/view/searchparser.py/30112816/searchparser.py
version 2006-03-09
This search query parser uses the excellent Pyparsing module
(http://pyparsing.sourceforge.net/) to parse search queries by users.
It handles:
* 'and', 'or' and implicit 'and' operators;
* parentheses;
* quoted strings;
* wildcards at the end of a search term (help*);
Requirements:
* Python
* Pyparsing
If you run this script, it will perform a number of tests. To use it as a
module, you should use inheritance on the SearchQueryParser class and override
the Get... methods. The ParserTest class gives a very simple example of how this
could work.
-------------------------------------------------------------------------------
Copyright (c) 2006, Estrate, the Netherlands
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Estrate nor the names of its contributors may be used
to endorse or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CONTRIBUTORS:
- Steven Mooij
- Rudolph Froger
- Paul McGuire
"""
from pyparsing import Word, alphanums, Keyword, Group, Combine, Forward
from pyparsing import Suppress, Optional, OneOrMore, oneOf, Or
class SearchQueryParser:
    """Parse a search query and evaluate it to a set of results.

    Subclasses provide the actual lookups by overriding GetWord,
    GetWordWildcard, GetQuotes and GetNot; the versions defined here return
    empty sets.
    """

    def __init__(self):
        # Dispatch table: name of a parsed group -> method evaluating it
        self._methods = {
            'and': self.evaluateAnd,
            'or': self.evaluateOr,
            'not': self.evaluateNot,
            'parenthesis': self.evaluateParenthesis,
            'quotes': self.evaluateQuotes,
            'word': self.evaluateWord,
            'wordwildcard': self.evaluateWordWildcard,
        }
        self._parser = self.parser()

    def parser(self):
        """
        This function returns a parser.
        The grammar should be like most full text search engines (Google, Tsearch, Lucene).

        Grammar:
        - a query consists of alphanumeric words, with an optional '*' wildcard
          at the end of a word
        - a sequence of words between quotes is a literal string
        - words can be used together by using operators ('and' or 'or')
        - words with operators can be grouped with parenthesis
        - a word or group of words can be preceded by a 'not' operator
        - the 'and' operator precedes an 'or' operator
        - if an operator is missing, use an 'and' operator
        """
        operatorOr = Forward()

        operatorWord = Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard') | \
            Group(Word(alphanums)).setResultsName('word')

        operatorQuotesContent = Forward()
        operatorQuotesContent << (
            (operatorWord + operatorQuotesContent) | operatorWord
        )

        operatorQuotes = Group(
            Or([Suppress('"') + operatorQuotesContent + Suppress('"'),
                Suppress('\'') + operatorQuotesContent + Suppress('\'')]
               )).setResultsName("quotes") | operatorWord

        operatorParenthesis = Group(
            (Suppress("(") + operatorOr + Suppress(")"))
        ).setResultsName("parenthesis") | operatorQuotes

        operatorNot = Forward()
        operatorNot << (Group(
            Suppress(Keyword("not", caseless=True)) + operatorNot
        ).setResultsName("not") | operatorParenthesis)

        operatorAnd = Forward()
        operatorAnd << (Group(
            operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd
        ).setResultsName("and") | Group(
            operatorNot + OneOrMore(~oneOf("and or") + operatorAnd)
        ).setResultsName("and") | operatorNot)

        operatorOr << (Group(
            operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
        ).setResultsName("or") | operatorAnd)

        # NOTE(review): the top-level group is named 'field', which has no
        # entry in self._methods - Parse() presumably cannot evaluate it
        # yet; confirm the intended handling of "key = query" expressions.
        operatorQ = Forward()
        operatorQ << Group(operatorOr + Suppress('=') +
                           operatorOr).setResultsName('field')

        return operatorQ.parseString

    def evaluateAnd(self, argument):
        """Results common to both operands."""
        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))

    def evaluateOr(self, argument):
        """Results of either operand."""
        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))

    def evaluateNot(self, argument):
        """Complement of the operand's results, as computed by GetNot."""
        return self.GetNot(self.evaluate(argument[0]))

    def evaluateParenthesis(self, argument):
        """Results of the parenthesized expression."""
        return self.evaluate(argument[0])

    def evaluateQuotes(self, argument):
        """Evaluate quoted strings

        First it does an 'and' on the individual search terms, then it asks
        the function GetQuotes to only return the subset of ID's that contain
        the literal string.
        """
        r = None
        search_terms = []
        for item in argument:
            search_terms.append(item[0])
            matches = self.evaluate(item)
            # Only the first term initializes the result: an intersection
            # that became empty must stay empty.
            r = matches if r is None else r.intersection(matches)
        if r is None:
            r = set()
        return self.GetQuotes(' '.join(search_terms), r)

    def evaluateWord(self, argument):
        """Results for a plain word."""
        return self.GetWord(argument[0])

    def evaluateWordWildcard(self, argument):
        """Results for a word ending with a '*' wildcard."""
        return self.GetWordWildcard(argument[0])

    def evaluate(self, argument):
        """Evaluate a parsed group with the method registered for its name."""
        return self._methods[argument.getName()](argument)

    def Parse(self, query):
        """Parse the query string and return the evaluated results."""
        return self.evaluate(self._parser(query)[0])

    def GetWord(self, word):
        """Results for `word`; to be overridden."""
        return set()

    def GetWordWildcard(self, word):
        """Results for words starting with `word`; to be overridden."""
        return set()

    def GetQuotes(self, search_string, tmp_result):
        """Subset of tmp_result containing search_string; to be overridden."""
        return set()

    def GetNot(self, not_set):
        """Everything that is not in not_set; to be overridden."""
        return set().difference(not_set)
class ParserTest(SearchQueryParser):
    """Tests the parser with some search queries

    tests contains a dictionary with tests and expected results.
    docs maps document ids to their text, index maps each word to the ids
    of the documents containing it.
    """
    tests = {
        'help': set([1, 2, 4, 5]),
        'help or hulp': set([1, 2, 3, 4, 5]),
        'help and hulp': set([2]),
        'help hulp': set([2]),
        'help and hulp or hilp': set([2, 3, 4]),
        'help or hulp and hilp': set([1, 2, 3, 4, 5]),
        'help or hulp or hilp or halp': set([1, 2, 3, 4, 5, 6]),
        '(help or hulp) and (hilp or halp)': set([3, 4, 5]),
        'help and (hilp or halp)': set([4, 5]),
        '(help and (hilp or halp)) or hulp': set([2, 3, 4, 5]),
        'not help': set([3, 6, 7, 8]),
        'not hulp and halp': set([5, 6]),
        'not (help and halp)': set([1, 2, 3, 4, 6, 7, 8]),
        '"help me please"': set([2]),
        '"help me please" or hulp': set([2, 3]),
        '"help me please" or (hulp and halp)': set([2]),
        'help*': set([1, 2, 4, 5, 8]),
        'help or hulp*': set([1, 2, 3, 4, 5]),
        'help* and hulp': set([2]),
        'help and hulp* or hilp': set([2, 3, 4]),
        'help* or hulp or hilp or halp': set([1, 2, 3, 4, 5, 6, 8]),
        '(help or hulp*) and (hilp* or halp)': set([3, 4, 5]),
        'help* and (hilp* or halp*)': set([4, 5]),
        '(help and (hilp* or halp)) or hulp*': set([2, 3, 4, 5]),
        'not help* and halp': set([6]),
        'not (help* and helpe*)': set([1, 2, 3, 4, 5, 6, 7]),
        '"help* me please"': set([2]),
        '"help* me* please" or hulp*': set([2, 3]),
        '"help me please*" or (hulp and halp)': set([2]),
        '"help me please" not (hulp and halp)': set([2]),
        '"help me please" hulp': set([2]),
        '\'help me please\' hulp': set([2]),
        'help and hilp and not holp': set([4]),
        'help hilp not holp': set([4]),
        'help hilp and not holp': set([4]),
    }

    docs = {
        1: 'help',
        2: 'help me please hulp',
        3: 'hulp hilp',
        4: 'help hilp',
        5: 'halp thinks he needs help',
        6: 'he needs halp',
        7: 'nothing',
        8: 'helper',
    }

    index = {
        'help': set((1, 2, 4, 5)),
        'me': set((2,)),
        'please': set((2,)),
        'hulp': set((2, 3,)),
        'hilp': set((3, 4,)),
        'halp': set((5, 6,)),
        'thinks': set((5,)),
        'he': set((5, 6,)),
        'needs': set((5, 6,)),
        'nothing': set((7,)),
        'helper': set((8,)),
    }

    def GetWord(self, word):
        """Ids of the documents indexed under exactly `word`."""
        # `in` works on both Python 2 and 3 (dict.has_key is Python 2 only)
        if word in self.index:
            return self.index[word]
        else:
            return set()

    def GetWordWildcard(self, word):
        """Ids of the documents indexed under a word starting with `word`."""
        result = set()
        for item in self.index.keys():
            if word == item[0:len(word)]:
                result = result.union(self.index[item])
        return result

    def GetQuotes(self, search_string, tmp_result):
        """Ids of tmp_result whose document contains search_string."""
        result = set()
        for item in tmp_result:
            if self.docs[item].count(search_string):
                result.add(item)
        return result

    def GetNot(self, not_set):
        """Ids of all the documents that are not in not_set."""
        all_docs = set(self.docs.keys())
        return all_docs.difference(not_set)

    def Test(self):
        """Run every query of `tests`, print a report, return overall success."""
        all_ok = True
        for item in self.tests.keys():
            print(item)
            r = self.Parse(item)
            e = self.tests[item]
            print('Result: %s' % r)
            print('Expect: %s' % e)
            if e == r:
                print('Test OK')
            else:
                all_ok = False
                print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<')
            print('')
        return all_ok
# When run as a script, execute the ParserTest queries and print the overall
# outcome.
if __name__=='__main__':
    if ParserTest().Test():
        print('All tests OK')
    else:
        print('One or more tests FAILED')

View File

@ -1,57 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Francois Boulogne
import shutil
import tempfile
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.utils import PdfReadError
def _fixPdf(pdfFile, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

    The content of pdfFile, up to and including the first line containing
    '%%EOF', is written to destination (the whole file if there is no such
    line). destination may be the same path as pdfFile: the truncated
    content goes through a temporary file first.

    :param pdfFile: PDF filepath
    :param destination: destination
    """
    with tempfile.NamedTemporaryFile() as tmp:
        with open(pdfFile, "rb") as fh:
            for line in fh:
                tmp.write(line)
                if b'%%EOF' in line:
                    break
        # Make sure everything is on disk before copying the file by name
        tmp.flush()
        shutil.copy(tmp.name, destination)
def tearpage(filename, startpage=1):
    """
    Copy filename to a tempfile, write pages startpage..N to filename.

    When the copy cannot be read as a PDF, it is rebuilt with _fixPdf
    before a second attempt.

    :param filename: PDF filepath
    :param startpage: page number for the new first page
    """
    with tempfile.NamedTemporaryFile() as tmp:
        # Copy the pdf to a tmp file
        shutil.copy(filename, tmp.name)

        # Read the copied pdf. The stream has to stay open until the
        # output is written, and is closed whatever happens.
        source = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                source.close()
                _fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)
            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()

View File

@ -1,21 +0,0 @@
@article{1303.3130v1,
abstract={We study the role of the dipolar interaction, correctly accounting for the
Dipolar-Induced Resonance (DIR), in a quasi-one-dimensional system of ultracold
bosons. We first show how the DIR affects the lowest-energy states of two
particles in a harmonic trap. Then, we consider a deep optical lattice loaded
with ultracold dipolar bosons. We describe this many-body system using an
atom-dimer extended Bose-Hubbard model. We analyze the impact of the DIR on the
phase diagram at T=0 by exact diagonalization of a small-sized system. In
particular, the resonance strongly modifies the range of parameters for which a
mass density wave should occur.},
archiveprefix={arXiv},
author={N. Bartolo and D. J. Papoular and L. Barbiero and C. Menotti and A. Recati},
eprint={1303.3130v1},
link={http://arxiv.org/abs/1303.3130v1},
month={Mar},
primaryclass={cond-mat.quant-gas},
title={Dipolar-Induced Resonance for Ultracold Bosons in a Quasi-1D Optical
Lattice},
year={2013},
}

View File

@ -1,12 +0,0 @@
@article{Hou_2013,
doi = {10.1103/physreva.88.043630},
url = {http://dx.doi.org/10.1103/physreva.88.043630},
year = 2013,
month = {oct},
publisher = {American Physical Society ({APS})},
volume = {88},
number = {4},
author = {Yan-Hua Hou and Lev P. Pitaevskii and Sandro Stringari},
title = {First and second sound in a highly elongated Fermi gas at unitarity},
journal = {Phys. Rev. A}
}

View File

@ -1,7 +0,0 @@
@book{9780198507192,
title = {Bose-Einstein Condensation},
author = {Lev. P. Pitaevskii and S. Stringari},
isbn = {9780198507192},
year = {2004},
publisher = {Clarendon Press}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,160 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import unicode_literals
import unittest
from libbmc.backend import *
import bibtexparser
import os
import shutil
import tempfile
class TestFetcher(unittest.TestCase):
def setUp(self):
config.set("folder", tempfile.mkdtemp()+"/")
self.bibtex_article_string = """
@article{1303.3130v1,
abstract={We study the role of the dipolar interaction, correctly accounting for the
Dipolar-Induced Resonance (DIR), in a quasi-one-dimensional system of ultracold
bosons. We first show how the DIR affects the lowest-energy states of two
particles in a harmonic trap. Then, we consider a deep optical lattice loaded
with ultracold dipolar bosons. We describe this many-body system using an
atom-dimer extended Bose-Hubbard model. We analyze the impact of the DIR on the
phase diagram at T=0 by exact diagonalization of a small-sized system. In
particular, the resonance strongly modifies the range of parameters for which a
mass density wave should occur.},
archiveprefix={arXiv},
author={N. Bartolo and D. J. Papoular and L. Barbiero and C. Menotti and A. Recati},
eprint={1303.3130v1},
file={%sN_Bartolo_A_Recati-j-2013.pdf},
link={http://arxiv.org/abs/1303.3130v1},
month={Mar},
primaryclass={cond-mat.quant-gas},
tag={},
title={Dipolar-Induced Resonance for Ultracold Bosons in a Quasi-1D Optical
Lattice},
year={2013},
}""" % config.get("folder")
self.bibtex_article = bibtexparser.loads(self.bibtex_article_string).entries_dict
self.bibtex_article = self.bibtex_article[list(self.bibtex_article.keys())[0]]
self.bibtex_book_string = """
@book{9780521846516,
author={C. J. Pethick and H. Smith},
isbn={9780521846516},
publisher={Cambridge University Press},
title={Bose-Einstein Condensation In Dilute Gases},
year={2008},
}
"""
self.bibtex_book = bibtexparser.loads(self.bibtex_book_string).entries_dict
self.bibtex_book = self.bibtex_book[list(self.bibtex_book.keys())[0]]
def test_getNewName_article(self):
self.assertEqual(getNewName("test.pdf", self.bibtex_article),
config.get("folder")+"N_Bartolo_A_Recati-j-2013-v1.pdf")
def test_getNewName_article_override(self):
self.assertEqual(getNewName("test.pdf", self.bibtex_article, override_format="%f"),
config.get("folder")+"N_Bartolo.pdf")
def test_getNewName_book(self):
self.assertEqual(getNewName("test.pdf", self.bibtex_book),
config.get("folder")+"C_J_Pethick_H_Smith-Bose-Einstein_Condensation_In_Dilute_Gases.pdf")
def test_getNewName_book_override(self):
self.assertEqual(getNewName("test.pdf", self.bibtex_book, override_format="%a"),
config.get("folder")+"C_J_Pethick_H_Smith.pdf")
def test_bibtexAppend(self):
bibtexAppend(self.bibtex_article)
with open(config.get("folder")+'index.bib', 'r') as fh:
self.assertEqual(fh.read(),
'@article{1303.3130v1,\n\tabstract={We study the role of the dipolar interaction, correctly accounting for the\nDipolar-Induced Resonance (DIR), in a quasi-one-dimensional system of ultracold\nbosons. We first show how the DIR affects the lowest-energy states of two\nparticles in a harmonic trap. Then, we consider a deep optical lattice loaded\nwith ultracold dipolar bosons. We describe this many-body system using an\natom-dimer extended Bose-Hubbard model. We analyze the impact of the DIR on the\nphase diagram at T=0 by exact diagonalization of a small-sized system. In\nparticular, the resonance strongly modifies the range of parameters for which a\nmass density wave should occur.},\n\tarchiveprefix={arXiv},\n\tauthor={N. Bartolo and D. J. Papoular and L. Barbiero and C. Menotti and A. Recati},\n\teprint={1303.3130v1},\n\tfile={'+config.get("folder")+'N_Bartolo_A_Recati-j-2013.pdf},\n\tlink={http://arxiv.org/abs/1303.3130v1},\n\tmonth={Mar},\n\tprimaryclass={cond-mat.quant-gas},\n\ttag={},\n\ttitle={Dipolar-Induced Resonance for Ultracold Bosons in a Quasi-1D Optical\nLattice},\n\tyear={2013},\n}\n\n\n')
def test_bibtexEdit(self):
bibtexAppend(self.bibtex_article)
bibtexEdit(self.bibtex_article['ID'], {'ID': 'bidule'})
with open(config.get("folder")+'index.bib', 'r') as fh:
self.assertEqual(fh.read(),
'@article{bidule,\n\tabstract={We study the role of the dipolar interaction, correctly accounting for the\nDipolar-Induced Resonance (DIR), in a quasi-one-dimensional system of ultracold\nbosons. We first show how the DIR affects the lowest-energy states of two\nparticles in a harmonic trap. Then, we consider a deep optical lattice loaded\nwith ultracold dipolar bosons. We describe this many-body system using an\natom-dimer extended Bose-Hubbard model. We analyze the impact of the DIR on the\nphase diagram at T=0 by exact diagonalization of a small-sized system. In\nparticular, the resonance strongly modifies the range of parameters for which a\nmass density wave should occur.},\n\tarchiveprefix={arXiv},\n\tauthor={N. Bartolo and D. J. Papoular and L. Barbiero and C. Menotti and A. Recati},\n\teprint={1303.3130v1},\n\tfile={'+config.get("folder")+'N_Bartolo_A_Recati-j-2013.pdf},\n\tlink={http://arxiv.org/abs/1303.3130v1},\n\tmonth={Mar},\n\tprimaryclass={cond-mat.quant-gas},\n\ttag={},\n\ttitle={Dipolar-Induced Resonance for Ultracold Bosons in a Quasi-1D Optical\nLattice},\n\tyear={2013},\n}\n\n\n')
def test_bibtexRewrite(self):
bibtexAppend(self.bibtex_book)
bibtexRewrite({0: self.bibtex_article})
with open(config.get("folder")+'index.bib', 'r') as fh:
self.assertEqual(fh.read(),
'@article{1303.3130v1,\n\tabstract={We study the role of the dipolar interaction, correctly accounting for the\nDipolar-Induced Resonance (DIR), in a quasi-one-dimensional system of ultracold\nbosons. We first show how the DIR affects the lowest-energy states of two\nparticles in a harmonic trap. Then, we consider a deep optical lattice loaded\nwith ultracold dipolar bosons. We describe this many-body system using an\natom-dimer extended Bose-Hubbard model. We analyze the impact of the DIR on the\nphase diagram at T=0 by exact diagonalization of a small-sized system. In\nparticular, the resonance strongly modifies the range of parameters for which a\nmass density wave should occur.},\n\tarchiveprefix={arXiv},\n\tauthor={N. Bartolo and D. J. Papoular and L. Barbiero and C. Menotti and A. Recati},\n\teprint={1303.3130v1},\n\tfile={%sN_Bartolo_A_Recati-j-2013.pdf},\n\tlink={http://arxiv.org/abs/1303.3130v1},\n\tmonth={Mar},\n\tprimaryclass={cond-mat.quant-gas},\n\ttag={},\n\ttitle={Dipolar-Induced Resonance for Ultracold Bosons in a Quasi-1D Optical\nLattice},\n\tyear={2013},\n}\n\n\n' % config.get("folder"))
def test_deleteId(self):
    # After deleteId, the index must be blank and the attached file gone.
    pdf_path = config.get("folder")+'test.pdf'
    self.bibtex_article['file'] = pdf_path
    bibtexAppend(self.bibtex_article)
    # Create an empty file standing in for the stored document.
    with open(pdf_path, 'w'):
        pass
    deleteId(self.bibtex_article['ID'])
    with open(config.get("folder")+'index.bib', 'r') as fh:
        remaining = fh.read().strip()
    self.assertEqual(remaining, "")
    self.assertFalse(os.path.isfile(pdf_path))
def test_deleteFile(self):
    # After deleteFile, the index must be blank and the file itself gone.
    pdf_path = config.get("folder")+'test.pdf'
    self.bibtex_article['file'] = pdf_path
    bibtexAppend(self.bibtex_article)
    # Create an empty file standing in for the stored document.
    with open(pdf_path, 'w'):
        pass
    deleteFile(self.bibtex_article['file'])
    with open(config.get("folder")+'index.bib', 'r') as fh:
        remaining = fh.read().strip()
    self.assertEqual(remaining, "")
    self.assertFalse(os.path.isfile(pdf_path))
def test_diffFilesIndex(self):
# TODO
return
def test_getBibtex(self):
    # An appended entry looked up by its ID must come back unchanged.
    bibtexAppend(self.bibtex_article)
    entry_id = self.bibtex_article['ID']
    self.assertEqual(getBibtex(entry_id), self.bibtex_article)
def test_getBibtex_id(self):
    # Same lookup as by default, but with file_id='id' passed explicitly.
    bibtexAppend(self.bibtex_article)
    entry_id = self.bibtex_article['ID']
    self.assertEqual(getBibtex(entry_id, file_id='id'),
                     self.bibtex_article)
def test_getBibtex_file(self):
    # Look the entry up through the path of its attached file.
    pdf_path = config.get("folder")+'test.pdf'
    self.bibtex_article['file'] = pdf_path
    # Create an empty file standing in for the stored document.
    with open(pdf_path, 'w'):
        pass
    bibtexAppend(self.bibtex_article)
    found = getBibtex(self.bibtex_article['file'], file_id='file')
    self.assertEqual(found, self.bibtex_article)
def test_getBibtex_clean(self):
    # With clean=True, none of the configured ignore_fields may be present
    # in the returned entry.
    config.set("ignore_fields", ['ID', 'abstract'])
    bibtexAppend(self.bibtex_article)
    cleaned = getBibtex(self.bibtex_article['ID'], clean=True)
    for ignored in config.get("ignore_fields"):
        self.assertNotIn(ignored, cleaned)
def test_getEntries(self):
    # With a single appended entry, getEntries must list exactly its ID.
    bibtexAppend(self.bibtex_article)
    expected_ids = [self.bibtex_article['ID']]
    self.assertEqual(getEntries(), expected_ids)
def test_updateArxiv(self):
# TODO
return
def test_search(self):
# TODO
return
def tearDown(self):
    # Remove the configured papers folder and everything the test put in it.
    papers_folder = config.get("folder")
    shutil.rmtree(papers_folder)
# Run this module's test cases when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -1,73 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import unicode_literals
import unittest
import json
import os
import tempfile
import shutil
from libbmc.config import Config
class TestConfig(unittest.TestCase):
    """Tests for ``Config`` run against a throw-away configuration folder."""

    def setUp(self):
        # Fresh temporary directory (with a trailing slash) for every test.
        self.folder = tempfile.mkdtemp()+"/"
        self.default_config = {
            "folder": os.path.expanduser("~/Papers/"),
            "proxies": [''],
            "format_articles": "%f_%l-%j-%Y%v",
            "format_books": "%a-%t",
            "format_custom": [],
            "ignore_fields": ["file", "doi", "tag"],
        }

    def tearDown(self):
        shutil.rmtree(self.folder)

    def test_load_without_file(self):
        # Without an existing bmc.json, the defaults are expected both in
        # memory and in the bmc.json file found in the folder afterwards.
        config = Config(base_config_path=self.folder)
        self.assertEqual(config.as_dict(), self.default_config)
        with open(self.folder+"bmc.json", 'r') as fh:
            on_disk = json.load(fh)
        self.assertEqual(on_disk, self.default_config)

    def test_load_with_file(self):
        # NOTE: this is the same dict object as self.default_config, not a
        # copy, so the extra key is added to the defaults as well.
        expected = self.default_config
        expected["foo"] = "bar"
        with open(self.folder+"bmc.json", 'w') as fh:
            json.dump(expected, fh)
        loaded = Config(base_config_path=self.folder)
        self.assertEqual(expected, loaded.as_dict())

    def test_get(self):
        config = Config(base_config_path=self.folder)
        proxies = config.get("proxies")
        self.assertEqual(proxies, [''])

    def test_set(self):
        config = Config(base_config_path=self.folder)
        config.set("foo", "bar")
        stored = config.get("foo")
        self.assertEqual(stored, "bar")

    def test_save(self):
        # After save(), bmc.json must hold exactly the in-memory settings.
        config = Config(base_config_path=self.folder)
        config.set("foo", "bar")
        config.save()
        with open(self.folder+"bmc.json", 'r') as fh:
            on_disk = json.load(fh)
        self.assertEqual(on_disk, config.as_dict())

    def test_masks(self):
        # A function written to masks.py in the config folder is expected to
        # be callable as the first "format_custom" item.
        with open(self.folder+"masks.py", 'w') as fh:
            fh.write("def f(x): return x")
        config = Config(base_config_path=self.folder)
        first_mask = config.get("format_custom")[0]
        self.assertEqual("foo", first_mask("foo"))
# Run this module's test cases when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -1,101 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import unittest
from libbmc.fetcher import *
class TestFetcher(unittest.TestCase):
    """Tests for the fetcher helpers (download, ID detection, bibtex lookup).

    Fixture paths are relative, so the tests expect to be run from the
    directory that contains ``libbmc/``.
    """

    def setUp(self):
        # Reference bibtex outputs the lookup functions are compared against.
        with open("libbmc/tests/src/doi.bib", 'r') as doi_fh:
            self.doi_bib = doi_fh.read()
        with open("libbmc/tests/src/arxiv.bib", 'r') as arxiv_fh:
            self.arxiv_bib = arxiv_fh.read()
        with open("libbmc/tests/src/isbn.bib", 'r') as isbn_fh:
            self.isbn_bib = isbn_fh.read()

    def test_download(self):
        payload, contenttype = download('http://arxiv.org/pdf/1312.4006.pdf')
        self.assertIn(contenttype, ['pdf', 'djvu'])
        self.assertNotEqual(payload, '')

    def test_download_invalid_type(self):
        result = download('http://phyks.me/')
        self.assertFalse(result[0])

    def test_download_invalid_url(self):
        result = download('a')
        self.assertFalse(result[0])

    def test_findISBN_DJVU(self):
        # ISBN is incomplete in this test because my djvu file is bad
        found = findISBN("libbmc/tests/src/test_book.djvu")
        self.assertEqual(found, '978295391873')

    def test_findISBN_PDF(self):
        found = findISBN("libbmc/tests/src/test_book.pdf")
        self.assertEqual(found, '9782953918731')

    def test_findISBN_False(self):
        found = findISBN("libbmc/tests/src/test.pdf")
        self.assertFalse(found)

    def test_isbn2Bib(self):
        bibtex = isbn2Bib('0198507194')
        self.assertEqual(bibtex, self.isbn_bib)

    def test_isbn2Bib_False(self):
        bibtex = isbn2Bib('foo')
        self.assertEqual(bibtex, '')

    def test_findDOI_PDF(self):
        found = findArticleID("libbmc/tests/src/test.pdf")
        self.assertEqual(found, ("DOI", "10.1103/physrevlett.112.253201"))

    def test_findOnlyDOI(self):
        found = findArticleID("libbmc/tests/src/test.pdf", only=["DOI"])
        self.assertEqual(found, ("DOI", "10.1103/physrevlett.112.253201"))

    def test_findDOID_DJVU(self):
        # DOI is incomplete in this test because my djvu file is bad
        found = findArticleID("libbmc/tests/src/test.djvu")
        self.assertEqual(found, ("DOI", "10.1103/physrevlett.112"))

    def test_findDOI_False(self):
        found = findArticleID("libbmc/tests/src/test_arxiv_multi.pdf",
                              only=["DOI"])
        self.assertFalse(found[0])

    def test_doi2Bib(self):
        bibtex = doi2Bib('10.1103/physreva.88.043630')
        self.assertEqual(bibtex, self.doi_bib)

    def test_doi2Bib_False(self):
        bibtex = doi2Bib('blabla')
        self.assertEqual(bibtex, '')

    def test_findArXivId(self):
        found = findArticleID("libbmc/tests/src/test_arxiv_multi.pdf")
        self.assertEqual(found, ("arXiv", '1303.3130v1'))

    def test_findOnlyArXivId(self):
        found = findArticleID("libbmc/tests/src/test_arxiv_multi.pdf",
                              only=["arXiv"])
        self.assertEqual(found, ("arXiv", '1303.3130v1'))

    def test_findArticleID(self):
        # cf https://github.com/Phyks/BMC/issues/19
        found = findArticleID("libbmc/tests/src/test_arxiv_doi_conflict.pdf")
        self.assertEqual(found, ("arXiv", '1107.4487v1'))

    def test_arXiv2Bib(self):
        bibtex = arXiv2Bib('1303.3130v1')
        self.assertEqual(bibtex, self.arxiv_bib)

    def test_arXiv2Bib_False(self):
        bibtex = arXiv2Bib('blabla')
        self.assertEqual(bibtex, '')

    def test_findHALId(self):
        found = findHALId("libbmc/tests/src/test_hal.pdf")
        self.assertTupleEqual(found, ('hal-00750893', '3'))
# Run this module's test cases when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -1,37 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import unicode_literals
import unittest
from libbmc.tools import *
class TestTools(unittest.TestCase):
    """Tests for the small string helpers of the tools module."""

    def test_slugify(self):
        # Accents are reduced to ASCII; '&' and '.' are dropped.
        slug = slugify(u"à&é_truc.pdf")
        self.assertEqual(slug, "ae_trucpdf")

    def test_parsed2Bibtex(self):
        parsed = {
            'ENTRYTYPE': 'article',
            'ID': 'test',
            'field1': 'test1',
            'field2': 'test2',
        }
        expected = ('@article{test,\n\tfield1={test1},\n' +
                    '\tfield2={test2},\n}\n\n')
        self.assertEqual(parsed2Bibtex(parsed), expected)

    def test_getExtension(self):
        extension = getExtension('test.ext')
        self.assertEqual(extension, '.ext')

    def test_replaceAll(self):
        replace_dict = {"test": "bidule", "machin": "chose"}
        replaced = replaceAll("test machin truc", replace_dict)
        self.assertEqual(replaced, "bidule chose truc")
# Run this module's test cases when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -1,90 +0,0 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import print_function, unicode_literals
import os
import re
import sys
# termios is only imported on POSIX systems; rawInput() uses tcflush there to
# discard pending terminal input before prompting.
if os.name == "posix":
    from termios import tcflush, TCIOFLUSH
# Python 2/3 compatibility: use raw_input as input when it exists; on
# Python 3 the name is undefined and the builtin input is kept.
try:
    input = raw_input
except NameError:
    pass
# Matches every character that is not a word character, whitespace or hyphen.
_slugify_strip_re = re.compile(r'[^\w\s-]')
# Matches each run of whitespace, which becomes a single underscore.
_slugify_hyphenate_re = re.compile(r'[\s]+')


def slugify(value):
    """Turn ``value`` into an ASCII string usable as a file name.

    The value is converted to text, decomposed (NFKD) and stripped of every
    non-ASCII character, so accented letters lose their accent. Characters
    other than word characters, whitespace and hyphens are then removed,
    surrounding whitespace is trimmed and each remaining run of whitespace
    is replaced by a single underscore.

    Unlike the Django filter it is adapted from
    ("django/template/defaultfilters.py"), case is preserved and whitespace
    becomes ``_`` rather than ``-``.

    :param value: the text to convert; non-text values are converted first.
    :returns: the slugified text.
    """
    import unicodedata
    # Python 2/3 compatibility: the text type is unicode on 2, str on 3.
    try:
        unicode_type = unicode
    except NameError:
        unicode_type = str
    if not isinstance(value, unicode_type):
        value = unicode_type(value)
    # Decompose accented characters, then drop whatever is not ASCII.
    ascii_text = unicodedata.normalize('NFKD', value)
    ascii_text = ascii_text.encode('ascii', 'ignore').decode('ascii')
    stripped = unicode_type(_slugify_strip_re.sub('', ascii_text).strip())
    return _slugify_hyphenate_re.sub('_', stripped)
def parsed2Bibtex(parsed):
    """Serialize one bibtex entry dict to its bibtex text.

    ``ENTRYTYPE`` and ``ID`` form the opening line; every other key is
    written as a tab-indented ``key={value},`` line, in sorted key order.
    """
    header = '@'+parsed['ENTRYTYPE']+'{'+parsed['ID']+",\n"
    field_lines = [
        "\t"+name+"={"+parsed[name]+"},\n"
        for name in sorted(parsed)
        if name not in ('ENTRYTYPE', 'ID')
    ]
    return header+"".join(field_lines)+"}\n\n"
def getExtension(filename):
    """Return the extension of ``filename``, including its leading dot.

    Uses ``os.path.splitext`` so that a name without any dot yields an empty
    string (the former ``rfind``-based slice returned the last character in
    that case) and dots inside directory components are not mistaken for an
    extension.

    :param filename: a file name or path.
    :returns: the extension, e.g. ``'.pdf'``, or ``''`` when there is none.
    """
    return os.path.splitext(filename)[1]
def replaceAll(text, dic):
    """Return ``text`` with each key of ``dic`` replaced by its value.

    Replacements are applied one after the other, in the iteration order of
    ``dic``, each on the result of the previous one.
    """
    result = text
    for needle in dic:
        result = result.replace(needle, dic[needle])
    return result
def rawInput(string):
    """Discard pending terminal input, then prompt the user.

    :param string: the prompt passed to ``input``.
    :returns: whatever ``input`` returns for that prompt.
    """
    # tcflush only works on terminal devices and raises termios.error when
    # stdin is a pipe or a redirected file, so only flush an actual tty.
    if os.name == "posix" and sys.stdin.isatty():
        tcflush(sys.stdin, TCIOFLUSH)
    return input(string)
def warning(*objs):
    """Print ``objs`` to stderr on one line, prefixed with ``WARNING: ``."""
    parts = ("WARNING: ",) + objs
    print(*parts, file=sys.stderr)
def listDir(path):
    """Recursively collect the files found under ``path``.

    Each returned item is the containing directory, as yielded by
    ``os.walk``, joined with the file name. Directories themselves are not
    listed.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(path)
        for name in names
    ]

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
libbmc

View File

@ -4,12 +4,13 @@ from distutils.core import setup
setup(
name = 'BMC',
version = "0.3dev",
version = "0.4",
url = "https://github.com/Phyks/BMC",
author = "",
license = "no-alcohol beer-ware license",
author_email = "",
description = "simple script to download and store your articles",
author = "Phyks (Lucas Verney)",
license = "MIT License",
author_email = "phyks@phyks.me",
description = "Simple script to download and store your articles",
# TODO
packages = ['libbmc'],
scripts = ['bmc.py'],
)