libbmc/libbmc/bibtex.py

286 lines
8.1 KiB
Python

"""
This file contains functions to read, write and edit BibTeX files.
TODO: Add unit tests.
"""
import re
import bibtexparser
from libbmc import tools
# Default mask used by ``to_filename``; the ``{...}`` placeholders are filled
# in through ``str.format`` with the formatters built in that function.
DEFAULT_PAPERS_FILENAME_MASK = "{first}_{last}-{journal}-{year}{arxiv_version}"
# Alternative mask relying only on the ``authors`` and ``title`` placeholders.
# Not referenced in the visible part of this file -- presumably passed as
# ``mask`` to ``to_filename`` by callers handling books (TODO confirm).
DEFAULT_BOOKS_FILENAME_MASK = "{authors} - {title}"
def dict2bibtex(data):
    """
    Serialize one BibTeX entry, given as a dict, into BibTeX source text.

    :param data: A dict describing a BibTeX entry, shaped like the items of \
            ``bibtexparser.BibDatabase.entries``. The ``ENTRYTYPE`` and \
            ``ID`` keys are mandatory; every other key is emitted as a field.
    :return: The BibTeX string for this entry, with fields sorted by name.
    """
    header = '@' + data['ENTRYTYPE'] + '{' + data['ID'] + ",\n"
    # These two keys are already part of the header, they are not fields.
    reserved = ('ENTRYTYPE', 'ID')
    field_lines = ["\t" + name + "={" + data[name] + "},\n"
                   for name in sorted(data) if name not in reserved]
    return header + "".join(field_lines) + "}\n\n"
def bibdatabase2bibtex(data):
    """
    Serialize a whole BibDatabase into BibTeX source text.

    :param data: A ``bibtexparser.BibDatabase`` object.
    :return: The BibTeX string produced by ``bibtexparser.dumps``.
    """
    serialized = bibtexparser.dumps(data)
    return serialized
def write(filename, data):
    """
    Create a new BibTeX file, overwriting ``filename`` if it already exists.

    :param filename: The name of the BibTeX file to write.
    :param data: A ``bibtexparser.BibDatabase`` object.
    """
    # Serialize *before* opening the file: opening in 'w' mode truncates it,
    # so a serialization error would otherwise wipe the existing content.
    serialized = bibdatabase2bibtex(data)
    with open(filename, 'w') as fh:
        fh.write(serialized)
def append(filename, data):
    """
    Add the entries of a BibDatabase at the end of a BibTeX file.

    :param filename: The name of the BibTeX file to extend.
    :param data: A ``bibtexparser.BibDatabase`` object holding the entries \
            to append.
    """
    with open(filename, 'a') as bibtex_file:
        serialized = bibdatabase2bibtex(data)
        bibtex_file.write(serialized)
def edit(filename, identifier, data):
    """
    Update an entry in a BibTeX file.

    Fields listed in ``data`` overwrite (or are added to) the matching entry;
    every other field of that entry is left untouched.

    .. note ::
        If no entry has the given ``identifier``, the file is rewritten
        without any change to its entries.

    :param filename: The name of the BibTeX file to edit.
    :param identifier: The id of the entry to update, in the BibTeX file.
    :param data: A dict associating fields and updated values. Fields present \
            in the BibTeX file but not in this dict will be kept as is. For \
            backward compatibility, an object exposing ``entries`` (such as \
            a ``bibtexparser.BibDatabase``) is accepted as well; its first \
            entry then provides the fields.
    """
    # Accept either a plain dict of fields or a BibDatabase-like object.
    updated_fields = data.entries[0] if hasattr(data, 'entries') else data

    # Get current bibtex
    with open(filename, 'r') as fh:
        bibtex = bibtexparser.load(fh)

    # Update the entry in the ``entries`` list itself. Assigning into
    # ``entries_dict`` has no effect on the written file: in bibtexparser 1.x
    # it is only a lookup built from ``entries``.
    for entry in bibtex.entries:
        if entry.get('ID') == identifier:
            entry.update(updated_fields)
            break

    # Write the resulting BibTeX
    write(filename, bibtex)
def replace(filename, identifier, data):
    """
    Replace an entry in a BibTeX file.

    The entry keeps its position in the file; all of its previous fields are
    discarded in favour of the new entry.

    .. note ::
        If no entry has the given ``identifier``, the file is rewritten
        without any change to its entries.

    :param filename: The name of the BibTeX file to edit.
    :param identifier: The id of the entry to replace, in the BibTeX file.
    :param data: A ``bibtexparser.BibDatabase`` object containing a single \
            entry.
    """
    # Get current bibtex
    with open(filename, 'r') as fh:
        bibtex = bibtexparser.load(fh)

    new_entry = data.entries[0]
    # Swap the entry inside the ``entries`` list itself. Assigning into
    # ``entries_dict`` has no effect on the written file: in bibtexparser 1.x
    # it is only a lookup built from ``entries``.
    for index, entry in enumerate(bibtex.entries):
        if entry.get('ID') == identifier:
            bibtex.entries[index] = new_entry
            break

    # Write the resulting BibTeX
    write(filename, bibtex)
def delete(filename, identifier):
    """
    Delete an entry in a BibTeX file.

    Deleting an identifier that is not in the file is not an error: the file
    is rewritten with all of its entries.

    :param filename: The name of the BibTeX file to edit.
    :param identifier: The id of the entry to delete, in the BibTeX file.
    """
    # Get current bibtex
    with open(filename, 'r') as fh:
        bibtex = bibtexparser.load(fh)

    # Drop the entry from the ``entries`` list itself. Deleting from
    # ``entries_dict`` has no effect on the written file: in bibtexparser 1.x
    # it is only a lookup built from ``entries``.
    bibtex.entries = [entry for entry in bibtex.entries
                      if entry.get('ID') != identifier]

    # Write the resulting BibTeX
    write(filename, bibtex)
def get(filename, ignore_fields=None):
    """
    Load every entry of a BibTeX file.

    :param filename: The name of the BibTeX file.
    :param ignore_fields: An optional list of fields to strip from each \
            entry.
    :returns: A ``bibtexparser.BibDatabase`` object holding the entries of \
            the file, without the ignored fields.
    """
    ignored = [] if ignore_fields is None else ignore_fields

    with open(filename, 'r') as bibtex_file:
        database = bibtexparser.load(bibtex_file)

    # Rebuild each entry as a plain dict without the ignored fields.
    cleaned_entries = []
    for entry in database.entries:
        cleaned_entries.append({field: value
                                for field, value in entry.items()
                                if field not in ignored})
    database.entries = cleaned_entries
    return database
def get_entry_by_filter(filename, filter_function, ignore_fields=None):
    """
    Get an entry from a BibTeX file.

    .. note ::
        Returns the first matching entry, in file order. Entries located
        after it are not passed to ``filter_function``.

    :param filename: The name of the BibTeX file.
    :param filter_function: A function taking an entry dict and returning \
            ``True`` or ``False`` whether the entry should be included or \
            not. A ``KeyError`` raised by this function (for instance when \
            an entry lacks the inspected field) stops the search and is \
            reported as "not found".
    :param ignore_fields: An optional list of fields to strip from the BibTeX \
            file.
    :returns: A ``bibtexparser.BibDatabase`` object representing the \
            first matching entry. ``None`` if entry was not found.
    """
    # Handle default argument
    if ignore_fields is None:
        ignore_fields = []

    # Open bibtex file
    with open(filename, 'r') as fh:
        bibtex = bibtexparser.load(fh)

    try:
        # Stop at the first match, as documented (looping to the end would
        # return the last matching entry instead).
        matching_entry = next(
            (entry for entry in bibtex.entries if filter_function(entry)),
            None)
    except KeyError:
        return None

    if matching_entry is None:
        return None

    # Clean the entry dict if necessary
    matching_entry = {k: matching_entry[k]
                      for k in matching_entry if k not in ignore_fields}

    # Wrap the single entry in a fresh database object
    bib_db = bibtexparser.bibdatabase.BibDatabase()
    bib_db.entries = [matching_entry]
    return bib_db
def get_entry(filename, identifier, ignore_fields=None):
    """
    Fetch the entry having a given id from a BibTeX file.

    :param filename: The name of the BibTeX file.
    :param identifier: The id of the entry to fetch, in the BibTeX file.
    :param ignore_fields: An optional list of fields to strip from the \
            fetched entry.
    :returns: A ``bibtexparser.BibDatabase`` object representing the \
            fetched entry. ``None`` if entry was not found.
    """
    def has_identifier(entry):
        # Select entries on their BibTeX key.
        return entry["ID"] == identifier

    ignored = ignore_fields if ignore_fields is not None else []
    return get_entry_by_filter(filename, has_identifier, ignored)
def to_filename(data,
                mask=DEFAULT_PAPERS_FILENAME_MASK,
                extra_formatters=None):
    """
    Convert a bibtex entry to a formatted filename according to a given mask.

    .. note ::
        Available formatters out of the box are:
            - ``journal``
            - ``title``
            - ``year``
            - ``first`` for the first author
            - ``last`` for the last author
            - ``authors`` for the list of authors
            - ``arxiv_version`` (discarded if no arXiv version in the BibTeX)

        Author names are reduced to the part located before the first comma.
        Fields missing from the entry (including ``author``) are replaced by
        an empty string.

        Filename is slugified after applying the masks.

    :param data: A ``bibtexparser.BibDatabase`` object representing a \
            BibTeX entry, as the one from ``bibtexparser`` output. Only its \
            first entry is used.
    :param mask: A Python format string.
    :param extra_formatters: A dict of format string (in the mask) and \
            associated lambdas to perform the formatting. Each lambda is \
            called with the entry dict.
    :returns: A formatted filename.
    """
    # Handle default argument
    if extra_formatters is None:
        extra_formatters = {}

    entry = data.entries[0]
    # Keep only the part before the comma for each author. A missing
    # ``author`` field yields a single empty name rather than a KeyError,
    # like the other optional fields below.
    names = [author.split(',')[0].strip()
             for author in re.split(' and ', entry.get('author', ''))]

    formatters = {
        "journal": entry.get("journal", ""),
        "title": entry.get("title", ""),
        "year": entry.get("year", ""),
        "first": names[0],
        "last": names[-1],
        "authors": ", ".join(names),
    }
    for name, formatter in extra_formatters.items():
        formatters[name] = formatter(entry)

    # Only a trailing "v<digits>" is an arXiv version. Searching for the last
    # "v" anywhere would return the last character of the id when there is no
    # version at all, or a stray "v" found in old-style ids ("solv-int/...").
    version = re.search(r'v\d+$', entry.get("eprint", "").strip())
    # Set after the extra formatters, so this value always wins.
    formatters["arxiv_version"] = '-' + version.group(0) if version else ""

    return tools.slugify(mask.format(**formatters))