From 9019833dbbed44f0ee5394929a417e7f508b1a85 Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Tue, 19 Jan 2016 18:17:12 +0100 Subject: [PATCH] Add some doc, especially about external dependencies --- .gitmodules | 3 --- README.md | 43 ++++++++++++++++++++++++++++++++++++ libbmc/citations/bbl.py | 6 +++++ libbmc/citations/bibtex.py | 2 +- libbmc/papers/identifiers.py | 8 ++++++- 5 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 README.md diff --git a/.gitmodules b/.gitmodules index 4603d9f..a986a5d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "libbmc/external/opendetex"] path = libbmc/external/opendetex url = https://github.com/Phyks/opendetex -[submodule "libbmc/external/poppler"] - path = libbmc/external/poppler - url = git://git.freedesktop.org/git/poppler/poppler diff --git a/README.md b/README.md new file mode 100644 index 0000000..58c711c --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +libBMC +====== + +A generic Python library to manage bibliography and play with scientific +papers. + + +_Note_: This library is written for Python 3 and may not work with Python 2. +This is not a major priority for me, but if anyone needs to make it work with +Python 2 and wants to make a PR, I will happily merge it :) + + +## Dependencies + +Python dependencies are listed in the `requirements.txt` file at the root of +this repo, and can be installed with `pip install -r requirements.txt`. + + +External dependencies are [OpenDeTeX](https://code.google.com/p/opendetex/) +(an improved version of DeTeX) and the `pdftotext` and `djvutxt` programs. + + +OpenDeTeX is available as a Git submodule in the `libbmc/external` folder. If +you do not have it installed system-wide, you can use the following steps to +build it in this repo and the library will use it: + +* `git submodule init; git submodule update` to initialize the Git submodules. 
+* `cd libbmc/external/opendetex; make` to build OpenDeTeX (see the `INSTALL` file + in the same folder for more info; you will need `make`, `gcc` and `flex` to + build it). + +OpenDeTeX is used to get references from a `.bbl` file (or directly from arXiv +as it uses the same pipeline). + + +`pdftotext` and `djvutxt` should be available in the packages of your +distribution and should be installed system-wide. Both are used to extract +identifiers from papers PDF files. + + +If you plan on using the `libbmc.citations.pdf` functions, you should also +install the matching software (`CERMINE`, `Grobid` or `pdf-extract`). See the +docstrings of those functions for more info on this particular point. diff --git a/libbmc/citations/bbl.py b/libbmc/citations/bbl.py index f36509b..3fde99e 100644 --- a/libbmc/citations/bbl.py +++ b/libbmc/citations/bbl.py @@ -25,6 +25,12 @@ def bibitem_as_plaintext(bibitem): This plaintext representation can be super ugly, contain URLs and so \ on. + .. note:: + + You need to have ``delatex`` installed system-wide, or to build it in \ + this repo as described in ``README.md``, before using this \ + function. + :param bibitem: The text content of the bibitem. :returns: A cleaned plaintext citation from the bibitem. 
""" - # Get the plaintext citations from the bbl file + # Get the plaintext citations from the bibtex file plaintext_citations = get_plaintext_citations(bibtex) # Use the plaintext citations parser on these citations return plaintext.get_cited_DOIs(plaintext_citations) diff --git a/libbmc/papers/identifiers.py b/libbmc/papers/identifiers.py index a0c408d..f3083fd 100644 --- a/libbmc/papers/identifiers.py +++ b/libbmc/papers/identifiers.py @@ -14,12 +14,18 @@ def find_identifiers(src): """ Search for a valid identifier (DOI, ISBN, arXiv, HAL) in a given file. - .. note :: + .. note:: This function returns the first matching identifier, that is the most likely to be relevant for this file. However, it may fail and return an identifier taken from the references or another paper. + .. note:: + + You will need to have ``pdftotext`` and/or ``djvutxt`` installed \ + system-wide before processing files with this function. + + :params src: Path to the file to scan. :returns: a tuple (type, identifier) or ``None`` if not found or \