Add a citations fetcher for bibtex files
This commit is contained in:
parent
0d17254f6c
commit
168e37f247
@ -7,7 +7,7 @@ import re
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from libbmc import tools
|
from libbmc import tools
|
||||||
from libbmc.citations import bbl
|
from libbmc.citations import plaintext
|
||||||
|
|
||||||
|
|
||||||
# Regex to match bibitems
|
# Regex to match bibitems
|
||||||
@ -18,7 +18,7 @@ ENDTHEBIBLIOGRAPHY_REGEX = re.compile(r"\\end\{thebibliography}.*")
|
|||||||
|
|
||||||
def bibitem_as_plaintext(bibitem):
|
def bibitem_as_plaintext(bibitem):
|
||||||
"""
|
"""
|
||||||
Return a plaintext representation of the bibitem from the ``.bbl`` file.
|
Return a plaintext representation of a bibitem from the ``.bbl`` file.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
@ -67,16 +67,16 @@ def get_plaintext_citations(bbl):
|
|||||||
return cleaned_bbl
|
return cleaned_bbl
|
||||||
|
|
||||||
|
|
||||||
def get_cited_DOIs(bbl_input):
|
def get_cited_DOIs(bbl):
|
||||||
"""
|
"""
|
||||||
Get the DOIs of the papers cited in this .bbl file.
|
Get the DOIs of the papers cited in a .bbl file.
|
||||||
|
|
||||||
:param bbl_input: Either the path to a .bbl file or the content \
|
:param bbl: Either the path to a .bbl file or the content \
|
||||||
of a .bbl file.
|
of a .bbl file.
|
||||||
|
|
||||||
:returns: A dict of cleaned plaintext citations and their associated DOI.
|
:returns: A dict of cleaned plaintext citations and their associated DOI.
|
||||||
"""
|
"""
|
||||||
# Get the plaintext citations from the bbl file
|
# Get the plaintext citations from the bbl file
|
||||||
plaintext_citations = get_plaintext_citations(bbl_input)
|
plaintext_citations = get_plaintext_citations(bbl)
|
||||||
# Use the plaintext citations parser on these citations
|
# Use the plaintext citations parser on these citations
|
||||||
return bbl.get_cited_DOIs(plaintext_citations)
|
return plaintext.get_cited_DOIs(plaintext_citations)
|
||||||
|
74
libbmc/citations/bibtex.py
Normal file
74
libbmc/citations/bibtex.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
"""
|
||||||
|
This file contains all the functions to extract DOIs of citations from
|
||||||
|
BibTeX files.
|
||||||
|
"""
|
||||||
|
import bibtexparser
|
||||||
|
import os
|
||||||
|
|
||||||
|
from bibtexparser.bparser import BibTexParser
|
||||||
|
from bibtexparser.customization import convert_to_unicode
|
||||||
|
|
||||||
|
from libbmc import tools
|
||||||
|
from libbmc.citations import plaintext
|
||||||
|
|
||||||
|
|
||||||
|
def bibentry_as_plaintext(bibentry):
    """
    Flatten a single BibTeX entry into one plaintext string.

    .. note::

        The resulting string is a raw concatenation of every field of the \
                entry, so it may be ugly and contain URLs and the like.

    :param bibentry: A bibentry as parsed by ``bibtexparser``.
    :returns: A cleaned plaintext citation from the bibentry.
    """
    # Concatenate all the field values of the entry, separated by spaces,
    # in the iteration order of the entry.
    flattened = " ".join(bibentry.values())
    # Let the shared helper normalize the whitespace of the result.
    return tools.clean_whitespaces(flattened)
|
||||||
|
|
||||||
|
|
||||||
|
def get_plaintext_citations(bibtex):
    """
    Build the list of plaintext citations found in some BibTeX data.

    :param bibtex: Either the path to the BibTeX file or the content of a \
            BibTeX file.
    :returns: A list of cleaned plaintext citations.
    """
    # Parser set up with the ``convert_to_unicode`` customization
    bibtex_parser = BibTexParser()
    bibtex_parser.customization = convert_to_unicode

    if not os.path.isfile(bibtex):
        # Not an existing file: treat the argument as raw BibTeX content
        database = bibtexparser.loads(bibtex, parser=bibtex_parser)
    else:
        # Existing file: parse it from disk
        with open(bibtex) as handle:
            database = bibtexparser.load(handle, parser=bibtex_parser)

    # One flattened plaintext citation per parsed entry
    return [bibentry_as_plaintext(entry) for entry in database.entries]
|
||||||
|
|
||||||
|
|
||||||
|
def get_cited_DOIs(bibtex):
    """
    Get the DOIs of the papers cited in a BibTeX file.

    .. note::

        For now, this function flattens the BibTeX entries (losing any \
                structure provided by the BibTeX) and hands them to the \
                matching method for plaintext citations, which relies on \
                the CrossRef API. This is the best method found so far, \
                although it can be quite frustrating. Let me know if you \
                have anything better!

    :param bibtex: Either the path to a BibTeX file or the content of a \
            BibTeX file.
    :returns: A dict of cleaned plaintext citations and their associated DOI.
    """
    # Flatten the BibTeX entries into plaintext citations, then run the
    # plaintext citations parser on them.
    return plaintext.get_cited_DOIs(get_plaintext_citations(bibtex))
|
@ -1,3 +1,4 @@
|
|||||||
arxiv2bib>=1.0.7
|
arxiv2bib>=1.0.7
|
||||||
|
bibtexparser>=0.6.2
|
||||||
isbnlib>=3.5.7
|
isbnlib>=3.5.7
|
||||||
requests>=2.9.1
|
requests>=2.9.1
|
||||||
|
Loading…
Reference in New Issue
Block a user