Update arXiv articles via CLI

This commit is contained in:
Phyks 2014-05-07 22:04:46 +02:00
parent acd9cfb5e0
commit 73f3fe7778
3 changed files with 69 additions and 3 deletions

View File

@ -66,11 +66,14 @@ TODO
TODO
### Edit entries
Run `./main.py edit PARAM` where `PARAM` should be either a path to a paper file or an ident in the bibtex index. This will open a text editor to edit the corresponding bibtex entry.
### Download the latest version for papers from arXiv
Run `./main.py update` to look for available updated versions of your arXiv papers. You can use the optional `--entries ID` argument (where ID is either a bibtex index identifier or a filename) to restrict the search to a subset of papers.
### Data storage
All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can resync the index file with `./main.py resync`.
@ -107,10 +110,11 @@ Here are some sources of inspirations for this project :
A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own.
60. Automatic download of new arXiv version
65. Look for published version in arXiv
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
80. Search engine
90. Webserver interface
100. UTF-8 ?
200. Webserver interface ? GUI ? (not likely for now…)
## Issues ?

View File

@ -4,6 +4,7 @@
import os
import re
import tools
import fetcher
import params
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogeneize_latex_encoding
@ -238,3 +239,42 @@ def getBibtex(entry, file_id='both'):
bibtex_entry = bibtex[key]
break
return bibtex_entry
def getEntries():
    """Return the identifiers of all the entries in the bibtex index.

    The index is the `index.bib` file located in `params.folder`; it is
    parsed with BibTexParser.

    Returns the keys of the parsed entry dict, or False (after emitting a
    warning through tools.warning) if the index cannot be read or parsed.
    """
    try:
        with open(params.folder+'index.bib', 'r') as fh:
            bibtex = BibTexParser(fh.read(),
                                  customization=homogeneize_latex_encoding)
        bibtex = bibtex.get_entry_dict()
    except Exception:
        # Deliberately not a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        tools.warning("Unable to open index file.")
        return False
    return bibtex.keys()
def updateArXiv(entry):
    """Fetch the latest arXiv version of a paper if a newer one exists.

    entry is handed over to getBibtex() to retrieve the stored bibtex entry.

    Returns False if the entry is not found, is not an arXiv paper, is
    already up to date, or if the latest version cannot be retrieved.
    Otherwise the paper file is overwritten with the newly downloaded
    version and the stored entry, updated with the new 'Eprint' and 'URL'
    (plus any field it was missing), is returned. The index file itself is
    not rewritten here.
    """
    bibtex = getBibtex(entry)
    if not bibtex:
        # NOTE(review): assumes getBibtex() yields a falsy value when no
        # entry matches -- confirm against its implementation.
        return False

    # Check arXiv. Both conditions must reject the entry (hence `or`), and
    # the comparison is case-insensitive as the prefix is usually "arXiv".
    if('ArchivePrefix' not in bibtex or
       'arxiv' not in bibtex['ArchivePrefix'].lower()):
        return False

    arxiv_id = bibtex['Eprint']
    # NOTE(review): if the stored Eprint carries a version suffix, querying
    # with it may only ever return that same version -- confirm how
    # fetcher.arXiv2Bib handles versioned identifiers.
    last_bibtex = BibTexParser(fetcher.arXiv2Bib(arxiv_id),
                               customization=homogeneize_latex_encoding)
    last_bibtex = last_bibtex.get_entry_dict()
    if not last_bibtex:
        return False
    # get_entry_dict() maps entry identifiers to entries: pick the only one.
    last_entry = next(iter(last_bibtex.values()))

    last_id = last_entry.get('Eprint')
    if last_id is None or last_id == arxiv_id:
        return False

    # New version available. The key casing differed between the two lookups
    # in the previous code, so accept either spelling.
    url = last_entry.get('Url', last_entry.get('URL'))
    if url is None:
        tools.warning("Unable to find the URL of the latest version.")
        return False

    # Download before opening the file, so that a failed download does not
    # leave the existing paper truncated.
    content = fetcher.download(url)
    if not content:
        return False
    with open(bibtex['file'], 'w+') as fh:
        fh.write(content)

    bibtex['Eprint'] = last_id
    bibtex['URL'] = url
    for key in last_entry:
        if key not in bibtex:
            bibtex[key] = last_entry[key]
    # Return the merged entry: it holds both the new 'Eprint' and the local
    # 'file' path that callers need.
    return bibtex

22
main.py
View File

@ -321,6 +321,14 @@ def resync():
" but could not delete it.")
def update(entries):
    """Look for a newer arXiv version of the given paper(s).

    entries is either a single entry (a filename or a bibtex identifier) or
    an iterable of such entries. For each entry for which
    backend.updateArXiv() reports a new version, a message is printed and
    the bibtex entry is opened for edition through editEntry().
    """
    # Python 2 / 3 compatible check for "a single string was passed".
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(entries, string_types):
        entries = [entries]

    for entry in entries:
        # Named `updated` so as not to shadow this function.
        updated = backend.updateArXiv(entry)
        if updated is not False:
            print("New version found for "+entry)
            print("Downloaded latest version "+updated['Eprint'])
            editEntry(updated['file'], 'file')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="A bibliography " +
"management tool.")
@ -385,6 +393,11 @@ if __name__ == '__main__':
parser_resync = subparsers.add_parser('resync', help="resync help")
parser_resync.set_defaults(func='resync')
parser_update = subparsers.add_parser('update', help="update help")
# --entries belongs to the update sub-command (not to delete): it restricts
# the update to the given filenames / identifiers. When it is omitted,
# args.entries is None.
parser_update.add_argument('--entries', metavar='entry', nargs='+',
                           help="a filename or an identifier")
parser_update.set_defaults(func='update')
args = parser.parse_args()
try:
if args.func == 'download':
@ -454,5 +467,14 @@ if __name__ == '__main__':
resync()
sys.exit()
elif args.func == 'update':
if args.entries is None:
entries = backend.getEntries()
else:
entries = args.entries
for entry in entries:
update(entry)
sys.exit()
except KeyboardInterrupt:
sys.exit()