Update arXiv articles via CLI

This commit is contained in:
Phyks 2014-05-07 22:04:46 +02:00
parent acd9cfb5e0
commit 73f3fe7778
3 changed files with 69 additions and 3 deletions

View File

@ -66,11 +66,14 @@ TODO
TODO TODO
### Edit entries ### Edit entries
Run `./main.py edit PARAM` where `PARAM` should be either a path to a paper file or an ident in the bibtex index. This will open a text editor to edit the corresponding bibtex entry. Run `./main.py edit PARAM` where `PARAM` should be either a path to a paper file or an ident in the bibtex index. This will open a text editor to edit the corresponding bibtex entry.
### Download the latest version for papers from arXiv
Run `./main.py update` to look for updated versions of your arXiv papers. You can use the optional `--entries ID` argument (where `ID` is either a bibtex index identifier or a filename) to restrict the search to a subset of papers.
### Data storage ### Data storage
All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can resync the index file with `./main.py resync`. All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can resync the index file with `./main.py resync`.
@ -107,10 +110,11 @@ Here are some sources of inspirations for this project :
A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own. A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own.
60. Automatic download of new arXiv version 65. Look for published version in arXiv
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/ 70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
80. Search engine 80. Search engine
90. Webserver interface 100. UTF-8 ?
200. Webserver interface ? GUI ? (not likely for now…)
## Issues ? ## Issues ?

View File

@ -4,6 +4,7 @@
import os import os
import re import re
import tools import tools
import fetcher
import params import params
from bibtexparser.bparser import BibTexParser from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import homogeneize_latex_encoding from bibtexparser.customization import homogeneize_latex_encoding
@ -238,3 +239,42 @@ def getBibtex(entry, file_id='both'):
bibtex_entry = bibtex[key] bibtex_entry = bibtex[key]
break break
return bibtex_entry return bibtex_entry
def getEntries():
    """Return the list of all entry identifiers in the bibtex index.

    Reads `index.bib` from `params.folder` and parses it with
    `BibTexParser`.

    Returns:
        A list with the keys of the parsed entry dict, or False (after
        emitting a warning) if the index could not be read or parsed.
    """
    try:
        with open(params.folder+'index.bib', 'r') as fh:
            bibtex = BibTexParser(fh.read(),
                                  customization=homogeneize_latex_encoding)
        bibtex = bibtex.get_entry_dict()
    except Exception:
        # Deliberately not a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        tools.warning("Unable to open index file.")
        return False
    # Materialize the keys so callers always get a real list.
    return list(bibtex.keys())
def _findKey(entry, name):
    """Return the key of `entry` equal to `name` ignoring case, or None."""
    lowered = name.lower()
    for key in entry:
        if key.lower() == lowered:
            return key
    return None


def updateArXiv(entry):
    """Download the latest arXiv version of `entry` if a newer one exists.

    Args:
        entry: a filename or an identifier, passed as is to `getBibtex`.

    Returns:
        False if the entry cannot be found, is not an arXiv paper, or is
        already up to date. Otherwise the local bibtex entry (including its
        `file` field), updated in memory with the eprint id, URL and any
        additional fields of the latest arXiv record.

    Note:
        Only the paper file is rewritten here; the updated entry is not
        written back to the index by this function.
    """
    bibtex = getBibtex(entry)
    if not bibtex:
        return False

    # Field names are matched case-insensitively, since their case depends
    # on how the entry was written / parsed.
    prefix_key = _findKey(bibtex, 'ArchivePrefix')
    eprint_key = _findKey(bibtex, 'Eprint')
    # Not an arXiv paper if either field is missing or the prefix does not
    # mention arXiv (`or`, so a missing field never gets indexed).
    if (prefix_key is None or eprint_key is None or
            'arxiv' not in bibtex[prefix_key].lower()):
        return False

    arxiv_id = bibtex[eprint_key]
    last_bibtex = BibTexParser(fetcher.arXiv2Bib(arxiv_id),
                               customization=homogeneize_latex_encoding)
    # get_entry_dict() maps entry identifiers to entries; the fetched
    # record holds a single entry, so take it.
    fetched = last_bibtex.get_entry_dict()
    if not fetched:
        return False
    last_bibtex = list(fetched.values())[0]

    last_eprint_key = _findKey(last_bibtex, 'Eprint')
    if last_eprint_key is None or last_bibtex[last_eprint_key] == arxiv_id:
        # Nothing newer is available
        return False

    last_url_key = _findKey(last_bibtex, 'Url')
    if last_url_key is None:
        # No download location for the new version
        return False

    # New version available
    # NOTE(review): the file is opened in text mode as before; confirm what
    # fetcher.download returns and whether 'wb' would be more appropriate.
    with open(bibtex['file'], 'w+') as fh:
        fh.write(fetcher.download(last_bibtex[last_url_key]))

    bibtex[eprint_key] = last_bibtex[last_eprint_key]
    bibtex[_findKey(bibtex, 'Url') or last_url_key] = last_bibtex[last_url_key]
    # Import fields that only exist in the latest record
    for key in last_bibtex:
        if _findKey(bibtex, key) is None:
            bibtex[key] = last_bibtex[key]
    return bibtex

22
main.py
View File

@ -321,6 +321,14 @@ def resync():
" but could not delete it.") " but could not delete it.")
def update(entries):
    """Look for, and download, new arXiv versions of the given entries.

    Args:
        entries: a single entry (filename or identifier) or an iterable of
            such entries. Falsy values (None, False, empty) are ignored.

    For every entry for which `backend.updateArXiv` reports a new version,
    a message is printed and `editEntry` is called on the entry's file.
    """
    if not entries:
        return

    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3
    # A lone entry is accepted as well as a collection of entries.
    if isinstance(entries, string_types):
        entries = [entries]

    for entry in entries:
        latest = backend.updateArXiv(entry)
        if latest is False:
            continue
        print("New version found for "+entry)
        print("Downloaded latest version "+latest['Eprint'])
        editEntry(latest['file'], 'file')
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description="A bibliography " + parser = argparse.ArgumentParser(description="A bibliography " +
"management tool.") "management tool.")
@ -385,6 +393,11 @@ if __name__ == '__main__':
parser_resync = subparsers.add_parser('resync', help="resync help") parser_resync = subparsers.add_parser('resync', help="resync help")
parser_resync.set_defaults(func='resync') parser_resync.set_defaults(func='resync')
parser_update = subparsers.add_parser('update', help="update help")
# --entries belongs to the `update` sub-command: it is optional and, when
# omitted, args.entries is None.
parser_update.add_argument('--entries', metavar='entry', nargs='+',
                           help="a filename or an identifier")
parser_update.set_defaults(func='update')
args = parser.parse_args() args = parser.parse_args()
try: try:
if args.func == 'download': if args.func == 'download':
@ -454,5 +467,14 @@ if __name__ == '__main__':
resync() resync()
sys.exit() sys.exit()
elif args.func == 'update':
if args.entries is None:
entries = backend.getEntries()
else:
entries = args.entries
for entry in entries:
update(entry)
sys.exit()
except KeyboardInterrupt: except KeyboardInterrupt:
sys.exit() sys.exit()