Update arXiv articles via CLI

2014-05-07 22:04:46 +02:00 · 2014-05-07 22:04:46 +02:00 · 73f3fe7778
parent acd9cfb5e0
commit 73f3fe7778
3 changed files with 69 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -66,11 +66,14 @@ TODO
 TODO
 ### Edit entries
 Run `./main.py edit PARAM` where `PARAM` should be either a path to a paper file or an ident in the bibtex index. This will open a text editor to edit the corresponding bibtex entry.
 ### Download the latest version for papers from arXiv
 Run `./main.py update` to look for available updated versions of your arXiv papers. You can use the optional `--entries ID` argument (where ID is either a bibtex index identifier or a filename) to search only for a limited subset of papers.
 ### Data storage
 All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can resync the index file with `./main.py resync`.
@ -107,10 +110,11 @@ Here are some sources of inspirations for this project :
 A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own.
-60. Automatic download of new arXiv version
+65. Look for published version in arXiv
 70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
 80. Search engine
-90. Webserver interface
+100. UTF-8 ?
 200. Webserver interface ? GUI ? (not likely for now…)
 ## Issues ?
--- a/backend.py
+++ b/backend.py
@ -4,6 +4,7 @@
 import os
 import re
 import tools
 import fetcher
 import params
 from bibtexparser.bparser import BibTexParser
 from bibtexparser.customization import homogeneize_latex_encoding
@ -238,3 +239,42 @@ def getBibtex(entry, file_id='both'):
                bibtex_entry = bibtex[key]
                break
    return bibtex_entry
 def getEntries():
    """Returns the list of all entries in the bibtex index

    Reads `index.bib` from `params.folder`, parses it and returns the keys
    of the resulting entry dict (the entries identifiers).

    Returns False (after emitting a warning) if the index file could not
    be read or parsed.
    """
    try:
        with open(params.folder+'index.bib', 'r') as fh:
            bibtex = BibTexParser(fh.read(),
                                  customization=homogeneize_latex_encoding)
        bibtex = bibtex.get_entry_dict()
    except Exception:
        # Deliberately not a bare `except:` so that KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a read failure.
        tools.warning("Unable to open index file.")
        return False
    return bibtex.keys()
 def updateArXiv(entry):
    """Look for a newer arXiv version of an entry and download it

    `entry` is handed as-is to getBibtex() to fetch the stored bibtex entry.
    The arXiv record for its `Eprint` is fetched through
    fetcher.arXiv2Bib(); if the fetched `Eprint` differs from the stored
    one, the paper is downloaded over the local file and the stored entry
    is updated (`Eprint`, `URL`, plus every field it did not have yet).

    Returns the updated bibtex entry (a dict, still holding local fields
    such as `file`) when a new version was downloaded, False otherwise
    (no entry, not an arXiv paper, nothing fetched or already up to date).
    """
    bibtex = getBibtex(entry)
    # Defensive: how getBibtex() signals a failure is not visible here, so
    # treat any falsy result as "nothing to update".
    if not bibtex:
        return False
    # Check arXiv. This must be an `or`: with `and`, a missing key raised a
    # KeyError and a non-arXiv prefix was never rejected. The comparison is
    # case-insensitive so that both "arxiv" and "arXiv" are accepted.
    if('ArchivePrefix' not in bibtex or
       'arxiv' not in bibtex['ArchivePrefix'].lower()):
        return False
    arxiv_id = bibtex['Eprint']
    # NOTE(review): if the stored Eprint carries a version suffix, confirm
    # that fetcher.arXiv2Bib() still resolves it to the latest version.
    last_bibtex = BibTexParser(fetcher.arXiv2Bib(arxiv_id),
                               customization=homogeneize_latex_encoding)
    fetched = last_bibtex.get_entry_dict()
    if not fetched:
        return False
    # get_entry_dict() maps identifiers to entries; only one entry was
    # requested, so take that one instead of indexing the mapping by field.
    last_bibtex = fetched[list(fetched.keys())[0]]
    if last_bibtex['Eprint'] == arxiv_id:
        return False
    # New version available
    # NOTE(review): the original code used both 'Url' and 'URL' for this
    # field; accept either spelling until the parser's key case is confirmed.
    url = last_bibtex['Url'] if 'Url' in last_bibtex else last_bibtex['URL']
    # Download before opening the file: opening with 'w+' truncates it, so a
    # failed download must not destroy the existing copy of the paper.
    content = fetcher.download(url)
    with open(bibtex['file'], 'w+') as fh:
        fh.write(content)
    bibtex['Eprint'] = last_bibtex['Eprint']
    bibtex['URL'] = url
    for field in last_bibtex:
        if field not in bibtex:
            bibtex[field] = last_bibtex[field]
    return bibtex
--- a/main.py
+++ b/main.py
@ -321,6 +321,14 @@ def resync():
                              " but could not delete it.")
 def update(entries):
    """Check arXiv for a new version of one or several entries

    `entries` is either a single entry (a string, passed as-is to
    backend.updateArXiv()) or an iterable of such entries. For every entry
    for which backend.updateArXiv() does not return False, a message is
    printed and editEntry() is called on the `file` of the returned entry.
    """
    try:
        text_types = (basestring,)  # Python 2: covers str and unicode
    except NameError:
        text_types = (str,)
    # A lone identifier is accepted too, as the function is called once per
    # entry by the command-line dispatcher.
    if isinstance(entries, text_types):
        entries = [entries]
    for entry in entries:
        # Do not name the result `update`: it would shadow this function.
        new_version = backend.updateArXiv(entry)
        if new_version is not False:
            print("New version found for "+entry)
            print("Downloaded latest version "+new_version['Eprint'])
            editEntry(new_version['file'], 'file')
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="A bibliography " +
                                     "management tool.")
@ -385,6 +393,11 @@ if __name__ == '__main__':
    parser_resync = subparsers.add_parser('resync', help="resync help")
    parser_resync.set_defaults(func='resync')
    parser_update = subparsers.add_parser('update', help="update help")
    # --entries belongs to the update sub-command itself, so that
    # `args.entries` exists (None when omitted) once `update` is selected.
    parser_update.add_argument('--entries', metavar='entry', nargs='+',
                               help="a filename or an identifier")
    parser_update.set_defaults(func='update')
    args = parser.parse_args()
    try:
        if args.func == 'download':
@ -454,5 +467,14 @@ if __name__ == '__main__':
                resync()
            sys.exit()
        elif args.func == 'update':
            if args.entries is None:
                entries = backend.getEntries()
            else:
                entries = args.entries
            for entry in entries:
                update(entry)
            sys.exit()
    except KeyboardInterrupt:
        sys.exit()