From b2488e57077372d3395ec718ef1b17599b6f5e81 Mon Sep 17 00:00:00 2001 From: Phyks Date: Wed, 14 May 2014 22:45:25 +0200 Subject: [PATCH] Update arXiv papers --- README.md | 12 +++--------- backend.py | 29 +++++++++++++++-------------- main.py | 33 ++++++++++++++++++++++++--------- params.py.example | 3 ++- 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 4bdd13a..2473c90 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,12 @@ Should be almost working and usable now, although still to be considered as **ex * Resync * working * Update - * Testing + * working ## Installation * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC` -* Install `requesocks`, `PyPDF2` and `isbntools` _via_ Pypi +* Install `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi * Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want * Copy `params.py.example` to `params.py` and customize it to fit your needs @@ -133,18 +133,12 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really 65. Look for published version in arXiv 70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/ 80. Search engine -100. UTF-8 ? 200. Webserver interface ? GUI ? (not likely for now…) Keep multiple versions of papers +Check stored versions when updating arxiv papers Export of bibtex Tree à la docear ? -## Issues ? - -* Multiplication of {{}} => solved in bibtexparser -* UTF-8 and bibtexparser => solved upstream in bibtexparser -===> TODO : update bibtexparser when available in pip - ## Thanks * Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module diff --git a/backend.py b/backend.py index 00dfde9..c402378 100644 --- a/backend.py +++ b/backend.py @@ -36,6 +36,12 @@ def getNewName(src, bibtex, tag=''): new_name = new_name.replace("%l", authors[-1].split(',')[0].strip()) new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip() for i in authors])) + if('archiveprefix' not in bibtex or + 'arXiv' not in bibtex['archiveprefix']): + new_name = new_name.replace("%v", + bibtex[eprint][bibtex['eprint'].rfind('v'):]) + else: + new_name = new_name.replace("%v", '') if tag == '': new_name = (params.folder + tools.slugify(new_name) + @@ -187,7 +193,7 @@ def diffFilesIndex(): files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ] try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - index = BibTexParser(fh.read().encode('utf-8')) + index = BibTexParser(fh.read()) index_diff = index.get_entry_dict() except: tools.warning("Unable to open index file.") @@ -213,7 +219,7 @@ def getBibtex(entry, file_id='both'): """ try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - bibtex = BibTexParser(fh.read().encode('utf-8')) + bibtex = BibTexParser(fh.read()) bibtex = bibtex.get_entry_dict() except: tools.warning("Unable to open index file.") @@ -251,28 +257,23 @@ def updateArXiv(entry): """Look for new versions of arXiv entry `entry` Returns False if no new versions or not an arXiv entry, - Updates the file and returns the new bibtex otherwise. + Returns the new bibtex otherwise. """ bibtex = getBibtex(entry) # Check arXiv - if('ArchivePrefix' not in bibtex and - 'arxiv' not in bibtex['ArchivePrefix']): + if('archiveprefix' not in bibtex or + 'arXiv' not in bibtex['archiveprefix']): return False - arxiv_id = bibtex['Eprint'] + arxiv_id = bibtex['eprint'] last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z', '', arxiv_id))) last_bibtex = last_bibtex.get_entry_dict() + last_bibtex = last_bibtex[last_bibtex.keys()[0]] - if last_bibtex['Eprint'] != arxiv_id: - # New version available - with open(bibtex['file'], 'w+') as fh: - fh.write(fetcher.download(last_bibtex['Url'])) - bibtex['Eprint'] = last_bibtex['Eprint'] - bibtex['URL'] = last_bibtex['URL'] - for i in [j for j in last_bibtex.keys() if j not in bibtex.keys()]: - bibtex[i] = last_bibtex[i] + if last_bibtex['eprint'] != arxiv_id: + # TODO: Check that not already imported return last_bibtex else: return False diff --git a/main.py b/main.py index 876578d..817a479 100755 --- a/main.py +++ b/main.py @@ -22,7 +22,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim' def checkBibtex(filename, bibtex_string): print("The bibtex entry found for "+filename+" is:") - bibtex = BibTexParser(bibtex_string.encode('utf-8')) + bibtex = BibTexParser(bibtex_string) bibtex = bibtex.get_entry_dict() bibtex = bibtex[bibtex.keys()[0]] print(bibtex_string) @@ -35,11 +35,11 @@ def checkBibtex(filename, bibtex_string): while check.lower() == 'n': with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile: - tmpfile.write(bibtex_string.encode('utf-8')) + tmpfile.write(bibtex_string) tmpfile.flush() subprocess.call([EDITOR, tmpfile.name]) tmpfile.seek(0) - bibtex = BibTexParser(tmpfile.read().encode('utf-8')+"\n") + bibtex = BibTexParser(tmpfile.read()+"\n") bibtex = bibtex.get_entry_dict() try: @@ -135,7 +135,7 @@ def addFile(src, filetype, manual): else: bibtex = '' - bibtex = BibTexParser(bibtex.encode('utf-8')) + bibtex = BibTexParser(bibtex) bibtex = bibtex.get_entry_dict() if len(bibtex) > 0: bibtex_name = bibtex.keys()[0] @@ -224,7 +224,7 @@ def editEntry(entry, file_id='both'): try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - index = BibTexParser(fh.read().encode('utf-8')) + index = BibTexParser(fh.read()) index = index.get_entry_dict() except: tools.warning("Unable to open index file.") @@ -256,7 +256,7 @@ def downloadFile(url, filetype, manual): def openFile(ident): try: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: - bibtex = BibTexParser(fh.read().encode('utf-8')) + bibtex = BibTexParser(fh.read()) bibtex = bibtex.get_entry_dict() except: tools.warning("Unable to open index file.") @@ -360,12 +360,27 @@ def resync(): " but could not delete it.") -def update(entries): +def update(entry): update = backend.updateArXiv(entry) if update is not False: print("New version found for "+entry) - print("Downloaded latest version "+update['Eprint']) - editEntry(update['file'], 'file') + print("\t Title: "+update['title']) + confirm = tools.rawInput("Download it ? [Y/n] ") + if confirm.lower() == 'n': + return + new_name = downloadFile('http://arxiv.org/pdf/'+update['eprint'], + 'article', False) + if new_name is not False: + print(update['eprint']+" successfully imported as "+new_name) + else: + tools.warning("An error occurred while downloading "+url) + confirm = tools.rawInput("Delete previous version ? [y/N] ") + if confirm.lower() == 'y': + if not backend.deleteId(entry): + if not backend.deleteFile(entry): + tools.warning("Unable to remove previous version.") + return + print("Previous version successfully deleted.") if __name__ == '__main__': diff --git a/params.py.example b/params.py.example index b5d37bc..6f04822 100644 --- a/params.py.example +++ b/params.py.example @@ -15,5 +15,6 @@ proxies = [ # %Y = published year # %t = title # %a = authors -format_articles = "%f_%l-%j-%Y" +# %v = arXiv version +format_articles = "%f_%l-%j-%Y-%v" format_books = "%a-%t"