Update arXiv papers

This commit is contained in:
Phyks 2014-05-14 22:45:25 +02:00
parent 558946f48d
commit b2488e5707
4 changed files with 44 additions and 33 deletions

View File

@@ -48,12 +48,12 @@ Should be almost working and usable now, although still to be considered as **ex
* Resync * Resync
* working * working
* Update * Update
* Testing * working
## Installation ## Installation
* Clone this git repository where you want : `git clone https://github.com/Phyks/BMC` * Clone this git repository where you want : `git clone https://github.com/Phyks/BMC`
* Install `requesocks`, `PyPDF2` and `isbntools` _via_ Pypi * Install `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ Pypi
* Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want * Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want
* Copy `params.py.example` to `params.py` and customize it to fit your needs * Copy `params.py.example` to `params.py` and customize it to fit your needs
@@ -133,18 +133,12 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
65. Look for published version in arXiv 65. Look for published version in arXiv
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/ 70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
80. Search engine 80. Search engine
100. UTF-8 ?
200. Webserver interface ? GUI ? (not likely for now…) 200. Webserver interface ? GUI ? (not likely for now…)
Keep multiple versions of papers Keep multiple versions of papers
Check stored versions when updating arxiv papers
Export of bibtex Export of bibtex
Tree à la docear ? Tree à la docear ?
## Issues ?
* Multiplication of {{}} => solved in bibtexparser
* UTF-8 and bibtexparser => solved upstream in bibtexparser
===> TODO : update bibtexparser when available in pip
## Thanks ## Thanks
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module * Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module

View File

@@ -36,6 +36,12 @@ def getNewName(src, bibtex, tag=''):
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip()) new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip() new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
for i in authors])) for i in authors]))
if('archiveprefix' not in bibtex or
'arXiv' not in bibtex['archiveprefix']):
new_name = new_name.replace("%v",
bibtex[eprint][bibtex['eprint'].rfind('v'):])
else:
new_name = new_name.replace("%v", '')
if tag == '': if tag == '':
new_name = (params.folder + tools.slugify(new_name) + new_name = (params.folder + tools.slugify(new_name) +
@@ -187,7 +193,7 @@ def diffFilesIndex():
files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ] files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ]
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
index = BibTexParser(fh.read().encode('utf-8')) index = BibTexParser(fh.read())
index_diff = index.get_entry_dict() index_diff = index.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
@@ -213,7 +219,7 @@ def getBibtex(entry, file_id='both'):
""" """
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read().encode('utf-8')) bibtex = BibTexParser(fh.read())
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
@@ -251,28 +257,23 @@ def updateArXiv(entry):
"""Look for new versions of arXiv entry `entry` """Look for new versions of arXiv entry `entry`
Returns False if no new versions or not an arXiv entry, Returns False if no new versions or not an arXiv entry,
Updates the file and returns the new bibtex otherwise. Returns the new bibtex otherwise.
""" """
bibtex = getBibtex(entry) bibtex = getBibtex(entry)
# Check arXiv # Check arXiv
if('ArchivePrefix' not in bibtex and if('archiveprefix' not in bibtex or
'arxiv' not in bibtex['ArchivePrefix']): 'arXiv' not in bibtex['archiveprefix']):
return False return False
arxiv_id = bibtex['Eprint'] arxiv_id = bibtex['eprint']
last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z', last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z',
'', '',
arxiv_id))) arxiv_id)))
last_bibtex = last_bibtex.get_entry_dict() last_bibtex = last_bibtex.get_entry_dict()
last_bibtex = last_bibtex[last_bibtex.keys()[0]]
if last_bibtex['Eprint'] != arxiv_id: if last_bibtex['eprint'] != arxiv_id:
# New version available # TODO: Check that not already imported
with open(bibtex['file'], 'w+') as fh:
fh.write(fetcher.download(last_bibtex['Url']))
bibtex['Eprint'] = last_bibtex['Eprint']
bibtex['URL'] = last_bibtex['URL']
for i in [j for j in last_bibtex.keys() if j not in bibtex.keys()]:
bibtex[i] = last_bibtex[i]
return last_bibtex return last_bibtex
else: else:
return False return False

33
main.py
View File

@@ -22,7 +22,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
def checkBibtex(filename, bibtex_string): def checkBibtex(filename, bibtex_string):
print("The bibtex entry found for "+filename+" is:") print("The bibtex entry found for "+filename+" is:")
bibtex = BibTexParser(bibtex_string.encode('utf-8')) bibtex = BibTexParser(bibtex_string)
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
bibtex = bibtex[bibtex.keys()[0]] bibtex = bibtex[bibtex.keys()[0]]
print(bibtex_string) print(bibtex_string)
@@ -35,11 +35,11 @@ def checkBibtex(filename, bibtex_string):
while check.lower() == 'n': while check.lower() == 'n':
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile: with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
tmpfile.write(bibtex_string.encode('utf-8')) tmpfile.write(bibtex_string)
tmpfile.flush() tmpfile.flush()
subprocess.call([EDITOR, tmpfile.name]) subprocess.call([EDITOR, tmpfile.name])
tmpfile.seek(0) tmpfile.seek(0)
bibtex = BibTexParser(tmpfile.read().encode('utf-8')+"\n") bibtex = BibTexParser(tmpfile.read()+"\n")
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
try: try:
@@ -135,7 +135,7 @@ def addFile(src, filetype, manual):
else: else:
bibtex = '' bibtex = ''
bibtex = BibTexParser(bibtex.encode('utf-8')) bibtex = BibTexParser(bibtex)
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
if len(bibtex) > 0: if len(bibtex) > 0:
bibtex_name = bibtex.keys()[0] bibtex_name = bibtex.keys()[0]
@@ -224,7 +224,7 @@ def editEntry(entry, file_id='both'):
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
index = BibTexParser(fh.read().encode('utf-8')) index = BibTexParser(fh.read())
index = index.get_entry_dict() index = index.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
@@ -256,7 +256,7 @@ def downloadFile(url, filetype, manual):
def openFile(ident): def openFile(ident):
try: try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh: with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read().encode('utf-8')) bibtex = BibTexParser(fh.read())
bibtex = bibtex.get_entry_dict() bibtex = bibtex.get_entry_dict()
except: except:
tools.warning("Unable to open index file.") tools.warning("Unable to open index file.")
@@ -360,12 +360,27 @@ def resync():
" but could not delete it.") " but could not delete it.")
def update(entries): def update(entry):
update = backend.updateArXiv(entry) update = backend.updateArXiv(entry)
if update is not False: if update is not False:
print("New version found for "+entry) print("New version found for "+entry)
print("Downloaded latest version "+update['Eprint']) print("\t Title: "+update['title'])
editEntry(update['file'], 'file') confirm = tools.rawInput("Download it ? [Y/n] ")
if confirm.lower() == 'n':
return
new_name = downloadFile('http://arxiv.org/pdf/'+update['eprint'],
'article', False)
if new_name is not False:
print(update['eprint']+" successfully imported as "+new_name)
else:
tools.warning("An error occurred while downloading "+url)
confirm = tools.rawInput("Delete previous version ? [y/N] ")
if confirm.lower() == 'y':
if not backend.deleteId(entry):
if not backend.deleteFile(entry):
tools.warning("Unable to remove previous version.")
return
print("Previous version successfully deleted.")
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -15,5 +15,6 @@ proxies = [
# %Y = published year # %Y = published year
# %t = title # %t = title
# %a = authors # %a = authors
format_articles = "%f_%l-%j-%Y" # %v = arXiv version
format_articles = "%f_%l-%j-%Y-%v"
format_books = "%a-%t" format_books = "%a-%t"