Update arXiv papers

This commit is contained in:
Phyks 2014-05-14 22:45:25 +02:00
parent 558946f48d
commit b2488e5707
4 changed files with 44 additions and 33 deletions

View File

@ -48,12 +48,12 @@ Should be almost working and usable now, although still to be considered as **ex
* Resync
* working
* Update
* Testing
* working
## Installation
* Clone this git repository wherever you want: `git clone https://github.com/Phyks/BMC`
* Install `requesocks`, `PyPDF2` and `isbntools` _via_ Pypi
* Install `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ PyPI
* Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want
* Copy `params.py.example` to `params.py` and customize it to fit your needs
@ -133,18 +133,12 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
65. Look for published version in arXiv
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
80. Search engine
100. UTF-8 ?
200. Webserver interface ? GUI ? (not likely for now…)
Keep multiple versions of papers
Check stored versions when updating arxiv papers
Export of bibtex
Tree à la docear ?
## Issues ?
* Multiplication of {{}} => solved in bibtexparser
* UTF-8 and bibtexparser => solved upstream in bibtexparser
===> TODO: update bibtexparser once the fixed version is available in pip
## Thanks
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) Python module

View File

@ -36,6 +36,12 @@ def getNewName(src, bibtex, tag=''):
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
for i in authors]))
if('archiveprefix' not in bibtex or
'arXiv' not in bibtex['archiveprefix']):
new_name = new_name.replace("%v",
bibtex[eprint][bibtex['eprint'].rfind('v'):])
else:
new_name = new_name.replace("%v", '')
if tag == '':
new_name = (params.folder + tools.slugify(new_name) +
@ -187,7 +193,7 @@ def diffFilesIndex():
files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ]
try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
index = BibTexParser(fh.read().encode('utf-8'))
index = BibTexParser(fh.read())
index_diff = index.get_entry_dict()
except:
tools.warning("Unable to open index file.")
@ -213,7 +219,7 @@ def getBibtex(entry, file_id='both'):
"""
try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read().encode('utf-8'))
bibtex = BibTexParser(fh.read())
bibtex = bibtex.get_entry_dict()
except:
tools.warning("Unable to open index file.")
@ -251,28 +257,23 @@ def updateArXiv(entry):
"""Look for new versions of arXiv entry `entry`
Returns False if no new versions or not an arXiv entry,
Updates the file and returns the new bibtex otherwise.
Returns the new bibtex otherwise.
"""
bibtex = getBibtex(entry)
# Check arXiv
if('ArchivePrefix' not in bibtex and
'arxiv' not in bibtex['ArchivePrefix']):
if('archiveprefix' not in bibtex or
'arXiv' not in bibtex['archiveprefix']):
return False
arxiv_id = bibtex['Eprint']
arxiv_id = bibtex['eprint']
last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z',
'',
arxiv_id)))
last_bibtex = last_bibtex.get_entry_dict()
last_bibtex = last_bibtex[last_bibtex.keys()[0]]
if last_bibtex['Eprint'] != arxiv_id:
# New version available
with open(bibtex['file'], 'w+') as fh:
fh.write(fetcher.download(last_bibtex['Url']))
bibtex['Eprint'] = last_bibtex['Eprint']
bibtex['URL'] = last_bibtex['URL']
for i in [j for j in last_bibtex.keys() if j not in bibtex.keys()]:
bibtex[i] = last_bibtex[i]
if last_bibtex['eprint'] != arxiv_id:
# TODO: Check that not already imported
return last_bibtex
else:
return False

33
main.py
View File

@ -22,7 +22,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
def checkBibtex(filename, bibtex_string):
print("The bibtex entry found for "+filename+" is:")
bibtex = BibTexParser(bibtex_string.encode('utf-8'))
bibtex = BibTexParser(bibtex_string)
bibtex = bibtex.get_entry_dict()
bibtex = bibtex[bibtex.keys()[0]]
print(bibtex_string)
@ -35,11 +35,11 @@ def checkBibtex(filename, bibtex_string):
while check.lower() == 'n':
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
tmpfile.write(bibtex_string.encode('utf-8'))
tmpfile.write(bibtex_string)
tmpfile.flush()
subprocess.call([EDITOR, tmpfile.name])
tmpfile.seek(0)
bibtex = BibTexParser(tmpfile.read().encode('utf-8')+"\n")
bibtex = BibTexParser(tmpfile.read()+"\n")
bibtex = bibtex.get_entry_dict()
try:
@ -135,7 +135,7 @@ def addFile(src, filetype, manual):
else:
bibtex = ''
bibtex = BibTexParser(bibtex.encode('utf-8'))
bibtex = BibTexParser(bibtex)
bibtex = bibtex.get_entry_dict()
if len(bibtex) > 0:
bibtex_name = bibtex.keys()[0]
@ -224,7 +224,7 @@ def editEntry(entry, file_id='both'):
try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
index = BibTexParser(fh.read().encode('utf-8'))
index = BibTexParser(fh.read())
index = index.get_entry_dict()
except:
tools.warning("Unable to open index file.")
@ -256,7 +256,7 @@ def downloadFile(url, filetype, manual):
def openFile(ident):
try:
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
bibtex = BibTexParser(fh.read().encode('utf-8'))
bibtex = BibTexParser(fh.read())
bibtex = bibtex.get_entry_dict()
except:
tools.warning("Unable to open index file.")
@ -360,12 +360,27 @@ def resync():
" but could not delete it.")
def update(entries):
def update(entry):
update = backend.updateArXiv(entry)
if update is not False:
print("New version found for "+entry)
print("Downloaded latest version "+update['Eprint'])
editEntry(update['file'], 'file')
print("\t Title: "+update['title'])
confirm = tools.rawInput("Download it ? [Y/n] ")
if confirm.lower() == 'n':
return
new_name = downloadFile('http://arxiv.org/pdf/'+update['eprint'],
'article', False)
if new_name is not False:
print(update['eprint']+" successfully imported as "+new_name)
else:
tools.warning("An error occurred while downloading "+url)
confirm = tools.rawInput("Delete previous version ? [y/N] ")
if confirm.lower() == 'y':
if not backend.deleteId(entry):
if not backend.deleteFile(entry):
tools.warning("Unable to remove previous version.")
return
print("Previous version successfully deleted.")
if __name__ == '__main__':

View File

@ -15,5 +15,6 @@ proxies = [
# %Y = published year
# %t = title
# %a = authors
format_articles = "%f_%l-%j-%Y"
# %v = arXiv version
format_articles = "%f_%l-%j-%Y-%v"
format_books = "%a-%t"