Update arXiv papers
This commit is contained in:
parent
558946f48d
commit
b2488e5707
12
README.md
12
README.md
@ -48,12 +48,12 @@ Should be almost working and usable now, although still to be considered as **ex
|
||||
* Resync
|
||||
* working
|
||||
* Update
|
||||
* Testing
|
||||
* working
|
||||
|
||||
## Installation
|
||||
|
||||
* Clone this git repository wherever you want: `git clone https://github.com/Phyks/BMC`
|
||||
* Install `requesocks`, `PyPDF2` and `isbntools` _via_ Pypi
|
||||
* Install `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ PyPI
|
||||
* Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager, whichever way you prefer
|
||||
* Copy `params.py.example` to `params.py` and customize it to fit your needs
|
||||
|
||||
@ -133,18 +133,12 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
||||
65. Look for published version in arXiv
|
||||
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
|
||||
80. Search engine
|
||||
100. UTF-8 ?
|
||||
200. Webserver interface ? GUI ? (not likely for now…)
|
||||
Keep multiple versions of papers
|
||||
Check stored versions when updating arxiv papers
|
||||
Export of bibtex
|
||||
Tree à la docear ?
|
||||
|
||||
## Issues ?
|
||||
|
||||
* Multiplication of {{}} => solved in bibtexparser
|
||||
* UTF-8 and bibtexparser => solved upstream in bibtexparser
|
||||
===> TODO: update bibtexparser when available in pip
|
||||
|
||||
## Thanks
|
||||
|
||||
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module
|
||||
|
29
backend.py
29
backend.py
@ -36,6 +36,12 @@ def getNewName(src, bibtex, tag=''):
|
||||
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
|
||||
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
|
||||
for i in authors]))
|
||||
if('archiveprefix' not in bibtex or
|
||||
'arXiv' not in bibtex['archiveprefix']):
|
||||
new_name = new_name.replace("%v",
|
||||
bibtex[eprint][bibtex['eprint'].rfind('v'):])
|
||||
else:
|
||||
new_name = new_name.replace("%v", '')
|
||||
|
||||
if tag == '':
|
||||
new_name = (params.folder + tools.slugify(new_name) +
|
||||
@ -187,7 +193,7 @@ def diffFilesIndex():
|
||||
files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ]
|
||||
try:
|
||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||
index = BibTexParser(fh.read().encode('utf-8'))
|
||||
index = BibTexParser(fh.read())
|
||||
index_diff = index.get_entry_dict()
|
||||
except:
|
||||
tools.warning("Unable to open index file.")
|
||||
@ -213,7 +219,7 @@ def getBibtex(entry, file_id='both'):
|
||||
"""
|
||||
try:
|
||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||
bibtex = BibTexParser(fh.read().encode('utf-8'))
|
||||
bibtex = BibTexParser(fh.read())
|
||||
bibtex = bibtex.get_entry_dict()
|
||||
except:
|
||||
tools.warning("Unable to open index file.")
|
||||
@ -251,28 +257,23 @@ def updateArXiv(entry):
|
||||
"""Look for new versions of arXiv entry `entry`
|
||||
|
||||
Returns False if no new versions or not an arXiv entry,
|
||||
Updates the file and returns the new bibtex otherwise.
|
||||
Returns the new bibtex otherwise.
|
||||
"""
|
||||
bibtex = getBibtex(entry)
|
||||
# Check arXiv
|
||||
if('ArchivePrefix' not in bibtex and
|
||||
'arxiv' not in bibtex['ArchivePrefix']):
|
||||
if('archiveprefix' not in bibtex or
|
||||
'arXiv' not in bibtex['archiveprefix']):
|
||||
return False
|
||||
|
||||
arxiv_id = bibtex['Eprint']
|
||||
arxiv_id = bibtex['eprint']
|
||||
last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z',
|
||||
'',
|
||||
arxiv_id)))
|
||||
last_bibtex = last_bibtex.get_entry_dict()
|
||||
last_bibtex = last_bibtex[last_bibtex.keys()[0]]
|
||||
|
||||
if last_bibtex['Eprint'] != arxiv_id:
|
||||
# New version available
|
||||
with open(bibtex['file'], 'w+') as fh:
|
||||
fh.write(fetcher.download(last_bibtex['Url']))
|
||||
bibtex['Eprint'] = last_bibtex['Eprint']
|
||||
bibtex['URL'] = last_bibtex['URL']
|
||||
for i in [j for j in last_bibtex.keys() if j not in bibtex.keys()]:
|
||||
bibtex[i] = last_bibtex[i]
|
||||
if last_bibtex['eprint'] != arxiv_id:
|
||||
# TODO: Check that not already imported
|
||||
return last_bibtex
|
||||
else:
|
||||
return False
|
||||
|
33
main.py
33
main.py
@ -22,7 +22,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
|
||||
def checkBibtex(filename, bibtex_string):
|
||||
print("The bibtex entry found for "+filename+" is:")
|
||||
|
||||
bibtex = BibTexParser(bibtex_string.encode('utf-8'))
|
||||
bibtex = BibTexParser(bibtex_string)
|
||||
bibtex = bibtex.get_entry_dict()
|
||||
bibtex = bibtex[bibtex.keys()[0]]
|
||||
print(bibtex_string)
|
||||
@ -35,11 +35,11 @@ def checkBibtex(filename, bibtex_string):
|
||||
|
||||
while check.lower() == 'n':
|
||||
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
|
||||
tmpfile.write(bibtex_string.encode('utf-8'))
|
||||
tmpfile.write(bibtex_string)
|
||||
tmpfile.flush()
|
||||
subprocess.call([EDITOR, tmpfile.name])
|
||||
tmpfile.seek(0)
|
||||
bibtex = BibTexParser(tmpfile.read().encode('utf-8')+"\n")
|
||||
bibtex = BibTexParser(tmpfile.read()+"\n")
|
||||
|
||||
bibtex = bibtex.get_entry_dict()
|
||||
try:
|
||||
@ -135,7 +135,7 @@ def addFile(src, filetype, manual):
|
||||
else:
|
||||
bibtex = ''
|
||||
|
||||
bibtex = BibTexParser(bibtex.encode('utf-8'))
|
||||
bibtex = BibTexParser(bibtex)
|
||||
bibtex = bibtex.get_entry_dict()
|
||||
if len(bibtex) > 0:
|
||||
bibtex_name = bibtex.keys()[0]
|
||||
@ -224,7 +224,7 @@ def editEntry(entry, file_id='both'):
|
||||
|
||||
try:
|
||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||
index = BibTexParser(fh.read().encode('utf-8'))
|
||||
index = BibTexParser(fh.read())
|
||||
index = index.get_entry_dict()
|
||||
except:
|
||||
tools.warning("Unable to open index file.")
|
||||
@ -256,7 +256,7 @@ def downloadFile(url, filetype, manual):
|
||||
def openFile(ident):
|
||||
try:
|
||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||
bibtex = BibTexParser(fh.read().encode('utf-8'))
|
||||
bibtex = BibTexParser(fh.read())
|
||||
bibtex = bibtex.get_entry_dict()
|
||||
except:
|
||||
tools.warning("Unable to open index file.")
|
||||
@ -360,12 +360,27 @@ def resync():
|
||||
" but could not delete it.")
|
||||
|
||||
|
||||
def update(entries):
|
||||
def update(entry):
|
||||
update = backend.updateArXiv(entry)
|
||||
if update is not False:
|
||||
print("New version found for "+entry)
|
||||
print("Downloaded latest version "+update['Eprint'])
|
||||
editEntry(update['file'], 'file')
|
||||
print("\t Title: "+update['title'])
|
||||
confirm = tools.rawInput("Download it ? [Y/n] ")
|
||||
if confirm.lower() == 'n':
|
||||
return
|
||||
new_name = downloadFile('http://arxiv.org/pdf/'+update['eprint'],
|
||||
'article', False)
|
||||
if new_name is not False:
|
||||
print(update['eprint']+" successfully imported as "+new_name)
|
||||
else:
|
||||
tools.warning("An error occurred while downloading "+url)
|
||||
confirm = tools.rawInput("Delete previous version ? [y/N] ")
|
||||
if confirm.lower() == 'y':
|
||||
if not backend.deleteId(entry):
|
||||
if not backend.deleteFile(entry):
|
||||
tools.warning("Unable to remove previous version.")
|
||||
return
|
||||
print("Previous version successfully deleted.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -15,5 +15,6 @@ proxies = [
|
||||
# %Y = published year
|
||||
# %t = title
|
||||
# %a = authors
|
||||
format_articles = "%f_%l-%j-%Y"
|
||||
# %v = arXiv version
|
||||
format_articles = "%f_%l-%j-%Y-%v"
|
||||
format_books = "%a-%t"
|
||||
|
Loading…
Reference in New Issue
Block a user