Update arXiv papers
This commit is contained in:
parent
558946f48d
commit
b2488e5707
12
README.md
12
README.md
@ -48,12 +48,12 @@ Should be almost working and usable now, although still to be considered as **ex
|
|||||||
* Resync
|
* Resync
|
||||||
* working
|
* working
|
||||||
* Update
|
* Update
|
||||||
* Testing
|
* working
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
* Clone this git repository wherever you want: `git clone https://github.com/Phyks/BMC`
|
* Clone this git repository wherever you want: `git clone https://github.com/Phyks/BMC`
|
||||||
* Install `requesocks`, `PyPDF2` and `isbntools` _via_ PyPI
|
* Install `requesocks`, `bibtexparser` (https://github.com/sciunto/python-bibtexparser), `PyPDF2` and `isbntools` _via_ PyPI
|
||||||
* Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want
|
* Install `pdftotext` (provided by Xpdf) and `djvulibre` _via_ your package manager the way you want
|
||||||
* Copy `params.py.example` to `params.py` and customize it to fit your needs
|
* Copy `params.py.example` to `params.py` and customize it to fit your needs
|
||||||
|
|
||||||
@ -133,18 +133,12 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
|||||||
65. Look for published version in arXiv
|
65. Look for published version in arXiv
|
||||||
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
|
70. No DOI for HAL => metadata with SOAP API… don't want to handle it for now :/
|
||||||
80. Search engine
|
80. Search engine
|
||||||
100. UTF-8 ?
|
|
||||||
200. Webserver interface ? GUI ? (not likely for now…)
|
200. Webserver interface ? GUI ? (not likely for now…)
|
||||||
Keep multiple versions of papers
|
Keep multiple versions of papers
|
||||||
|
Check stored versions when updating arXiv papers
|
||||||
Export of bibtex
|
Export of bibtex
|
||||||
Tree à la docear ?
|
Tree à la docear ?
|
||||||
|
|
||||||
## Issues ?
|
|
||||||
|
|
||||||
* Multiplication of {{}} => solved in bibtexparser
|
|
||||||
* UTF-8 and bibtexparser => solved upstream in bibtexparser
|
|
||||||
===> TODO : update bibtexparser when available in pip
|
|
||||||
|
|
||||||
## Thanks
|
## Thanks
|
||||||
|
|
||||||
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module
|
* Nathan Grigg for his [arxiv2bib](https://pypi.python.org/pypi/arxiv2bib/1.0.5#downloads) python module
|
||||||
|
29
backend.py
29
backend.py
@ -36,6 +36,12 @@ def getNewName(src, bibtex, tag=''):
|
|||||||
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
|
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
|
||||||
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
|
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
|
||||||
for i in authors]))
|
for i in authors]))
|
||||||
|
if('archiveprefix' not in bibtex or
|
||||||
|
'arXiv' not in bibtex['archiveprefix']):
|
||||||
|
new_name = new_name.replace("%v",
|
||||||
|
bibtex[eprint][bibtex['eprint'].rfind('v'):])
|
||||||
|
else:
|
||||||
|
new_name = new_name.replace("%v", '')
|
||||||
|
|
||||||
if tag == '':
|
if tag == '':
|
||||||
new_name = (params.folder + tools.slugify(new_name) +
|
new_name = (params.folder + tools.slugify(new_name) +
|
||||||
@ -187,7 +193,7 @@ def diffFilesIndex():
|
|||||||
files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ]
|
files = [ i for i in files if tools.getExtension(i) in ['.pdf', '.djvu'] ]
|
||||||
try:
|
try:
|
||||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||||
index = BibTexParser(fh.read().encode('utf-8'))
|
index = BibTexParser(fh.read())
|
||||||
index_diff = index.get_entry_dict()
|
index_diff = index.get_entry_dict()
|
||||||
except:
|
except:
|
||||||
tools.warning("Unable to open index file.")
|
tools.warning("Unable to open index file.")
|
||||||
@ -213,7 +219,7 @@ def getBibtex(entry, file_id='both'):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||||
bibtex = BibTexParser(fh.read().encode('utf-8'))
|
bibtex = BibTexParser(fh.read())
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
except:
|
except:
|
||||||
tools.warning("Unable to open index file.")
|
tools.warning("Unable to open index file.")
|
||||||
@ -251,28 +257,23 @@ def updateArXiv(entry):
|
|||||||
"""Look for new versions of arXiv entry `entry`
|
"""Look for new versions of arXiv entry `entry`
|
||||||
|
|
||||||
Returns False if no new versions or not an arXiv entry,
|
Returns False if no new versions or not an arXiv entry,
|
||||||
Updates the file and returns the new bibtex otherwise.
|
Returns the new bibtex otherwise.
|
||||||
"""
|
"""
|
||||||
bibtex = getBibtex(entry)
|
bibtex = getBibtex(entry)
|
||||||
# Check arXiv
|
# Check arXiv
|
||||||
if('ArchivePrefix' not in bibtex and
|
if('archiveprefix' not in bibtex or
|
||||||
'arxiv' not in bibtex['ArchivePrefix']):
|
'arXiv' not in bibtex['archiveprefix']):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
arxiv_id = bibtex['Eprint']
|
arxiv_id = bibtex['eprint']
|
||||||
last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z',
|
last_bibtex = BibTexParser(fetcher.arXiv2Bib(re.sub(r'v\d+\Z',
|
||||||
'',
|
'',
|
||||||
arxiv_id)))
|
arxiv_id)))
|
||||||
last_bibtex = last_bibtex.get_entry_dict()
|
last_bibtex = last_bibtex.get_entry_dict()
|
||||||
|
last_bibtex = last_bibtex[last_bibtex.keys()[0]]
|
||||||
|
|
||||||
if last_bibtex['Eprint'] != arxiv_id:
|
if last_bibtex['eprint'] != arxiv_id:
|
||||||
# New version available
|
# TODO: Check that not already imported
|
||||||
with open(bibtex['file'], 'w+') as fh:
|
|
||||||
fh.write(fetcher.download(last_bibtex['Url']))
|
|
||||||
bibtex['Eprint'] = last_bibtex['Eprint']
|
|
||||||
bibtex['URL'] = last_bibtex['URL']
|
|
||||||
for i in [j for j in last_bibtex.keys() if j not in bibtex.keys()]:
|
|
||||||
bibtex[i] = last_bibtex[i]
|
|
||||||
return last_bibtex
|
return last_bibtex
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
33
main.py
33
main.py
@ -22,7 +22,7 @@ EDITOR = os.environ.get('EDITOR') if os.environ.get('EDITOR') else 'vim'
|
|||||||
def checkBibtex(filename, bibtex_string):
|
def checkBibtex(filename, bibtex_string):
|
||||||
print("The bibtex entry found for "+filename+" is:")
|
print("The bibtex entry found for "+filename+" is:")
|
||||||
|
|
||||||
bibtex = BibTexParser(bibtex_string.encode('utf-8'))
|
bibtex = BibTexParser(bibtex_string)
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
bibtex = bibtex[bibtex.keys()[0]]
|
bibtex = bibtex[bibtex.keys()[0]]
|
||||||
print(bibtex_string)
|
print(bibtex_string)
|
||||||
@ -35,11 +35,11 @@ def checkBibtex(filename, bibtex_string):
|
|||||||
|
|
||||||
while check.lower() == 'n':
|
while check.lower() == 'n':
|
||||||
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
|
with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
|
||||||
tmpfile.write(bibtex_string.encode('utf-8'))
|
tmpfile.write(bibtex_string)
|
||||||
tmpfile.flush()
|
tmpfile.flush()
|
||||||
subprocess.call([EDITOR, tmpfile.name])
|
subprocess.call([EDITOR, tmpfile.name])
|
||||||
tmpfile.seek(0)
|
tmpfile.seek(0)
|
||||||
bibtex = BibTexParser(tmpfile.read().encode('utf-8')+"\n")
|
bibtex = BibTexParser(tmpfile.read()+"\n")
|
||||||
|
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
try:
|
try:
|
||||||
@ -135,7 +135,7 @@ def addFile(src, filetype, manual):
|
|||||||
else:
|
else:
|
||||||
bibtex = ''
|
bibtex = ''
|
||||||
|
|
||||||
bibtex = BibTexParser(bibtex.encode('utf-8'))
|
bibtex = BibTexParser(bibtex)
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
if len(bibtex) > 0:
|
if len(bibtex) > 0:
|
||||||
bibtex_name = bibtex.keys()[0]
|
bibtex_name = bibtex.keys()[0]
|
||||||
@ -224,7 +224,7 @@ def editEntry(entry, file_id='both'):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||||
index = BibTexParser(fh.read().encode('utf-8'))
|
index = BibTexParser(fh.read())
|
||||||
index = index.get_entry_dict()
|
index = index.get_entry_dict()
|
||||||
except:
|
except:
|
||||||
tools.warning("Unable to open index file.")
|
tools.warning("Unable to open index file.")
|
||||||
@ -256,7 +256,7 @@ def downloadFile(url, filetype, manual):
|
|||||||
def openFile(ident):
|
def openFile(ident):
|
||||||
try:
|
try:
|
||||||
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
with open(params.folder+'index.bib', 'r', encoding='utf-8') as fh:
|
||||||
bibtex = BibTexParser(fh.read().encode('utf-8'))
|
bibtex = BibTexParser(fh.read())
|
||||||
bibtex = bibtex.get_entry_dict()
|
bibtex = bibtex.get_entry_dict()
|
||||||
except:
|
except:
|
||||||
tools.warning("Unable to open index file.")
|
tools.warning("Unable to open index file.")
|
||||||
@ -360,12 +360,27 @@ def resync():
|
|||||||
" but could not delete it.")
|
" but could not delete it.")
|
||||||
|
|
||||||
|
|
||||||
def update(entries):
|
def update(entry):
|
||||||
update = backend.updateArXiv(entry)
|
update = backend.updateArXiv(entry)
|
||||||
if update is not False:
|
if update is not False:
|
||||||
print("New version found for "+entry)
|
print("New version found for "+entry)
|
||||||
print("Downloaded latest version "+update['Eprint'])
|
print("\t Title: "+update['title'])
|
||||||
editEntry(update['file'], 'file')
|
confirm = tools.rawInput("Download it ? [Y/n] ")
|
||||||
|
if confirm.lower() == 'n':
|
||||||
|
return
|
||||||
|
new_name = downloadFile('http://arxiv.org/pdf/'+update['eprint'],
|
||||||
|
'article', False)
|
||||||
|
if new_name is not False:
|
||||||
|
print(update['eprint']+" successfully imported as "+new_name)
|
||||||
|
else:
|
||||||
|
tools.warning("An error occurred while downloading "+url)
|
||||||
|
confirm = tools.rawInput("Delete previous version ? [y/N] ")
|
||||||
|
if confirm.lower() == 'y':
|
||||||
|
if not backend.deleteId(entry):
|
||||||
|
if not backend.deleteFile(entry):
|
||||||
|
tools.warning("Unable to remove previous version.")
|
||||||
|
return
|
||||||
|
print("Previous version successfully deleted.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -15,5 +15,6 @@ proxies = [
|
|||||||
# %Y = published year
|
# %Y = published year
|
||||||
# %t = title
|
# %t = title
|
||||||
# %a = authors
|
# %a = authors
|
||||||
format_articles = "%f_%l-%j-%Y"
|
# %v = arXiv version
|
||||||
|
format_articles = "%f_%l-%j-%Y-%v"
|
||||||
format_books = "%a-%t"
|
format_books = "%a-%t"
|
||||||
|
Loading…
Reference in New Issue
Block a user