Resync function. To be tested…

This commit is contained in:
Phyks 2014-05-01 00:45:31 +02:00
parent 741cde352e
commit 4eb2aeb9d8
3 changed files with 158 additions and 30 deletions

View File

@ -73,7 +73,9 @@ TODO
### Data storage
All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can rebuild the index fie with `./main.py rebuild`.
All your documents will be stored in the papers dir specified in `params.py`. All the bibtex entries will be added to the `index.bib` file. You should **not** add entries to this file (but you can edit existing entries without any problem), as this will break synchronization between documents in papers dir and the index. If you do so, you can resync the index file with `./main.py resync`.
The resync option will check that all bibtex entries have a corresponding file and that all files have a corresponding bibtex entry. It will ask you what to do for each unmatched entry.
## License
@ -107,7 +109,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
10. Refactor
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
12. Rebuild function
20. No DOI for arXiv / HAL
30. Parameter to disable remote search
40. Open file

View File

@ -2,6 +2,7 @@
# coding=utf8
import os
import re
import tools
import params
from bibtexparser.bparser import BibTexParser
@ -9,6 +10,35 @@ from bibtexparser.customization import homogeneize_latex_encoding
from bibtexparser.bwriter import bibtex as bibTexWriter
def getNewName(src, bibtex):
    """
    Return the formatted name according to params for the given
    bibtex entry

    src is the path of the file being named; only its extension (as given
    by tools.getExtension) is reused.
    bibtex is the entry dict. 'type', 'author' and 'title' are read
    unconditionally; 'journal' (articles only) and 'year' are optional and
    their placeholders are left untouched when the field is missing.

    Returns params.folder + slugified formatted name + extension of src.
    Raises ValueError if the entry type is neither 'article' nor 'book',
    since no name format is defined for other types.
    """
    authors = re.split(' and ', bibtex['author'])

    if bibtex['type'] == 'article':
        new_name = params.format_articles
        try:
            new_name = new_name.replace("%j", bibtex['journal'])
        except KeyError:
            pass
    elif bibtex['type'] == 'book':
        new_name = params.format_books
    else:
        raise ValueError("No filename format defined for bibtex type " +
                         str(bibtex['type']))

    new_name = new_name.replace("%t", bibtex['title'])
    try:
        new_name = new_name.replace("%Y", bibtex['year'])
    except KeyError:
        pass

    # Authors are expected as "Last, First"; keep only the last names.
    last_names = [author.split(',')[0].strip() for author in authors]
    new_name = new_name.replace("%f", last_names[0])
    new_name = new_name.replace("%l", last_names[-1])
    new_name = new_name.replace("%a", ', '.join(last_names))

    new_name = params.folder+tools.slugify(new_name)+tools.getExtension(src)
    # Callers assign the result of this function, so it must be returned.
    return new_name
def parsed2Bibtex(parsed):
"""Convert a single bibtex entry dict to bibtex string"""
bibtex = '@'+parsed['type']+'{'+parsed['id']+",\n"
@ -28,6 +58,19 @@ def bibtexAppend(data):
fh.write(parsed2Bibtex(data)+"\n")
def bibtexEdit(ident, modifs):
    """Apply the modifs dict to the index entry stored under ident.

    The index file is parsed, each key/value pair of modifs is written into
    the entry found under ident, and the resulting entry dict is handed to
    bibtexRewrite.
    """
    index_path = params.folder+'index.bib'
    with open(index_path, 'r') as index_file:
        parser = BibTexParser(index_file.read(),
                              customization=homogeneize_latex_encoding)
    entries = parser.get_entry_dict()

    for field, value in modifs.items():
        entries[ident][field] = value

    bibtexRewrite(entries)
def bibtexRewrite(data):
"""Rewrite the bibtex index file.
@ -55,8 +98,12 @@ def deleteId(ident):
except:
tools.warning("Unable to delete file associated to id "+ident+" : " +
bibtex[ident]['file'])
del(bibtex[ident])
bibtexRewrite(bibtex)
try:
del(bibtex[ident])
bibtexRewrite(bibtex)
except KeyError:
tools.warning("No associated bibtex entry in index for file " +
bibtex[ident]['file'])
return True
@ -76,7 +123,38 @@ def deleteFile(filename):
except:
tools.warning("Unable to delete file associated to id " +
key+" : "+bibtex[key]['file'])
del(bibtex[key])
try:
del(bibtex[key])
except KeyError:
tools.warning("No associated bibtex entry in index for file " +
bibtex[key]['file'])
if found:
bibtexRewrite(bibtex)
return found
def diffFilesIndex():
    """Compute differences between Bibtex index and PDF files

    Returns a dict holding only the mismatches:
        * full bibtex entry with file='' if file is not found
          (stored under the key the parser's entry dict uses for it)
        * only file entry if file with missing bibtex entry
          (stored under the filename)
    Entries whose file is present on disk are left out of the result.
    """
    # The index itself lives in the papers dir; it is never a document and
    # must not be reported as a file lacking a bibtex entry.
    unmatched = [name for name in tools.listDir(params.folder)
                 if os.path.basename(name) != 'index.bib']

    with open(params.folder+'index.bib', 'r') as fh:
        index = BibTexParser(fh.read(),
                             customization=homogeneize_latex_encoding)

    index_diff = {}
    for key, entry in index.get_entry_dict().items():
        # An entry with no 'file' field at all is treated as a missing file.
        if entry.get('file', '') in unmatched:
            # Each file can satisfy only one entry; a second entry pointing
            # at the same file is reported as missing its file.
            unmatched.remove(entry['file'])
        else:
            entry['file'] = ''
            index_diff[key] = entry

    for filename in unmatched:
        index_diff[filename] = {'file': filename}

    # Return the computed diff, not the parser object.
    return index_diff

99
main.py
View File

@ -2,7 +2,6 @@
# -*- coding: utf8 -*-
import os
import re
import shutil
import subprocess
import sys
@ -98,28 +97,7 @@ def addFile(src, filetype):
bibtex = checkBibtex(src, bibtex)
authors = re.split(' and ', bibtex['author'])
if bibtex['type'] == 'article':
new_name = params.format_articles
try:
new_name = new_name.replace("%j", bibtex['journal'])
except:
pass
elif bibtex['type'] == 'book':
new_name = params.format_books
new_name = new_name.replace("%t", bibtex['title'])
try:
new_name = new_name.replace("%Y", bibtex['year'])
except:
pass
new_name = new_name.replace("%f", authors[0].split(',')[0].strip())
new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
for i in authors]))
new_name = params.folder+tools.slugify(new_name)+tools.getExtension(src)
new_name = backend.getNewName(src, bibtex)
while os.path.exists(new_name):
tools.warning("file "+new_name+" already exists.")
@ -162,6 +140,72 @@ def downloadFile(url, filetype):
return False
def resync():
    """Interactively reconcile the bibtex index with the files on disk.

    For every index entry whose file is missing, ask for a file to import
    (an empty answer deletes the entry). When the entry has a DOI or ISBN,
    the one found in the chosen file is compared against it and a mismatch
    must be confirmed. The chosen file is copied into the library under the
    name computed by backend.getNewName and the index is updated.

    For every file lacking an index entry, ask whether to import it again
    through addFile or to delete it.

    Exits the program if the chosen file cannot be copied into the library.
    """
    diff = backend.diffFilesIndex()
    # The diff is documented as a dict of entries: iterate over the entries
    # themselves, not over the keys.
    entries = diff.values() if isinstance(diff, dict) else diff

    for entry in entries:
        if entry['file'] == '':
            print("Found entry in index without associated file.")
            confirm = False
            while not confirm:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
                                          "entry) ? ")
                if filename == '':
                    break

                confirm = True
                if 'doi' in entry:
                    doi = fetcher.findDOI(filename)
                    if doi is not False and doi != entry['doi']:
                        confirm = tools.rawInput("Found DOI does not " +
                                                 "match bibtex entry " +
                                                 "DOI, continue anyway " +
                                                 "? [y/N]")
                        confirm = (confirm.lower() == 'y')
                elif 'isbn' in entry:
                    isbn = fetcher.findISBN(filename)
                    if isbn is not False and isbn != entry['isbn']:
                        confirm = tools.rawInput("Found ISBN does not " +
                                                 "match bibtex entry " +
                                                 "ISBN, continue anyway " +
                                                 "? [y/N]")
                        confirm = (confirm.lower() == 'y')

            if filename == '':
                backend.deleteId(entry['id'])
            else:
                new_name = backend.getNewName(filename, entry)
                try:
                    shutil.copy2(filename, new_name)
                except IOError:
                    sys.exit("Unable to move file to library dir " +
                             params.folder+".")
                # The index must reference the copy inside the library, not
                # the location the file was imported from.
                backend.bibtexEdit(entry['id'], {'file': new_name})
        else:
            print("Found file without any associated entry in index.")
            action = ''
            while action.lower() not in ['import', 'delete']:
                action = tools.rawInput("What to do ? [import / delete] ")
                action = action.lower()

            if action == 'import':
                # The context manager removes the temporary copy even if the
                # import raises.
                with tempfile.NamedTemporaryFile() as tmp:
                    shutil.copy(entry['file'], tmp.name)
                    # NOTE(review): the file extension is passed as addFile's
                    # filetype argument, and the temporary copy carries no
                    # extension - confirm this is what addFile expects.
                    filetype = tools.getExtension(entry['file'])
                    try:
                        os.remove(entry['file'])
                    except OSError:
                        tools.warning("Unable to delete file "+entry['file'])
                    if not addFile(tmp.name, filetype):
                        tools.warning("Unable to reimport file " +
                                      entry['file'])
            else:
                backend.deleteFile(entry['file'])
                print(entry['file'] + " removed from disk and " +
                      "index.")
if __name__ == '__main__':
try:
if len(sys.argv) < 2:
@ -217,7 +261,12 @@ if __name__ == '__main__':
elif sys.argv[1] == 'search':
raise Exception('TODO')
elif sys.argv[1] == 'rebuild':
raise Exception('TODO')
elif sys.argv[1] == 'resync':
if len(sys.argv) > 2 and sys.argv[2] == 'help':
sys.exit("Usage : " + sys.argv[0] + " resync")
confirm = tools.rawInput("Resync files and bibtex index ? [y/N] ")
if confirm.lower() == 'y':
resync()
except KeyboardInterrupt:
sys.exit()