License + Consolidating fetcher.py

This commit is contained in:
Phyks 2014-05-26 16:12:21 +02:00
parent 845f70027a
commit 5a8ea0750f
8 changed files with 64 additions and 19 deletions

View File

@ -110,7 +110,7 @@ The resync option will check that all bibtex entries have a corresponding file a
## License
All the source code I wrote is under a `no-alcoohol beer-ware license`. All functions that I didn't write myself are under the original license and their origin is specified in the function itself.
All the source code I wrote is under a `no-alcohol beer-ware license`. All functions that I didn't write myself are under their original license and their origin is specified in the function itself.
```
* --------------------------------------------------------------------------------
* "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
@ -138,6 +138,7 @@ Here are some sources of inspirations for this project :
A list of ideas and TODOs. Don't hesitate to give feedback on the ones you really want or to propose your own.
60. Unittest
70. Export of bibtex
80. Search engine
85. Anti-duplicate ?

0
arxiv2bib.py Executable file → Normal file
View File

View File

@ -1,5 +1,14 @@
#!/usr/bin/env python2
# coding=utf8
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import os
import re

19
bmc.py
View File

@ -87,14 +87,19 @@ def addFile(src, filetype, manual, autoconfirm, tag):
isbn = False
if not manual:
if filetype == 'article' or filetype is None:
doi = fetcher.findDOI(src)
if doi is False and (filetype == 'article' or filetype is None):
arxiv = fetcher.findArXivId(src)
try:
if filetype == 'article' or filetype is None:
doi = fetcher.findDOI(src)
if doi is False and (filetype == 'article' or filetype is None):
arxiv = fetcher.findArXivId(src)
if filetype == 'book' or (doi is False and arxiv is False and
filetype is None):
isbn = fetcher.findISBN(src)
if filetype == 'book' or (doi is False and arxiv is False and
filetype is None):
isbn = fetcher.findISBN(src)
except KeyboardInterrupt:
doi = False
arxiv = False
isbn = False
if doi is False and isbn is False and arxiv is False:
if filetype is None:

View File

@ -1,5 +1,14 @@
#!/usr/bin/env python2
# coding=utf8
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import isbntools
import re
@ -55,10 +64,10 @@ def download(url):
return False
isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
#isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
re.IGNORECASE)
def findISBN(src):
"""Search for a valid ISBN in src.
@ -100,7 +109,10 @@ def findISBN(src):
def isbn2Bib(isbn):
"""Tries to get bibtex entry from an ISBN number"""
# Default merges results from worldcat.org and google books
return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
try:
return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
except:
return ''
doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
@ -129,8 +141,9 @@ def findDOI(src):
else:
return False
extractfull = ''
while totext.poll() is None:
extractfull = totext.stdout.readline()
extractfull += totext.stdout.readline().strip()
extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
if not extractDOI:
# PNAS fix
@ -214,8 +227,9 @@ def findArXivId(src):
else:
return False
extractfull = ''
while totext.poll() is None:
extractfull = totext.stdout.readline()
extractfull += totext.stdout.readline().strip()
extractID = arXiv_re.search(extractfull)
if extractID:
totext.terminate()
@ -250,7 +264,7 @@ def arXiv2Bib(arxiv):
except:
pass
return tools.parsed2Bibtex(fetched_bibtex)
return False
return ''
HAL_re = re.compile(r'(hal-\d{8}), version (\d+)')

View File

@ -1,3 +1,4 @@
# -*- coding: utf8 -*-
# The folder in which the papers should be stored
# /!\ Keep the trailing slash /!\
folder = "/home/phyks/Papers/"

View File

@ -1,3 +1,4 @@
# -*- coding: utf8 -*-
"""Search query parser
Modified by Phyks, 2014-05-18. Original source code is here:
@ -122,7 +123,11 @@ class SearchQueryParser:
operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
).setResultsName("or") | operatorAnd)
return operatorOr.parseString
operatorQ = Forward()
operatorQ << Group(operatorOr + Suppress('=') +
operatorOr).setResultsName('field')
return operatorQ.parseString
def evaluateAnd(self, argument):
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))

View File

@ -1,4 +1,14 @@
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import print_function
import os