License + Consolidating fetcher.py

This commit is contained in:
Phyks 2014-05-26 16:12:21 +02:00
parent 845f70027a
commit 5a8ea0750f
8 changed files with 64 additions and 19 deletions

View File

@ -110,7 +110,7 @@ The resync option will check that all bibtex entries have a corresponding file a
## License ## License
All the source code I wrote is under a `no-alcoohol beer-ware license`. All functions that I didn't write myself are under the original license and their origin is specified in the function itself. All the source code I wrote is under a `no-alcohol beer-ware license`. All functions that I didn't write myself are under the original license and their origin is specified in the function itself.
``` ```
* -------------------------------------------------------------------------------- * --------------------------------------------------------------------------------
* "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42): * "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
@ -138,6 +138,7 @@ Here are some sources of inspirations for this project :
A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own. A list of ideas and TODO. Don't hesitate to give feedback on the ones you really want or to propose your own.
60. Unittest
70. Export of bibtex 70. Export of bibtex
80. Search engine 80. Search engine
85. Anti-duplicate ? 85. Anti-duplicate ?

0
arxiv2bib.py Executable file → Normal file
View File

View File

@ -1,5 +1,14 @@
#!/usr/bin/env python2 # -*- coding: utf8 -*-
# coding=utf8 # -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import os import os
import re import re

19
bmc.py
View File

@ -87,14 +87,19 @@ def addFile(src, filetype, manual, autoconfirm, tag):
isbn = False isbn = False
if not manual: if not manual:
if filetype == 'article' or filetype is None: try:
doi = fetcher.findDOI(src) if filetype == 'article' or filetype is None:
if doi is False and (filetype == 'article' or filetype is None): doi = fetcher.findDOI(src)
arxiv = fetcher.findArXivId(src) if doi is False and (filetype == 'article' or filetype is None):
arxiv = fetcher.findArXivId(src)
if filetype == 'book' or (doi is False and arxiv is False and if filetype == 'book' or (doi is False and arxiv is False and
filetype is None): filetype is None):
isbn = fetcher.findISBN(src) isbn = fetcher.findISBN(src)
except KeyboardInterrupt:
doi = False
arxiv = False
isbn = False
if doi is False and isbn is False and arxiv is False: if doi is False and isbn is False and arxiv is False:
if filetype is None: if filetype is None:

View File

@ -1,5 +1,14 @@
#!/usr/bin/env python2 # -*- coding: utf8 -*-
# coding=utf8 # -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
import isbntools import isbntools
import re import re
@ -55,10 +64,10 @@ def download(url):
return False return False
isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])", #isbn_re = re.compile(r"isbn (([0-9]{3}[ -])?[0-9][ -][0-9]{2}[ -][0-9]{6}[ -][0-9])",
isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
re.IGNORECASE) re.IGNORECASE)
def findISBN(src): def findISBN(src):
"""Search for a valid ISBN in src. """Search for a valid ISBN in src.
@ -100,7 +109,10 @@ def findISBN(src):
def isbn2Bib(isbn): def isbn2Bib(isbn):
"""Tries to get bibtex entry from an ISBN number""" """Tries to get bibtex entry from an ISBN number"""
# Default merges results from worldcat.org and google books # Default merges results from worldcat.org and google books
return fmtbib('bibtex', isbntools.meta(isbn, 'default')) try:
return fmtbib('bibtex', isbntools.meta(isbn, 'default'))
except:
return ''
doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE) doi_re = re.compile('(?<=doi)/?:?\s?[0-9\.]{7}/\S*[0-9]', re.IGNORECASE)
@ -129,8 +141,9 @@ def findDOI(src):
else: else:
return False return False
extractfull = ''
while totext.poll() is None: while totext.poll() is None:
extractfull = totext.stdout.readline() extractfull += totext.stdout.readline().strip()
extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-')) extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
if not extractDOI: if not extractDOI:
# PNAS fix # PNAS fix
@ -214,8 +227,9 @@ def findArXivId(src):
else: else:
return False return False
extractfull = ''
while totext.poll() is None: while totext.poll() is None:
extractfull = totext.stdout.readline() extractfull += totext.stdout.readline().strip()
extractID = arXiv_re.search(extractfull) extractID = arXiv_re.search(extractfull)
if extractID: if extractID:
totext.terminate() totext.terminate()
@ -250,7 +264,7 @@ def arXiv2Bib(arxiv):
except: except:
pass pass
return tools.parsed2Bibtex(fetched_bibtex) return tools.parsed2Bibtex(fetched_bibtex)
return False return ''
HAL_re = re.compile(r'(hal-\d{8}), version (\d+)') HAL_re = re.compile(r'(hal-\d{8}), version (\d+)')

View File

@ -1,3 +1,4 @@
# -*- coding: utf8 -*-
# The folder in which the papers should be stored # The folder in which the papers should be stored
# /!\ Keep the trailing slash /!\ # /!\ Keep the trailing slash /!\
folder = "/home/phyks/Papers/" folder = "/home/phyks/Papers/"

View File

@ -1,3 +1,4 @@
# -*- coding: utf8 -*-
"""Search query parser """Search query parser
Modified by Phyks, 2014-05-18. Original source code is here: Modified by Phyks, 2014-05-18. Original source code is here:
@ -122,7 +123,11 @@ class SearchQueryParser:
operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
).setResultsName("or") | operatorAnd) ).setResultsName("or") | operatorAnd)
return operatorOr.parseString operatorQ = Forward()
operatorQ << Group(operatorOr + Suppress('=') +
operatorOr).setResultsName('field')
return operatorQ.parseString
def evaluateAnd(self, argument): def evaluateAnd(self, argument):
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))

View File

@ -1,4 +1,14 @@
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
# "THE NO-ALCOHOL BEER-WARE LICENSE" (Revision 42):
# Phyks (webmaster@phyks.me) wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff (and you can also do whatever
# you want with this stuff without retaining it, but that's not cool...). If we
# meet some day, and you think this stuff is worth it, you can buy me a
# <del>beer</del> soda in return.
# Phyks
# -----------------------------------------------------------------------------
from __future__ import print_function from __future__ import print_function
import os import os