Continue weboob wrapper

* Export full absolute URLs in the resulting JSON.
* Export session cookies in the resulting JSON, so that required files can
be downloaded on the Cozy side.
* Add comments in the code.
This commit is contained in:
Lucas Verney 2016-09-30 05:03:09 +02:00
parent 697edaefa3
commit 6fb8a24e48
7 changed files with 129 additions and 45 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
*.pyc *.pyc
konnectors.json* konnectors.json*
out.json

6
TODO
View File

@ -1,6 +0,0 @@
* Bills vs Details?
* Update modules?
* amazon.com is buggy
* LDLC is out of date
* Bouygues is out of date

View File

@ -1,44 +1,76 @@
"""
This module contains all the conversion functions associated to the Document
capability.
"""
from base import clean_object from base import clean_object
def to_cozy(document): def to_cozy(document):
""" """
Export a CapDocument object to JSON, to pass it to Cozy instance. Export a CapDocument object to a JSON-serializable dict, to pass it to Cozy
instance.
Args:
document: The CapDocument object to handle.
Returns: A JSON-serializable dict for the input object.
""" """
# Get the BASEURL to generate absolute URLs
base_url = document.browser.BASEURL
# Fetch the list of subscriptions # Fetch the list of subscriptions
try: try:
subscriptions = list(document.iter_subscription()) subscriptions = list(document.iter_subscription())
except NotImplementedError: except NotImplementedError:
subscriptions = None subscriptions = None
# Fetch and clean the list of bills # Fetch and clean the list of bills
try: try:
assert(subscriptions) assert subscriptions
bills = { bills = {
subscription.id: [ subscription.id: [
clean_object(bill) for bill in document.iter_documents(subscription) clean_object(bill, base_url=base_url)
for bill in document.iter_documents(subscription)
] ]
for subscription in subscriptions for subscription in subscriptions
} }
except (NotImplementedError, AssertionError): except (NotImplementedError, AssertionError):
bills = None bills = None
# Fetch and clean the list of history bills (detailed consumption)
# Fetch and clean the list of details of the subscription (detailed
# consumption)
# TODO: What is this?
try: try:
assert(subscriptions) assert subscriptions
detailed_bills = { detailed_bills = {
subscription.id: [ subscription.id: [
clean_object(detailed_bill) clean_object(detailed_bill, base_url=base_url)
for detailed_bill in document.get_details(subscription) for detailed_bill in document.get_details(subscription)
] ]
for subscription in subscriptions for subscription in subscriptions
} }
except (NotImplementedError, AssertionError): except (NotImplementedError, AssertionError):
detailed_bills = None detailed_bills = None
# Fetch and clean the list of history bills
try:
assert subscriptions
history_bills = {
subscription.id: [
clean_object(history_bill, base_url=base_url)
for history_bill in
document.iter_documents_history(subscription)
]
for subscription in subscriptions
}
except (NotImplementedError, AssertionError):
history_bills = None
# Return a formatted dict with all the infos # Return a formatted dict with all the infos
ret = { return {
"subscriptions": [ # Clean the subscriptions list "subscriptions": [ # Clean the subscriptions list
clean_object(subscription) for subscription in subscriptions clean_object(subscription, base_url=base_url)
for subscription in subscriptions
], ],
"bills": bills, "bills": bills,
"detailed_bills": detailed_bills "detailed_bills": detailed_bills,
"history_bills": history_bills
} }
return ret

View File

@ -1,14 +1,29 @@
"""
Common conversion functions for all the available capabilities.
"""
from weboob.capabilities.base import empty from weboob.capabilities.base import empty
def clean_object(o): def clean_object(obj, base_url=None):
""" """
Returns a JSON-serializable dict from a Weboob object. Helper to get nice JSON-serializable objects from the fields of any Weboob
object deriving from BaseObject.
Args:
obj: The object to handle.
base_url: An optional base url to generate full URLs in output dict.
Returns:
a JSON-serializable dict for the input object.
""" """
o = o.to_dict() # Convert object to a dict of its fields
for k, v in o.items(): obj = obj.to_dict()
# Clean the various fields to be JSON-serializable
for k, v in obj.items():
if empty(v): if empty(v):
# Replace empty values by None, avoid "NotLoaded is not # Replace empty values by None, avoid "NotLoaded is not
# serializable" error # serializable" error
o[k] = None obj[k] = None
return o elif k == "url" and base_url:
# Render full absolute URLs
obj[k] = base_url + v
return obj

View File

@ -1,20 +1,25 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
""" """
TODO Wrapper script around Weboob to be able to use it in combination with Cozy +
Konnectors easily.
""" """
from __future__ import print_function from __future__ import print_function
import collections
import getpass import getpass
import importlib import importlib
import json import json
import logging import logging
import sys import sys
from requests.utils import dict_from_cookiejar
from weboob.core import Weboob from weboob.core import Weboob
from tools.jsonwriter import pretty_json from tools.jsonwriter import pretty_json
from tools.progress import DummyProgress
# Dynamically load capabilities conversion modules # Dynamically load capabilities conversion modules
# Dynamic loading is required to be able to call them programmatically.
CAPABILITIES_CONVERSION_MODULES = importlib.import_module("capabilities") CAPABILITIES_CONVERSION_MODULES = importlib.import_module("capabilities")
@ -30,7 +35,10 @@ class WeboobProxy(object):
@staticmethod @staticmethod
def version(): def version():
""" """
Return Weboob version. Get Weboob version.
Returns:
the version of installed Weboob.
""" """
return Weboob.VERSION return Weboob.VERSION
@ -39,31 +47,32 @@ class WeboobProxy(object):
""" """
Ensure modules are up to date. Ensure modules are up to date.
""" """
return Weboob().update() Weboob().update()
def __init__(self, modulename, parameters): def __init__(self, modulename, parameters):
""" """
Create a Weboob handle and try to load the modules. Create a Weboob handle and try to load the modules.
Args:
modulename: the name of the weboob module to use.
parameters: A dict of parameters to pass the weboob module.
""" """
# Get a weboob instance
self.weboob = Weboob() self.weboob = Weboob()
# Careful: this is extracted from weboob's code.
# Install the module if necessary and hide the progress. # Install the module if necessary and hide the progress.
class DummyProgress:
def progress(self, a, b):
pass
repositories = self.weboob.repositories repositories = self.weboob.repositories
minfo = repositories.get_module_info(modulename) minfo = repositories.get_module_info(modulename)
if minfo is not None and not minfo.is_installed(): if minfo is not None and not minfo.is_installed():
repositories.install(minfo, progress=DummyProgress()) repositories.install(minfo, progress=DummyProgress())
# Build a backend for this module
# Calls the backend.
self.backend = self.weboob.build_backend(modulename, parameters) self.backend = self.weboob.build_backend(modulename, parameters)
def get_backend(self): def get_backend(self):
""" """
Get the built backend. Backend getter.
Returns:
the built backend.
""" """
return self.backend return self.backend
@ -71,12 +80,16 @@ class WeboobProxy(object):
def main(used_modules): def main(used_modules):
""" """
Main code Main code
Args:
used_modules: A list of modules description dicts.
Returns: A dict of all the results, ready to be JSON serialized.
""" """
# Update all available modules # Update all available modules
# TODO: WeboobProxy.update() # TODO: WeboobProxy.update()
# Fetch data for the specified modules # Fetch data for the specified modules
fetched_data = {} fetched_data = collections.defaultdict(dict)
logging.info("Start fetching from konnectors.") logging.info("Start fetching from konnectors.")
for module in used_modules: for module in used_modules:
logging.info("Fetching data from module %s.", module["id"]) logging.info("Fetching data from module %s.", module["id"])
@ -85,12 +98,10 @@ def main(used_modules):
module["name"], module["name"],
module["parameters"] module["parameters"]
).get_backend() ).get_backend()
# List all supported capabilities for capability in backend.iter_caps(): # Supported capabilities
for capability in backend.iter_caps(): # Get capability class name for dynamic import of converter
# Convert capability class to string name
capability = capability.__name__ capability = capability.__name__
try: try:
# Get conversion function for this capability
fetching_function = ( fetching_function = (
getattr( getattr(
getattr( getattr(
@ -101,25 +112,32 @@ def main(used_modules):
) )
) )
logging.info("Fetching capability %s.", capability) logging.info("Fetching capability %s.", capability)
# Fetch data and store them # Fetch data and merge them with the ones from other
# TODO: Ensure there is no overwrite # capabilities
fetched_data[module["id"]] = fetching_function(backend) fetched_data[module["id"]].update(fetching_function(backend))
except AttributeError: except AttributeError:
# In case the converter does not exist on our side
logging.error("%s capability is not implemented.", capability) logging.error("%s capability is not implemented.", capability)
continue continue
# Store session cookie of this module, to fetch files afterwards
fetched_data[module["id"]]["cookies"] = dict_from_cookiejar(
backend.browser.session.cookies
)
logging.info("Done fetching from konnectors.") logging.info("Done fetching from konnectors.")
return fetched_data return fetched_data
if __name__ == '__main__': if __name__ == '__main__':
try: try:
# Dev: Set logging level and format
logging.basicConfig( logging.basicConfig(
format='%(levelname)s: %(message)s', format='%(levelname)s: %(message)s',
level=logging.INFO level=logging.INFO
) )
try: try:
# Fetch konnectors JSON description from stdin
konnectors = json.load(sys.stdin) konnectors = json.load(sys.stdin)
# Handle missing passwords using getpass # Dev: Handle missing passwords using getpass
for module in range(len(konnectors)): for module in range(len(konnectors)):
for param in konnectors[module]["parameters"]: for param in konnectors[module]["parameters"]:
if not konnectors[module]["parameters"][param]: if not konnectors[module]["parameters"][param]:
@ -130,6 +148,7 @@ if __name__ == '__main__':
logging.error("Invalid JSON input.") logging.error("Invalid JSON input.")
sys.exit(-1) sys.exit(-1)
# Output the JSON formatted results on stdout
print( print(
pretty_json( pretty_json(
main(konnectors) main(konnectors)

View File

@ -1,3 +1,7 @@
"""
This module implements a custom JSON writer to be able to serialize data
returned by Weboob and pretty print the output JSON.
"""
import json import json
from datetime import date, datetime from datetime import date, datetime
@ -18,10 +22,15 @@ class CustomJSONEncoder(json.JSONEncoder):
return json.JSONEncoder.default(self, o) return json.JSONEncoder.default(self, o)
def pretty_json(foo): def pretty_json(obj):
""" """
Pretty printing of JSON output, using the custom JSONEncoder. Pretty printing of JSON output, using the custom JSONEncoder.
Args:
obj: the object to JSON serialize.
Returns:
the pretty printed JSON string.
""" """
return json.dumps(foo, sort_keys=True, return json.dumps(obj, sort_keys=True,
indent=4, separators=(',', ': '), indent=4, separators=(',', ': '),
cls=CustomJSONEncoder) cls=CustomJSONEncoder)

14
tools/progress.py Normal file
View File

@ -0,0 +1,14 @@
"""
Miscellaneous progress functions.
"""
class DummyProgress(object):
    """
    Dummy progress bar, to disable it.

    Stand-in progress reporter whose ``progress`` method discards every
    update, so that nothing is printed while an operation reports its
    progress.

    Declared as a new-style class (explicit ``object`` base) so that it
    behaves the same under Python 2 and Python 3.

    NOTE(review): only ``progress`` is implemented here. If the consumer of
    this object calls other reporting methods, they would need to be added
    -- confirm against the caller.
    """

    def progress(self, *args, **kwargs):
        """
        Progress function. Do nothing.

        Args:
            *args: Positional progress information; ignored.
            **kwargs: Keyword progress information; ignored.
        Returns:
            None.
        """
        pass