diff --git a/.gitignore b/.gitignore index 29ad228..01bd1cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ *.pyc *.swp -konnectors.json* -out.json +konnectors.*.json* +*.json diff --git a/README.md b/README.md index 07bade9..abc24a2 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,32 @@ Typical command-line usage for this script is: This script spawns a Bottle webserver, listening on `localhost:8080` (by default). -It has a single route, the index route, which supports `POST` method to send a -valid JSON string defining konnectors to be used in a `params` field. Typical -example to send it some content is: -```bash -curl -X POST --data "params=$(cat konnectors.json)" "http://localhost:8080/" -``` -where `konnectors.json` is a valid JSON file defining konnectors to be used. +It exposes the following routes: +* the `/fetch` route, which supports `POST` method to send a valid JSON string + defining konnectors to be used in a `params` field. Typical example to send + it some content is: -The server also exposes a `/list` endpoint, which will provide you a JSON dump -of all the available modules, their descriptions and the configuration options -you should provide them. + ```bash + curl -X POST --data "params=$(cat konnectors.json)" "http://localhost:8080/fetch" + ``` + where `konnectors.json` is a valid JSON file defining konnectors to be used. + Downloaded files will be stored in a temporary directory, and their file URI + will be passed back in the output JSON. If you do not have direct access + to the filesystem, you can use the `/retrieve` endpoint below to retrieve + such downloaded files through the network. +* the `/list` route, which will provide you a JSON dump of all the available + modules, their descriptions and the configuration options you should provide + them. + +* the `/retrieve` route, which supports `POST` method and a single `path` `POST` + parameter which is the path to the previously downloaded file to retrieve. 
+ +**IMPORTANT:** Note this small webserver is **not** production ready and is only +here as a proof of concept and to be used in a controlled development +environment. The `/retrieve` route will basically allow anyone to access any +file from your temp directory, which is a real security concern in production. Note: You can specify the host and port to listen on using the `COZYWEBOOB_HOST` and `COZYWEBOOB_PORT` environment variables. @@ -66,6 +79,8 @@ Available commands are: * `GET /list` to list all available modules. * `POST /fetch JSON_PARAMS` where `JSON_PARAMS` is an input JSON for module parameters. + Downloaded files will be stored in a temporary directory, and their file URI + will be passed back in the output JSON. * `exit` to quit the script and end the conversation. JSON responses are the same one as from the HTTP server script. It is @@ -107,8 +122,8 @@ map should have at the following three keys: of contents to fetch. Typically, you can pass `"fetch": { "CapDocument": ["bills"]}` to fetch only bills from the `CapDocuments` capability. You can also pass - `"download": { "CapDocument": ["someID"] }` to download a specific id (which - can be either type of fields in the `CapDocument` capability). + `"download": { "CapDocument": ["someID"] }` to download a specific document, + identified by its ID. If not provided, the default is to fetch only, and do not download anything. diff --git a/cozyweboob/capabilities/CapDocument.py b/cozyweboob/capabilities/CapDocument.py index 626f3d7..c6adae1 100644 --- a/cozyweboob/capabilities/CapDocument.py +++ b/cozyweboob/capabilities/CapDocument.py @@ -2,8 +2,10 @@ This module contains all the conversion functions associated to the Document capability. 
""" +import tempfile + from cozyweboob.capabilities.base import clean_object -from weboob.capabilities.bill import Bill +from weboob.capabilities.bill import Bill, DocumentNotFound def fetch_subscriptions(document): @@ -158,6 +160,42 @@ def fetch(document, fetch_actions): return (subscriptions, documents, bills, detailed_bills, history_bills) +def download(document, ids): + """ + Download all required documents from a CapDocument object. + + Args: + document: The CapDocument object to fetch from. + ids: A list of document IDs to download. + Returns: + A dict associating requested IDs with paths to downloaded files. None + if no ids are passed. + """ + if not ids: + # Do not do anything if no ids are passed + return None + + # Create a tmp directory to store downloaded items + tmp_dir = tempfile.mkdtemp(suffix='-tmp', prefix='cozyweboob-') + + # Download every requested document + downloaded_documents = {} + for doc_id in ids: + try: + downloaded_content = document.download_document(doc_id) + except DocumentNotFound: + downloaded_documents[doc_id] = None + continue + with tempfile.NamedTemporaryFile(mode="w+", + dir=tmp_dir, + delete=False) as tmp_file: + tmp_file.write(downloaded_content) + downloaded_documents[doc_id] = tmp_file.name + + # Return a dict associating requested IDs and downloaded filenames + return downloaded_documents + + def to_cozy(document, actions=None): """ Export a CapDocument object to a JSON-serializable dict, to pass it to Cozy @@ -176,13 +214,33 @@ def to_cozy(document, actions=None): base_url = document.browser.BASEURL # Handle fetch actions - if actions["fetch"] is True or "CapDocument" in actions["fetch"]: + if actions["fetch"] is False: + fetch_actions = [] + elif actions["fetch"] is True or "CapDocument" in actions["fetch"]: if actions["fetch"] is True: fetch_actions = actions["fetch"] else: fetch_actions = actions["fetch"]["CapDocument"] - subscriptions, documents, bills, detailed_bills, history_bills = fetch( - document, 
fetch_actions) + else: + fetch_actions = [] + # Force-fetch documents if download is set to True + if actions["download"] is True: + fetch_actions = fetch_actions + ["documents"] + # Fetch items + subscriptions, documents, bills, detailed_bills, history_bills = fetch( + document, fetch_actions) + + # Handle download actions + if actions["download"] is False: + downloaded_documents = None + elif actions["download"] is True or "CapDocument" in actions["download"]: + if actions["download"] is True: + download_ids = [doc.id for doc in documents] + else: + download_ids = actions["download"]["CapDocument"] + downloaded_documents = download(document, download_ids) + else: + downloaded_documents = None # Return a formatted dict with all the infos return { @@ -193,5 +251,6 @@ def to_cozy(document, actions=None): "bills": bills, "detailed_bills": detailed_bills, "documents": documents, - "history_bills": history_bills + "history_bills": history_bills, + "downloaded": downloaded_documents } diff --git a/server.py b/server.py index 8d5a985..889168d 100755 --- a/server.py +++ b/server.py @@ -4,13 +4,13 @@ HTTP server wrapper around weboob """ import logging import os +import tempfile -from bottle import post, request, response, route, run +from bottle import post, request, route, run, static_file from cozyweboob import main as cozyweboob from cozyweboob import WeboobProxy from cozyweboob.tools.env import is_in_debug_mode -from cozyweboob.tools.jsonwriter import pretty_json # Module specific logger logger = logging.getLogger(__name__) @@ -22,18 +22,22 @@ def fetch_view(): Fetch from weboob modules. """ params = request.forms.get("params") - response.content_type = "application/json" - return pretty_json(cozyweboob(params)) + return cozyweboob(params) -@post("/download") -def download_view(): +@post("/retrieve") +def retrieve_view(): """ - Download from weboob modules. + Retrieve a previously downloaded file from weboob modules. 
+ + Note: Beware, this route is meant to be used in a controlled development + environment and can result in leakage of information from your temp + default directory. """ - params = request.forms.get("params") - response.content_type = "application/json" - # TODO return pretty_json(proxy.download(params)) + path = request.forms.get("path") + return static_file(path.replace(tempfile.gettempdir(), './'), + tempfile.gettempdir(), + download=True) @route("/list") @@ -42,8 +46,7 @@ def list_view(): List all available weboob modules and their configuration options. """ proxy = WeboobProxy() - response.content_type = "application/json" - return pretty_json(proxy.list_modules()) + return proxy.list_modules() def init():