From 5faf0eec151c9aa3630f55265f0b77c6b881318b Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Wed, 12 Oct 2016 16:07:42 -0400 Subject: [PATCH] Fix issue #5 Provide richer API for modules Provide a mechanism to download documents using Weboob modules abilities. If a user wants to download any document, it will be fetched in a temporary folder. The file URI will be passed back in the output JSON. If the users then have access to the server filesystem, they can directly get the requested document and remove temporary files when they are no longer needed. If the users do not have such access, they can use the `/retrieve` endpoint exposed by the webserver to retrieve the content of this file. Note that this endpoint is not designed for a production environment and might expose other sensitive content from your temporary directory. Note also that this endpoint will not delete the temporary file. Closes #5. --- .gitignore | 4 +- README.md | 39 ++++++++++----- cozyweboob/capabilities/CapDocument.py | 69 ++++++++++++++++++++++++-- server.py | 27 +++++----- 4 files changed, 108 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 29ad228..01bd1cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ *.pyc *.swp -konnectors.json* -out.json +konnectors.*.json* +*.json diff --git a/README.md b/README.md index 07bade9..abc24a2 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,32 @@ Typical command-line usage for this script is: This script spawns a Bottle webserver, listening on `localhost:8080` (by default). -It has a single route, the index route, which supports `POST` method to send a -valid JSON string defining konnectors to be used in a `params` field. Typical -example to send it some content is: -```bash -curl -X POST --data "params=$(cat konnectors.json)" "http://localhost:8080/" -``` -where `konnectors.json` is a valid JSON file defining konnectors to be used. 
+It exposes a couple of routes: +* the `/fetch` route, which supports `POST` method to send a valid JSON string + defining konnectors to be used in a `params` field. Typical example to send + it some content is: -The server also exposes a `/list` endpoint, which will provide you a JSON dump -of all the available modules, their descriptions and the configuration options -you should provide them. + ```bash + curl -X POST --data "params=$(cat konnectors.json)" "http://localhost:8080/" + ``` + where `konnectors.json` is a valid JSON file defining konnectors to be used. + Downloaded files will be stored in a temporary directory, and their file URI + will be passed back in the output JSON. If you do not have direct access + to the filesystem, you can use the `/retrieve` endpoint below to retrieve + such downloaded files through the network. +* the `/list` route, which will provide you a JSON dump of all the available + modules, their descriptions and the configuration options you should provide + them. + +* the `/retrieve` route, which supports `POST` method and a single `path` `POST` + parameter which is the path to the previously downloaded file to retrieve. + +**IMPORTANT:** Note this small webserver is **not** production ready and only +here as a proof of concept and to be used in a controlled development +environment. The `/retrieve` route will basically allow anyone to access any +file from your temp directory, which is a real security concern in production. Note: You can specify the host and port to listen on using the `COZYWEBOOB_HOST` and `COZYWEBOOB_PORT` environment variables. @@ -66,6 +79,8 @@ Available commands are: * `GET /list` to list all available modules. * `POST /fetch JSON_PARAMS` where `JSON_PARAMS` is an input JSON for module parameters. + Downloaded files will be stored in a temporary directory, and their file URI + will be passed back in the output JSON. * `exit` to quit the script and end the conversation. 
JSON responses are the same one as from the HTTP server script. It is @@ -107,8 +122,8 @@ map should have at the following three keys: of contents to fetch. Typically, you can pass `"fetch": { "CapDocument": ["bills"]}` to fetch only bills from the `CapDocuments` capability. You can also pass - `"download": { "CapDocument": ["someID"] }` to download a specific id (which - can be either type of fields in the `CapDocument` capability). + `"download": { "CapDocument": ["someID"] }` to download a specific document, + identified by its ID. If not provided, the default is to fetch only, and do not download anything. diff --git a/cozyweboob/capabilities/CapDocument.py b/cozyweboob/capabilities/CapDocument.py index 626f3d7..c6adae1 100644 --- a/cozyweboob/capabilities/CapDocument.py +++ b/cozyweboob/capabilities/CapDocument.py @@ -2,8 +2,10 @@ This module contains all the conversion functions associated to the Document capability. """ +import tempfile + from cozyweboob.capabilities.base import clean_object -from weboob.capabilities.bill import Bill +from weboob.capabilities.bill import Bill, DocumentNotFound def fetch_subscriptions(document): @@ -158,6 +160,42 @@ def fetch(document, fetch_actions): return (subscriptions, documents, bills, detailed_bills, history_bills) +def download(document, ids): + """ + Download all required documents from a CapDocument object. + + Args: + document: The CapDocument object to fetch from. + ids: A list of document IDs to download. + Returns: + A dict associating requested IDs with paths to downloaded files. None + if no ids are passed. 
+ """ + if not ids: + # Do not do anything if no ids are passed + return None + + # Create a tmp directory to store downloaded items + tmp_dir = tempfile.mkdtemp(suffix='-tmp', prefix='cozyweboob-') + + # Download every requested document + downloaded_documents = {} + for doc_id in ids: + try: + downloaded_content = document.download_document(doc_id) + except DocumentNotFound: + downloaded_documents[doc_id] = None + continue + with tempfile.NamedTemporaryFile(mode="w+", + dir=tmp_dir, + delete=False) as tmp_file: + tmp_file.write(downloaded_content) + downloaded_documents[doc_id] = tmp_file.name + + # Return a dict associating requested IDs and downloaded filenames + return downloaded_documents + + def to_cozy(document, actions=None): """ Export a CapDocument object to a JSON-serializable dict, to pass it to Cozy @@ -176,13 +214,33 @@ def to_cozy(document, actions=None): base_url = document.browser.BASEURL # Handle fetch actions - if actions["fetch"] is True or "CapDocument" in actions["fetch"]: + if actions["fetch"] is False: + fetch_actions = [] + elif actions["fetch"] is True or "CapDocument" in actions["fetch"]: if actions["fetch"] is True: fetch_actions = actions["fetch"] else: fetch_actions = actions["fetch"]["CapDocument"] - subscriptions, documents, bills, detailed_bills, history_bills = fetch( - document, fetch_actions) + else: + fetch_actions = [] + # Force-fetch documents if download is set to True + if actions["download"] is True: + fetch_actions = fetch_actions + ["documents"] + # Fetch items + subscriptions, documents, bills, detailed_bills, history_bills = fetch( + document, fetch_actions) + + # Handle download actions + if actions["download"] is False: + downloaded_documents = None + elif actions["download"] is True or "CapDocument" in actions["download"]: + if actions["download"] is True: + download_ids = [doc.id for doc in documents] + else: + download_ids = actions["download"]["CapDocument"] + downloaded_documents = download(document, 
download_ids) + else: + downloaded_documents = None # Return a formatted dict with all the infos return { @@ -193,5 +251,6 @@ def to_cozy(document, actions=None): "bills": bills, "detailed_bills": detailed_bills, "documents": documents, - "history_bills": history_bills + "history_bills": history_bills, + "downloaded": downloaded_documents } diff --git a/server.py b/server.py index 8d5a985..889168d 100755 --- a/server.py +++ b/server.py @@ -4,13 +4,13 @@ HTTP server wrapper around weboob """ import logging import os +import tempfile -from bottle import post, request, response, route, run +from bottle import post, request, route, run, static_file from cozyweboob import main as cozyweboob from cozyweboob import WeboobProxy from cozyweboob.tools.env import is_in_debug_mode -from cozyweboob.tools.jsonwriter import pretty_json # Module specific logger logger = logging.getLogger(__name__) @@ -22,18 +22,22 @@ def fetch_view(): Fetch from weboob modules. """ params = request.forms.get("params") - response.content_type = "application/json" - return pretty_json(cozyweboob(params)) + return cozyweboob(params) -@post("/download") -def download_view(): +@post("/retrieve") +def retrieve_view(): """ - Download from weboob modules. + Retrieve a previously downloaded file from weboob modules. + + Note: Beware, this route is meant to be used in a controlled development + environment and can result in leakage of information from your temp + default directory. """ - params = request.forms.get("params") - response.content_type = "application/json" - # TODO return pretty_json(proxy.download(params)) + path = request.forms.get("path") + return static_file(path.replace(tempfile.gettempdir(), './'), + tempfile.gettempdir(), + download=True) @route("/list") @@ -42,8 +46,7 @@ def list_view(): List all available weboob modules and their configuration options. 
""" proxy = WeboobProxy() - response.content_type = "application/json" - return pretty_json(proxy.list_modules()) + return proxy.list_modules() def init():