diff --git a/flatisfy/cmds.py b/flatisfy/cmds.py index 4f2b21e..69a51cd 100644 --- a/flatisfy/cmds.py +++ b/flatisfy/cmds.py @@ -88,7 +88,7 @@ def import_and_filter(config, load_from_db=False): flats_list_by_status = filter_flats(config, flats_list=flats_list, fetch_details=True) # Create database connection - get_session = database.init_db(config["database"]) + get_session = database.init_db(config["database"], config["search_index"]) LOGGER.info("Merging fetched flats in database...") with get_session() as session: @@ -130,12 +130,15 @@ def purge_db(config): :param config: A config dict. :return: ``None`` """ - get_session = database.init_db(config["database"]) + get_session = database.init_db(config["database"], config["search_index"]) with get_session() as session: # Delete every flat in the db LOGGER.info("Purge all flats from the database.") - session.query(flat_model.Flat).delete(synchronize_session=False) + for flat in session.query(flat_model.Flat).all(): + # Use (slower) deletion by object, to ensure whoosh index is + # updated + session.delete(flat) def serve(config): diff --git a/flatisfy/config.py b/flatisfy/config.py index 0cc4514..ff4b8bb 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -49,6 +49,9 @@ DEFAULT_CONFIG = { "modules_path": None, # SQLAlchemy URI to the database to use "database": None, + # Path to the Whoosh search index file. Use ``None`` to put it in + # ``data_directory``. + "search_index": None, # Web app port "port": 8080, # Web app host to listen on @@ -56,7 +59,7 @@ DEFAULT_CONFIG = { # Web server to use to serve the webapp (see Bottle deployment doc) "webserver": None, # List of Weboob backends to use (default to any backend available) - "backends": None + "backends": None, } LOGGER = logging.getLogger(__name__) @@ -130,6 +133,7 @@ def validate_config(config): assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0) # noqa: E501 assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501 + assert isinstance(config["search_index"], str) assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501 assert config["database"] is None or isinstance(config["database"], str) # noqa: E501 @@ -207,6 +211,12 @@ def load_config(args=None): "flatisfy.db" ) + if config_data["search_index"] is None: + config_data["search_index"] = os.path.join( + config_data["data_directory"], + "search_index" + ) + config_validation = validate_config(config_data) if config_validation is True: LOGGER.info("Config has been fully initialized.") diff --git a/flatisfy/database/__init__.py b/flatisfy/database/__init__.py index b23e1c7..4092758 100644 --- a/flatisfy/database/__init__.py +++ b/flatisfy/database/__init__.py @@ -11,9 +11,11 @@ from contextlib import contextmanager from sqlalchemy import event, create_engine from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker +from sqlalchemy.exc import SQLAlchemyError import flatisfy.models.flat # noqa: F401 from flatisfy.database.base import BASE +from flatisfy.database.whooshalchemy import IndexService @event.listens_for(Engine, "connect") @@ -28,12 +30,13 @@ def set_sqlite_pragma(dbapi_connection, _): cursor.close() -def init_db(database_uri=None): +def init_db(database_uri=None, search_db_uri=None): """ Initialize the database, ensuring tables exist etc. :param database_uri: An URI describing an engine to use. Defaults to in-memory SQLite database. + :param search_db_uri: Path to the Whoosh index file to use. :return: A tuple of an SQLAlchemy session maker and the created engine. """ if database_uri is None: @@ -54,10 +57,16 @@ def init_db(database_uri=None): """ # pylint: enable=line-too-long,locally-disabled session = Session() + if search_db_uri: + index_service = IndexService( + whoosh_base=search_db_uri, + session=session + ) + index_service.register_class(flatisfy.models.flat.Flat) try: yield session session.commit() - except: + except SQLAlchemyError: session.rollback() raise finally: diff --git a/flatisfy/database/whooshalchemy.py b/flatisfy/database/whooshalchemy.py new file mode 100644 index 0000000..223dd8a --- /dev/null +++ b/flatisfy/database/whooshalchemy.py @@ -0,0 +1,178 @@ +""" +This file comes from https://github.com/sfermigier/WhooshAlchemy. + +WhooshAlchemy +~~~~~~~~~~~~~ + +Adds Whoosh indexing capabilities to SQLAlchemy models. + +Based on Flask-whooshalchemy by Karl Gyllstrom (Flask is still supported, but not mandatory). + +:copyright: (c) 2012 by Stefane Fermigier +:copyright: (c) 2012 by Karl Gyllstrom +:license: BSD (see LICENSE.txt) +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import os + +from six import text_type + +import sqlalchemy +import whoosh.index +from sqlalchemy import event +from sqlalchemy.orm.session import Session +from whoosh.analysis import StemmingAnalyzer +from whoosh.fields import Schema +from whoosh.qparser import MultifieldParser + + +class IndexService(object): + + def __init__(self, config=None, session=None, whoosh_base=None): + self.session = session + if not whoosh_base and config: + whoosh_base = config.get("WHOOSH_BASE") + if not whoosh_base: + whoosh_base = "whoosh_indexes" # Default value + self.whoosh_base = whoosh_base + self.indexes = {} + + event.listen(Session, "before_commit", self.before_commit) + event.listen(Session, "after_commit", self.after_commit) + + def register_class(self, model_class): + """ + Registers a model class, by creating the necessary Whoosh index if needed. + """ + + index_path = os.path.join(self.whoosh_base, model_class.__name__) + + schema, primary = self._get_whoosh_schema_and_primary(model_class) + + if whoosh.index.exists_in(index_path): + index = whoosh.index.open_dir(index_path) + else: + if not os.path.exists(index_path): + os.makedirs(index_path) + index = whoosh.index.create_in(index_path, schema) + + self.indexes[model_class.__name__] = index + model_class.search_query = Searcher(model_class, primary, index, + self.session) + return index + + def index_for_model_class(self, model_class): + """ + Gets the whoosh index for this model, creating one if it does not exist. + in creating one, a schema is created based on the fields of the model. + Currently we only support primary key -> whoosh.ID, and sqlalchemy.TEXT + -> whoosh.TEXT, but can add more later. A dict of model -> whoosh index + is added to the ``app`` variable. + """ + index = self.indexes.get(model_class.__name__) + if index is None: + index = self.register_class(model_class) + return index + + def _get_whoosh_schema_and_primary(self, model_class): + schema = {} + primary = None + for field in model_class.__table__.columns: + if field.primary_key: + schema[field.name] = whoosh.fields.ID(stored=True, unique=True) + primary = field.name + continue + if field.name in model_class.__searchable__: + schema[field.name] = whoosh.fields.TEXT( + analyzer=StemmingAnalyzer()) + return Schema(**schema), primary + + def before_commit(self, session): + self.to_update = {} + + for model in session.new: + model_class = model.__class__ + if hasattr(model_class, '__searchable__'): + self.to_update.setdefault(model_class.__name__, []).append( + ("new", model)) + + for model in session.deleted: + model_class = model.__class__ + if hasattr(model_class, '__searchable__'): + self.to_update.setdefault(model_class.__name__, []).append( + ("deleted", model)) + + for model in session.dirty: + model_class = model.__class__ + if hasattr(model_class, '__searchable__'): + self.to_update.setdefault(model_class.__name__, []).append( + ("changed", model)) + + def after_commit(self, session): + """ + Any db updates go through here. We check if any of these models have + ``__searchable__`` fields, indicating they need to be indexed. With these + we update the whoosh index for the model. If no index exists, it will be + created here; this could impose a penalty on the initial commit of a model. + """ + + for typ, values in self.to_update.items(): + model_class = values[0][1].__class__ + index = self.index_for_model_class(model_class) + with index.writer() as writer: + primary_field = model_class.search_query.primary + searchable = model_class.__searchable__ + + for change_type, model in values: + # delete everything. stuff that's updated or inserted will get + # added as a new doc. Could probably replace this with a whoosh + # update. + + writer.delete_by_term( + primary_field, text_type(getattr(model, primary_field))) + + if change_type in ("new", "changed"): + attrs = dict((key, getattr(model, key)) + for key in searchable) + attrs = { + attr: text_type(getattr(model, attr)) + for attr in attrs.keys() + } + attrs[primary_field] = text_type(getattr(model, primary_field)) + writer.add_document(**attrs) + + self.to_update = {} + + +class Searcher(object): + """ + Assigned to a Model class as ``search_query``, which enables text-querying. + """ + + def __init__(self, model_class, primary, index, session=None): + self.model_class = model_class + self.primary = primary + self.index = index + self.session = session + self.searcher = index.searcher() + fields = set(index.schema._fields.keys()) - set([self.primary]) + self.parser = MultifieldParser(list(fields), index.schema) + + def __call__(self, query, limit=None): + session = self.session + # When using Flask, get the session from the query attached to the model class. + if not session: + session = self.model_class.query.session + + results = self.index.searcher().search( + self.parser.parse(query), limit=limit) + + keys = [x[self.primary] for x in results] + primary_column = getattr(self.model_class, self.primary) + + db_query = session.query(self.model_class) + if keys: + return db_query.filter(primary_column.in_(keys)) + return db_query.filter(sqlalchemy.sql.false()) diff --git a/flatisfy/fetch.py b/flatisfy/fetch.py index 4012928..02bf8b5 100644 --- a/flatisfy/fetch.py +++ b/flatisfy/fetch.py @@ -291,7 +291,7 @@ def load_flats_list_from_db(config): :return: A list of all the flats in the database. """ flats_list = [] - get_session = database.init_db(config["database"]) + get_session = database.init_db(config["database"], config["search_index"]) with get_session() as session: # TODO: Better serialization diff --git a/flatisfy/models/flat.py b/flatisfy/models/flat.py index 7275467..aee1f31 100644 --- a/flatisfy/models/flat.py +++ b/flatisfy/models/flat.py @@ -56,6 +56,7 @@ class Flat(BASE): SQLAlchemy ORM model to store a flat. """ __tablename__ = "flats" + __searchable__ = ["title", "text", "station", "location", "details"] # Weboob data id = Column(String, primary_key=True) diff --git a/flatisfy/web/app.py b/flatisfy/web/app.py index a14022f..c79f6ff 100644 --- a/flatisfy/web/app.py +++ b/flatisfy/web/app.py @@ -49,7 +49,7 @@ def get_app(config): :return: The built bottle app. """ - get_session = database.init_db(config["database"]) + get_session = database.init_db(config["database"], config["search_index"]) app = bottle.default_app() app.install(DatabasePlugin(get_session)) @@ -79,6 +79,8 @@ def get_app(config): app.route("/api/v1/flat/:flat_id/status", "POST", api_routes.update_flat_status_v1) + app.route("/api/v1/search", "POST", api_routes.search_v1) + # Index app.route("/", "GET", lambda: _serve_static_file("index.html")) diff --git a/flatisfy/web/js_src/api/index.js b/flatisfy/web/js_src/api/index.js index d40097f..770a013 100644 --- a/flatisfy/web/js_src/api/index.js +++ b/flatisfy/web/js_src/api/index.js @@ -65,7 +65,10 @@ export const updateFlatStatus = function (flatId, newStatus, callback) { status: newStatus }) } - ).then(callback) + ).then(callback).catch(function (ex) { + console.error('Unable to update flat status: ' + ex) + }) + } export const getTimeToPlaces = function (callback) { @@ -78,3 +81,25 @@ export const getTimeToPlaces = function (callback) { console.error('Unable to fetch time to places: ' + ex) }) } + + +export const doSearch = function (query, callback) { + fetch( + '/api/v1/search', + { + credentials: 'same-origin', + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + query: query + }) + } + ).then(response => response.json()).then(json => { + callback(json.data) + }).catch(function (ex) { + console.error('Unable to perform search: ' + ex) + }) + +} diff --git a/flatisfy/web/js_src/components/app.vue b/flatisfy/web/js_src/components/app.vue index 4125db6..801fd3c 100644 --- a/flatisfy/web/js_src/components/app.vue +++ b/flatisfy/web/js_src/components/app.vue @@ -6,6 +6,7 @@
  • {{ $t("menu.available_flats") }}
  • {{ $t("menu.followed_flats") }}
  • {{ $t("menu.by_status") }}
  • +
  • {{ $t("menu.search") }}
  • diff --git a/flatisfy/web/js_src/components/flatstable.vue b/flatisfy/web/js_src/components/flatstable.vue index 242f383..20e2556 100644 --- a/flatisfy/web/js_src/components/flatstable.vue +++ b/flatisfy/web/js_src/components/flatstable.vue @@ -36,6 +36,10 @@ + + [{{ flat.id.split("@")[1] }}] {{ flat.title }}