diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md index 5a6ef0c..1e365e8 100644 --- a/doc/0.getting_started.md +++ b/doc/0.getting_started.md @@ -135,11 +135,7 @@ List of configuration options: doc](http://bottlepy.org/docs/dev/deployment.html). * `backends` is a list of Woob backends to enable. It defaults to any available and supported Woob backend. -* `force_fetch_all` is a boolean indicating whether or not Flatisfy should - fetch all available flats or only theones added from the last fetch (relying - on last known housing date). By default, Flatisfy will only iterate on - housings until the last known housing date. -* `store_personal_data` is a boolean indicating whether or not Flatisfy should +* `store_personal_data` is a boolean indicating whether or not Flatisfy should fetch personal data from housing posts and store them in database. Such personal data include contact phone number for instance. By default, Flatisfy does not store such personal data. diff --git a/flatisfy/config.py b/flatisfy/config.py index 7182140..4c20e3f 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -55,9 +55,6 @@ DEFAULT_CONFIG = { # Time is in seconds } }, - # Whether to force fetching all available flats at each time or only fetch - # diff - "force_fetch_all": False, # Whether or not to store personal data from housing posts (phone number # etc) "store_personal_data": False, @@ -165,7 +162,6 @@ def validate_config(config, check_with_data): assert config["smtp_to"] is None or isinstance(config["smtp_to"], list) assert config["notification_lang"] is None or isinstance(config["notification_lang"], str) - assert isinstance(config["force_fetch_all"], bool) assert isinstance(config["store_personal_data"], bool) assert isinstance(config["max_distance_housing_station"], (int, float)) assert isinstance(config["duplicate_threshold"], int) diff --git a/flatisfy/fetch.py b/flatisfy/fetch.py index 7d1cc5a..4e79a83 100644 --- a/flatisfy/fetch.py +++ b/flatisfy/fetch.py 
@@ -5,9 +5,7 @@ This module contains all the code related to fetching and loading flats lists. from __future__ import absolute_import, print_function, unicode_literals from builtins import str -import arrow import collections -import datetime import itertools import json import logging @@ -17,7 +15,6 @@ from flatisfy import database from flatisfy import tools from flatisfy.constants import BACKENDS_BY_PRECEDENCE from flatisfy.models import flat as flat_model -from flatisfy.models import last_fetch as last_fetch_model LOGGER = logging.getLogger(__name__) @@ -164,11 +161,7 @@ class WoobProxy(object): return queries - def query( - self, query, - max_entries=None, store_personal_data=False, force_fetch_all=False, - last_fetch_by_backend=None - ): + def query(self, query, max_entries=None, store_personal_data=False): """ Fetch the housings posts matching a given Woob query. @@ -176,18 +169,12 @@ class WoobProxy(object): :param max_entries: Maximum number of entries to fetch. :param store_personal_data: Whether personal data should be fetched from housing posts (phone number etc). - :param force_fetch_all: Whether to force fetching all available flats - or only diff from last fetch (based on timestamps). - :param last_fetch_by_backend: A dict mapping all backends to last fetch - datetimes. :return: The matching housing posts, dumped as a list of JSON objects. 
""" - if last_fetch_by_backend is None: - last_fetch_by_backend = {} - housings = [] # List the useful backends for this specific query useful_backends = [x.backend for x in query.cities] + # TODO: Handle max_entries better try: for housing in itertools.islice( self.webnip.do( @@ -200,16 +187,6 @@ class WoobProxy(object): ), max_entries, ): - if not force_fetch_all: - # Check whether we should continue iterating or not - last_fetch_datetime = last_fetch_by_backend.get(housing.backend) - if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime: - LOGGER.info( - 'Done iterating till last fetch (housing.date=%s, last_fetch=%s). Stopping iteration.', - housing.date, - last_fetch_datetime - ) - break if not store_personal_data: housing.phone = None housings.append(json.dumps(housing, cls=WoobEncoder)) @@ -263,66 +240,19 @@ def fetch_flats(config): """ fetched_flats = {} - # Get last fetch datetimes for all constraints / backends - get_session = database.init_db(config["database"], config["search_index"]) - with get_session() as session: - last_fetch = collections.defaultdict(dict) - for item in session.query(last_fetch_model.LastFetch).all(): - last_fetch[item.constraint_name][item.backend] = item.last_fetch - - # Do the actual fetching for constraint_name, constraint in config["constraints"].items(): LOGGER.info("Loading flats for constraint %s...", constraint_name) - with WoobProxy(config) as woob_proxy: queries = woob_proxy.build_queries(constraint) housing_posts = [] for query in queries: - housing_posts.extend( - woob_proxy.query( - query, - config["max_entries"], - config["store_personal_data"], - config["force_fetch_all"], - last_fetch[constraint_name] - ) - ) - - housing_posts = [json.loads(flat) for flat in housing_posts] - - # Update last_fetch - last_fetch_by_backends = collections.defaultdict(lambda: None) - for flat in housing_posts: - backend = flat['id'].split('@')[-1] - if ( - last_fetch_by_backends[backend] is None - or 
last_fetch_by_backends[backend] < flat['date'] - ): - last_fetch_by_backends[backend] = flat['date'] - for backend in last_fetch_by_backends: - last_fetch_in_db = session.query(last_fetch_model.LastFetch).where( - last_fetch_model.LastFetch.constraint_name == constraint_name, - last_fetch_model.LastFetch.backend == backend - ).first() - if last_fetch_in_db: - last_fetch_in_db.last_fetch = arrow.get( - last_fetch_by_backends[backend] - ).date() - else: - last_fetch_in_db = last_fetch_model.LastFetch( - constraint_name=constraint_name, - backend=backend, - last_fetch=arrow.get(last_fetch_by_backends[backend]).date() - ) - session.add(last_fetch_in_db) - session.commit() - + housing_posts.extend(woob_proxy.query(query, config["max_entries"], config["store_personal_data"])) housing_posts = housing_posts[: config["max_entries"]] LOGGER.info("Fetched %d flats.", len(housing_posts)) - constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in housing_posts] + constraint_flats_list = [json.loads(flat) for flat in housing_posts] + constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in constraint_flats_list] fetched_flats[constraint_name] = constraint_flats_list - return fetched_flats diff --git a/flatisfy/models/last_fetch.py b/flatisfy/models/last_fetch.py deleted file mode 100644 index 281fed9..0000000 --- a/flatisfy/models/last_fetch.py +++ /dev/null @@ -1,31 +0,0 @@ -# coding: utf-8 -""" -This modules defines an SQLAlchemy ORM model for a flat. -""" -# pylint: disable=locally-disabled,invalid-name,too-few-public-methods -from __future__ import absolute_import, print_function, unicode_literals - -import logging - -from sqlalchemy import ( - Column, - DateTime, - String, -) - -from flatisfy.database.base import BASE - - -LOGGER = logging.getLogger(__name__) - - -class LastFetch(BASE): - """ - SQLAlchemy ORM model to store last timestamp of fetch by backend. 
- """ - - __tablename__ = "last_fetch" - - backend = Column(String, primary_key=True) - last_fetch = Column(DateTime) - constraint_name = Column(String) diff --git a/flatisfy/web/js_src/store/getters.js b/flatisfy/web/js_src/store/getters.js index 0f3fea9..4d9b94e 100644 --- a/flatisfy/web/js_src/store/getters.js +++ b/flatisfy/web/js_src/store/getters.js @@ -9,80 +9,80 @@ export default { isLoading: (state) => state.loading > 0, inseeCodesFlatsBuckets: (state, getters) => (filter) => { - const buckets = {} + const buckets = {}; state.flats.forEach((flat) => { if (!filter || filter(flat)) { - const insee = flat.flatisfy_postal_code.insee_code + const insee = flat.flatisfy_postal_code.insee_code; if (!buckets[insee]) { buckets[insee] = { name: flat.flatisfy_postal_code.name, - flats: [] - } + flats: [], + }; } - buckets[insee].flats.push(flat) + buckets[insee].flats.push(flat); } - }) + }); - return buckets + return buckets; }, flatsMarkers: (state, getters) => (router, filter) => { - const markers = [] + const markers = []; state.flats.forEach((flat) => { if (filter && filter(flat)) { - const gps = findFlatGPS(flat) + const gps = findFlatGPS(flat); if (gps) { const previousMarker = markers.find( (marker) => marker.gps[0] === gps[0] && marker.gps[1] === gps[1] - ) + ); if (previousMarker) { // randomize position a bit // gps[0] += (Math.random() - 0.5) / 500 // gps[1] += (Math.random() - 0.5) / 500 } const href = router.resolve({ - name: 'details', - params: { id: flat.id } - }).href + name: "details", + params: { id: flat.id }, + }).href; const cost = flat.cost ? 
costFilter(flat.cost, flat.currency) - : '' + : ""; markers.push({ - title: '', + title: "", content: '' + flat.title + - '' + + "" + cost, gps: gps, - flatId: flat.id - }) + flatId: flat.id, + }); } } - }) + }); - return markers + return markers; }, allTimeToPlaces: (state) => { - const places = {} + const places = {}; Object.keys(state.timeToPlaces).forEach((constraint) => { - const constraintTimeToPlaces = state.timeToPlaces[constraint] + const constraintTimeToPlaces = state.timeToPlaces[constraint]; Object.keys(constraintTimeToPlaces).forEach((name) => { - places[name] = constraintTimeToPlaces[name] - }) - }) - return places + places[name] = constraintTimeToPlaces[name]; + }); + }); + return places; }, timeToPlaces: (state, getters) => (constraintName) => { - return state.timeToPlaces[constraintName] + return state.timeToPlaces[constraintName]; }, - metadata: (state) => state.metadata -} + metadata: (state) => state.metadata, +};