Browse Source

Revert "Reduce number of requests to housing websites"

This reverts commit 977e354646.
Phyks (Lucas Verney) 7 months ago
parent
commit
a7ee94653f

+ 1
- 5
doc/0.getting_started.md View File

@@ -135,11 +135,7 @@ List of configuration options:
135 135
   doc](http://bottlepy.org/docs/dev/deployment.html).
136 136
 * `backends` is a list of Woob backends to enable. It defaults to any
137 137
   available and supported Woob backend.
138
-* `force_fetch_all` is a boolean indicating whether or not Flatisfy should
139
-  fetch all available flats or only theones added from the last fetch (relying
140
-  on last known housing date). By default, Flatisfy will only iterate on
141
-  housings until the last known housing date.
142
-* `store_personal_data` is a boolean indicating whether or not Flatisfy should
138
+* `store_personal_data` is a boolean indicating whether or not Flatisfy should
143 139
   fetch personal data from housing posts and store them in database. Such
144 140
   personal data include contact phone number for instance. By default,
145 141
   Flatisfy does not store such personal data.

+ 0
- 4
flatisfy/config.py View File

@@ -55,9 +55,6 @@ DEFAULT_CONFIG = {
55 55
             # Time is in seconds
56 56
         }
57 57
     },
58
-    # Whether to force fetching all available flats at each time or only fetch
59
-    # diff
60
-    "force_fetch_all": False,
61 58
     # Whether or not to store personal data from housing posts (phone number
62 59
     # etc)
63 60
     "store_personal_data": False,
@@ -165,7 +162,6 @@ def validate_config(config, check_with_data):
165 162
         assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
166 163
         assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)
167 164
 
168
-        assert isinstance(config["force_fetch_all"], bool)
169 165
         assert isinstance(config["store_personal_data"], bool)
170 166
         assert isinstance(config["max_distance_housing_station"], (int, float))
171 167
         assert isinstance(config["duplicate_threshold"], int)

+ 5
- 75
flatisfy/fetch.py View File

@@ -5,9 +5,7 @@ This module contains all the code related to fetching and loading flats lists.
5 5
 from __future__ import absolute_import, print_function, unicode_literals
6 6
 from builtins import str
7 7
 
8
-import arrow
9 8
 import collections
10
-import datetime
11 9
 import itertools
12 10
 import json
13 11
 import logging
@@ -17,7 +15,6 @@ from flatisfy import database
17 15
 from flatisfy import tools
18 16
 from flatisfy.constants import BACKENDS_BY_PRECEDENCE
19 17
 from flatisfy.models import flat as flat_model
20
-from flatisfy.models import last_fetch as last_fetch_model
21 18
 
22 19
 LOGGER = logging.getLogger(__name__)
23 20
 
@@ -164,11 +161,7 @@ class WoobProxy(object):
164 161
 
165 162
         return queries
166 163
 
167
-    def query(
168
-        self, query,
169
-        max_entries=None, store_personal_data=False, force_fetch_all=False,
170
-        last_fetch_by_backend=None
171
-    ):
164
+    def query(self, query, max_entries=None, store_personal_data=False):
172 165
         """
173 166
         Fetch the housings posts matching a given Woob query.
174 167
 
@@ -176,18 +169,12 @@ class WoobProxy(object):
176 169
         :param max_entries: Maximum number of entries to fetch.
177 170
         :param store_personal_data: Whether personal data should be fetched
178 171
             from housing posts (phone number etc).
179
-        :param force_fetch_all: Whether to force fetching all available flats
180
-            or only diff from last fetch (based on timestamps).
181
-        :param last_fetch_by_backend: A dict mapping all backends to last fetch
182
-            datetimes.
183 172
         :return: The matching housing posts, dumped as a list of JSON objects.
184 173
         """
185
-        if last_fetch_by_backend is None:
186
-            last_fetch_by_backend = {}
187
-
188 174
         housings = []
189 175
         # List the useful backends for this specific query
190 176
         useful_backends = [x.backend for x in query.cities]
177
+        # TODO: Handle max_entries better
191 178
         try:
192 179
             for housing in itertools.islice(
193 180
                 self.webnip.do(
@@ -200,16 +187,6 @@ class WoobProxy(object):
200 187
                 ),
201 188
                 max_entries,
202 189
             ):
203
-                if not force_fetch_all:
204
-                    # Check whether we should continue iterating or not
205
-                    last_fetch_datetime = last_fetch_by_backend.get(housing.backend)
206
-                    if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime:
207
-                        LOGGER.info(
208
-                            'Done iterating till last fetch (housing.date=%s, last_fetch=%s). Stopping iteration.',
209
-                            housing.date,
210
-                            last_fetch_datetime
211
-                        )
212
-                        break
213 190
                 if not store_personal_data:
214 191
                     housing.phone = None
215 192
                 housings.append(json.dumps(housing, cls=WoobEncoder))
@@ -263,66 +240,19 @@ def fetch_flats(config):
263 240
     """
264 241
     fetched_flats = {}
265 242
 
266
-    # Get last fetch datetimes for all constraints / backends
267
-    get_session = database.init_db(config["database"], config["search_index"])
268
-    with get_session() as session:
269
-        last_fetch = collections.defaultdict(dict)
270
-        for item in session.query(last_fetch_model.LastFetch).all():
271
-            last_fetch[item.constraint_name][item.backend] = item.last_fetch
272
-
273
-    # Do the actual fetching
274 243
     for constraint_name, constraint in config["constraints"].items():
275 244
         LOGGER.info("Loading flats for constraint %s...", constraint_name)
276
-
277 245
         with WoobProxy(config) as woob_proxy:
278 246
             queries = woob_proxy.build_queries(constraint)
279 247
             housing_posts = []
280 248
             for query in queries:
281
-                housing_posts.extend(
282
-                    woob_proxy.query(
283
-                        query,
284
-                        config["max_entries"],
285
-                        config["store_personal_data"],
286
-                        config["force_fetch_all"],
287
-                        last_fetch[constraint_name]
288
-                    )
289
-                )
290
-
291
-        housing_posts = [json.loads(flat) for flat in housing_posts]
292
-
293
-        # Update last_fetch
294
-        last_fetch_by_backends = collections.defaultdict(lambda: None)
295
-        for flat in housing_posts:
296
-            backend = flat['id'].split('@')[-1]
297
-            if (
298
-                last_fetch_by_backends[backend] is None
299
-                or last_fetch_by_backends[backend] < flat['date']
300
-            ):
301
-                last_fetch_by_backends[backend] = flat['date']
302
-        for backend in last_fetch_by_backends:
303
-            last_fetch_in_db = session.query(last_fetch_model.LastFetch).where(
304
-                last_fetch_model.LastFetch.constraint_name == constraint_name,
305
-                last_fetch_model.LastFetch.backend == backend
306
-            ).first()
307
-            if last_fetch_in_db:
308
-                last_fetch_in_db.last_fetch = arrow.get(
309
-                    last_fetch_by_backends[backend]
310
-                ).date()
311
-            else:
312
-                last_fetch_in_db = last_fetch_model.LastFetch(
313
-                    constraint_name=constraint_name,
314
-                    backend=backend,
315
-                    last_fetch=arrow.get(last_fetch_by_backends[backend]).date()
316
-                )
317
-            session.add(last_fetch_in_db)
318
-            session.commit()
319
-
249
+                housing_posts.extend(woob_proxy.query(query, config["max_entries"], config["store_personal_data"]))
320 250
         housing_posts = housing_posts[: config["max_entries"]]
321 251
         LOGGER.info("Fetched %d flats.", len(housing_posts))
322 252
 
323
-        constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in housing_posts]
253
+        constraint_flats_list = [json.loads(flat) for flat in housing_posts]
254
+        constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
324 255
         fetched_flats[constraint_name] = constraint_flats_list
325
-
326 256
     return fetched_flats
327 257
 
328 258
 

+ 0
- 31
flatisfy/models/last_fetch.py View File

@@ -1,31 +0,0 @@
1
-# coding: utf-8
2
-"""
3
-This modules defines an SQLAlchemy ORM model for a flat.
4
-"""
5
-# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
6
-from __future__ import absolute_import, print_function, unicode_literals
7
-
8
-import logging
9
-
10
-from sqlalchemy import (
11
-    Column,
12
-    DateTime,
13
-    String,
14
-)
15
-
16
-from flatisfy.database.base import BASE
17
-
18
-
19
-LOGGER = logging.getLogger(__name__)
20
-
21
-
22
-class LastFetch(BASE):
23
-    """
24
-    SQLAlchemy ORM model to store last timestamp of fetch by backend.
25
-    """
26
-
27
-    __tablename__ = "last_fetch"
28
-
29
-    backend = Column(String, primary_key=True)
30
-    last_fetch = Column(DateTime)
31
-    constraint_name = Column(String)

+ 29
- 29
flatisfy/web/js_src/store/getters.js View File

@@ -9,80 +9,80 @@ export default {
9 9
     isLoading: (state) => state.loading > 0,
10 10
 
11 11
     inseeCodesFlatsBuckets: (state, getters) => (filter) => {
12
-        const buckets = {}
12
+        const buckets = {};
13 13
 
14 14
         state.flats.forEach((flat) => {
15 15
             if (!filter || filter(flat)) {
16
-                const insee = flat.flatisfy_postal_code.insee_code
16
+                const insee = flat.flatisfy_postal_code.insee_code;
17 17
                 if (!buckets[insee]) {
18 18
                     buckets[insee] = {
19 19
                         name: flat.flatisfy_postal_code.name,
20
-                        flats: []
21
-                    }
20
+                        flats: [],
21
+                    };
22 22
                 }
23
-                buckets[insee].flats.push(flat)
23
+                buckets[insee].flats.push(flat);
24 24
             }
25
-        })
25
+        });
26 26
 
27
-        return buckets
27
+        return buckets;
28 28
     },
29 29
 
30 30
     flatsMarkers: (state, getters) => (router, filter) => {
31
-        const markers = []
31
+        const markers = [];
32 32
         state.flats.forEach((flat) => {
33 33
             if (filter && filter(flat)) {
34
-                const gps = findFlatGPS(flat)
34
+                const gps = findFlatGPS(flat);
35 35
 
36 36
                 if (gps) {
37 37
                     const previousMarker = markers.find(
38 38
                         (marker) =>
39 39
                             marker.gps[0] === gps[0] && marker.gps[1] === gps[1]
40
-                    )
40
+                    );
41 41
                     if (previousMarker) {
42 42
                         // randomize position a bit
43 43
                         // gps[0] += (Math.random() - 0.5) / 500
44 44
                         // gps[1] += (Math.random() - 0.5) / 500
45 45
                     }
46 46
                     const href = router.resolve({
47
-                        name: 'details',
48
-                        params: { id: flat.id }
49
-                    }).href
47
+                        name: "details",
48
+                        params: { id: flat.id },
49
+                    }).href;
50 50
                     const cost = flat.cost
51 51
                         ? costFilter(flat.cost, flat.currency)
52
-                        : ''
52
+                        : "";
53 53
                     markers.push({
54
-                        title: '',
54
+                        title: "",
55 55
                         content:
56 56
                             '<a href="' +
57 57
                             href +
58 58
                             '">' +
59 59
                             flat.title +
60
-                            '</a>' +
60
+                            "</a>" +
61 61
                             cost,
62 62
                         gps: gps,
63
-                        flatId: flat.id
64
-                    })
63
+                        flatId: flat.id,
64
+                    });
65 65
                 }
66 66
             }
67
-        })
67
+        });
68 68
 
69
-        return markers
69
+        return markers;
70 70
     },
71 71
 
72 72
     allTimeToPlaces: (state) => {
73
-        const places = {}
73
+        const places = {};
74 74
         Object.keys(state.timeToPlaces).forEach((constraint) => {
75
-            const constraintTimeToPlaces = state.timeToPlaces[constraint]
75
+            const constraintTimeToPlaces = state.timeToPlaces[constraint];
76 76
             Object.keys(constraintTimeToPlaces).forEach((name) => {
77
-                places[name] = constraintTimeToPlaces[name]
78
-            })
79
-        })
80
-        return places
77
+                places[name] = constraintTimeToPlaces[name];
78
+            });
79
+        });
80
+        return places;
81 81
     },
82 82
 
83 83
     timeToPlaces: (state, getters) => (constraintName) => {
84
-        return state.timeToPlaces[constraintName]
84
+        return state.timeToPlaces[constraintName];
85 85
     },
86 86
 
87
-    metadata: (state) => state.metadata
88
-}
87
+    metadata: (state) => state.metadata,
88
+};