Revert "Reduce number of requests to housing websites"

This reverts commit 977e354646.
This commit is contained in:
Lucas Verney 2021-04-28 19:54:27 +02:00
parent ebd031c047
commit a7ee94653f
5 changed files with 35 additions and 144 deletions

View File

@ -135,11 +135,7 @@ List of configuration options:
doc](http://bottlepy.org/docs/dev/deployment.html).
* `backends` is a list of Woob backends to enable. It defaults to any
available and supported Woob backend.
* `force_fetch_all` is a boolean indicating whether or not Flatisfy should
fetch all available flats or only the ones added since the last fetch (relying
on last known housing date). By default, Flatisfy will only iterate on
housings until the last known housing date.
* `store_personal_data` is a boolean indicating whether or not Flatisfy should
* `store_personal_data` is a boolean indicating whether or not Flatisfy should
fetch personal data from housing posts and store them in database. Such
personal data include contact phone number for instance. By default,
Flatisfy does not store such personal data.

View File

@ -55,9 +55,6 @@ DEFAULT_CONFIG = {
# Time is in seconds
}
},
# Whether to force fetching all available flats every time, or only fetch
# the ones added since the last fetch
"force_fetch_all": False,
# Whether or not to store personal data from housing posts (phone number
# etc)
"store_personal_data": False,
@ -165,7 +162,6 @@ def validate_config(config, check_with_data):
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)
assert isinstance(config["force_fetch_all"], bool)
assert isinstance(config["store_personal_data"], bool)
assert isinstance(config["max_distance_housing_station"], (int, float))
assert isinstance(config["duplicate_threshold"], int)

View File

@ -5,9 +5,7 @@ This module contains all the code related to fetching and loading flats lists.
from __future__ import absolute_import, print_function, unicode_literals
from builtins import str
import arrow
import collections
import datetime
import itertools
import json
import logging
@ -17,7 +15,6 @@ from flatisfy import database
from flatisfy import tools
from flatisfy.constants import BACKENDS_BY_PRECEDENCE
from flatisfy.models import flat as flat_model
from flatisfy.models import last_fetch as last_fetch_model
LOGGER = logging.getLogger(__name__)
@ -164,11 +161,7 @@ class WoobProxy(object):
return queries
def query(
self, query,
max_entries=None, store_personal_data=False, force_fetch_all=False,
last_fetch_by_backend=None
):
def query(self, query, max_entries=None, store_personal_data=False):
"""
Fetch the housings posts matching a given Woob query.
@ -176,18 +169,12 @@ class WoobProxy(object):
:param max_entries: Maximum number of entries to fetch.
:param store_personal_data: Whether personal data should be fetched
from housing posts (phone number etc).
:param force_fetch_all: Whether to force fetching all available flats
or only diff from last fetch (based on timestamps).
:param last_fetch_by_backend: A dict mapping all backends to last fetch
datetimes.
:return: The matching housing posts, dumped as a list of JSON objects.
"""
if last_fetch_by_backend is None:
last_fetch_by_backend = {}
housings = []
# List the useful backends for this specific query
useful_backends = [x.backend for x in query.cities]
# TODO: Handle max_entries better
try:
for housing in itertools.islice(
self.webnip.do(
@ -200,16 +187,6 @@ class WoobProxy(object):
),
max_entries,
):
if not force_fetch_all:
# Check whether we should continue iterating or not
last_fetch_datetime = last_fetch_by_backend.get(housing.backend)
if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime:
LOGGER.info(
'Done iterating till last fetch (housing.date=%s, last_fetch=%s). Stopping iteration.',
housing.date,
last_fetch_datetime
)
break
if not store_personal_data:
housing.phone = None
housings.append(json.dumps(housing, cls=WoobEncoder))
@ -263,66 +240,19 @@ def fetch_flats(config):
"""
fetched_flats = {}
# Get last fetch datetimes for all constraints / backends
get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session:
last_fetch = collections.defaultdict(dict)
for item in session.query(last_fetch_model.LastFetch).all():
last_fetch[item.constraint_name][item.backend] = item.last_fetch
# Do the actual fetching
for constraint_name, constraint in config["constraints"].items():
LOGGER.info("Loading flats for constraint %s...", constraint_name)
with WoobProxy(config) as woob_proxy:
queries = woob_proxy.build_queries(constraint)
housing_posts = []
for query in queries:
housing_posts.extend(
woob_proxy.query(
query,
config["max_entries"],
config["store_personal_data"],
config["force_fetch_all"],
last_fetch[constraint_name]
)
)
housing_posts = [json.loads(flat) for flat in housing_posts]
# Update last_fetch
last_fetch_by_backends = collections.defaultdict(lambda: None)
for flat in housing_posts:
backend = flat['id'].split('@')[-1]
if (
last_fetch_by_backends[backend] is None
or last_fetch_by_backends[backend] < flat['date']
):
last_fetch_by_backends[backend] = flat['date']
for backend in last_fetch_by_backends:
last_fetch_in_db = session.query(last_fetch_model.LastFetch).where(
last_fetch_model.LastFetch.constraint_name == constraint_name,
last_fetch_model.LastFetch.backend == backend
).first()
if last_fetch_in_db:
last_fetch_in_db.last_fetch = arrow.get(
last_fetch_by_backends[backend]
).date()
else:
last_fetch_in_db = last_fetch_model.LastFetch(
constraint_name=constraint_name,
backend=backend,
last_fetch=arrow.get(last_fetch_by_backends[backend]).date()
)
session.add(last_fetch_in_db)
session.commit()
housing_posts.extend(woob_proxy.query(query, config["max_entries"], config["store_personal_data"]))
housing_posts = housing_posts[: config["max_entries"]]
LOGGER.info("Fetched %d flats.", len(housing_posts))
constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in housing_posts]
constraint_flats_list = [json.loads(flat) for flat in housing_posts]
constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
fetched_flats[constraint_name] = constraint_flats_list
return fetched_flats

View File

@ -1,31 +0,0 @@
# coding: utf-8
"""
This module defines an SQLAlchemy ORM model for the last fetch timestamps.
"""
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals
import logging
from sqlalchemy import (
Column,
DateTime,
String,
)
from flatisfy.database.base import BASE
LOGGER = logging.getLogger(__name__)
class LastFetch(BASE):
    """
    SQLAlchemy ORM model to store last timestamp of fetch by backend.

    One row is kept per (constraint name, backend) pair, so both columns
    together form the primary key. Keying on the backend alone would make the
    rows of two constraints sharing the same backend collide.
    """

    __tablename__ = "last_fetch"

    # Name of the backend the timestamp refers to (first half of the key).
    backend = Column(String, primary_key=True)
    # Timestamp recorded for this constraint / backend pair.
    last_fetch = Column(DateTime)
    # Name of the constraint the timestamp refers to (second half of the key).
    constraint_name = Column(String, primary_key=True)

View File

@ -9,80 +9,80 @@ export default {
isLoading: (state) => state.loading > 0,
inseeCodesFlatsBuckets: (state, getters) => (filter) => {
const buckets = {}
const buckets = {};
state.flats.forEach((flat) => {
if (!filter || filter(flat)) {
const insee = flat.flatisfy_postal_code.insee_code
const insee = flat.flatisfy_postal_code.insee_code;
if (!buckets[insee]) {
buckets[insee] = {
name: flat.flatisfy_postal_code.name,
flats: []
flats: [],
};
}
buckets[insee].flats.push(flat);
}
buckets[insee].flats.push(flat)
}
})
});
return buckets
return buckets;
},
flatsMarkers: (state, getters) => (router, filter) => {
const markers = []
const markers = [];
state.flats.forEach((flat) => {
if (filter && filter(flat)) {
const gps = findFlatGPS(flat)
const gps = findFlatGPS(flat);
if (gps) {
const previousMarker = markers.find(
(marker) =>
marker.gps[0] === gps[0] && marker.gps[1] === gps[1]
)
);
if (previousMarker) {
// randomize position a bit
// gps[0] += (Math.random() - 0.5) / 500
// gps[1] += (Math.random() - 0.5) / 500
}
const href = router.resolve({
name: 'details',
params: { id: flat.id }
}).href
name: "details",
params: { id: flat.id },
}).href;
const cost = flat.cost
? costFilter(flat.cost, flat.currency)
: ''
: "";
markers.push({
title: '',
title: "",
content:
'<a href="' +
href +
'">' +
flat.title +
'</a>' +
"</a>" +
cost,
gps: gps,
flatId: flat.id
})
flatId: flat.id,
});
}
}
})
});
return markers
return markers;
},
allTimeToPlaces: (state) => {
const places = {}
const places = {};
Object.keys(state.timeToPlaces).forEach((constraint) => {
const constraintTimeToPlaces = state.timeToPlaces[constraint]
const constraintTimeToPlaces = state.timeToPlaces[constraint];
Object.keys(constraintTimeToPlaces).forEach((name) => {
places[name] = constraintTimeToPlaces[name]
})
})
return places
places[name] = constraintTimeToPlaces[name];
});
});
return places;
},
timeToPlaces: (state, getters) => (constraintName) => {
return state.timeToPlaces[constraintName]
return state.timeToPlaces[constraintName];
},
metadata: (state) => state.metadata
}
metadata: (state) => state.metadata,
};