Revert "Reduce number of requests to housing websites"
This reverts commit 977e354646.

parent ebd031c047
commit a7ee94653f
@@ -135,11 +135,7 @@ List of configuration options:
   doc](http://bottlepy.org/docs/dev/deployment.html).
 * `backends` is a list of Woob backends to enable. It defaults to any
   available and supported Woob backend.
-* `force_fetch_all` is a boolean indicating whether or not Flatisfy should
-  fetch all available flats or only theones added from the last fetch (relying
-  on last known housing date). By default, Flatisfy will only iterate on
-  housings until the last known housing date.
-* `store_personal_data` is a boolean indicating whether or not Flatisfy should
+* `store_personal_data` is a boolean indicated whether or not Flatisfy should
   fetch personal data from housing posts and store them in database. Such
   personal data include contact phone number for instance. By default,
   Flatisfy does not store such personal data.
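For context, both options documented above are plain booleans merged from the user's configuration file over Flatisfy's defaults. A minimal sketch of that merge, assuming a JSON configuration file; `load_user_config` and the file name are illustrative and not part of this diff:

# Sketch only: stands in for Flatisfy's own config loading.
import json

def load_user_config(path, defaults):
    """Merge a user JSON config file over a dict of default values."""
    config = dict(defaults)
    with open(path) as handle:
        config.update(json.load(handle))
    return config

config = load_user_config(
    "config.json",  # illustrative path
    {"force_fetch_all": False, "store_personal_data": False},
)

if config["force_fetch_all"]:
    print("Fetching every available housing post.")
else:
    print("Fetching only posts newer than the last known housing date.")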
@@ -55,9 +55,6 @@ DEFAULT_CONFIG = {
             # Time is in seconds
         }
     },
-    # Whether to force fetching all available flats at each time or only fetch
-    # diff
-    "force_fetch_all": False,
     # Whether or not to store personal data from housing posts (phone number
     # etc)
    "store_personal_data": False,
@@ -165,7 +162,6 @@ def validate_config(config, check_with_data):
         assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
         assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)

-        assert isinstance(config["force_fetch_all"], bool)
         assert isinstance(config["store_personal_data"], bool)
         assert isinstance(config["max_distance_housing_station"], (int, float))
         assert isinstance(config["duplicate_threshold"], int)
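As the hunk above shows, each configuration key is validated with a bare isinstance assertion, so dropping `force_fetch_all` also drops its type check. A small standalone sketch of that pattern; `check_types` and the sample values are illustrative:

# Sketch of the assert-based validation style used in validate_config():
# one isinstance() assertion per option, any failure means invalid config.
def check_types(config):
    try:
        assert isinstance(config["store_personal_data"], bool)
        assert isinstance(config["max_distance_housing_station"], (int, float))
        assert isinstance(config["duplicate_threshold"], int)
        return True
    except (AssertionError, KeyError):
        return False

assert check_types({"store_personal_data": False,
                    "max_distance_housing_station": 1500,
                    "duplicate_threshold": 15}) is True
assert check_types({"store_personal_data": "yes",  # wrong type
                    "max_distance_housing_station": 1500,
                    "duplicate_threshold": 15}) is False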
@@ -5,9 +5,7 @@ This module contains all the code related to fetching and loading flats lists.
 from __future__ import absolute_import, print_function, unicode_literals
 from builtins import str

-import arrow
 import collections
-import datetime
 import itertools
 import json
 import logging
@@ -17,7 +15,6 @@ from flatisfy import database
 from flatisfy import tools
 from flatisfy.constants import BACKENDS_BY_PRECEDENCE
 from flatisfy.models import flat as flat_model
-from flatisfy.models import last_fetch as last_fetch_model

 LOGGER = logging.getLogger(__name__)

@@ -164,11 +161,7 @@ class WoobProxy(object):

         return queries

-    def query(
-        self, query,
-        max_entries=None, store_personal_data=False, force_fetch_all=False,
-        last_fetch_by_backend=None
-    ):
+    def query(self, query, max_entries=None, store_personal_data=False):
         """
         Fetch the housings posts matching a given Woob query.

@@ -176,18 +169,12 @@ class WoobProxy(object):
         :param max_entries: Maximum number of entries to fetch.
         :param store_personal_data: Whether personal data should be fetched
             from housing posts (phone number etc).
-        :param force_fetch_all: Whether to force fetching all available flats
-            or only diff from last fetch (based on timestamps).
-        :param last_fetch_by_backend: A dict mapping all backends to last fetch
-            datetimes.
         :return: The matching housing posts, dumped as a list of JSON objects.
         """
-        if last_fetch_by_backend is None:
-            last_fetch_by_backend = {}
-
         housings = []
         # List the useful backends for this specific query
         useful_backends = [x.backend for x in query.cities]
+        # TODO: Handle max_entries better
         try:
             for housing in itertools.islice(
                 self.webnip.do(
@@ -200,16 +187,6 @@ class WoobProxy(object):
                 ),
                 max_entries,
             ):
-                if not force_fetch_all:
-                    # Check whether we should continue iterating or not
-                    last_fetch_datetime = last_fetch_by_backend.get(housing.backend)
-                    if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime:
-                        LOGGER.info(
-                            'Done iterating till last fetch (housing.date=%s, last_fetch=%s). Stopping iteration.',
-                            housing.date,
-                            last_fetch_datetime
-                        )
-                        break
                 if not store_personal_data:
                     housing.phone = None
                 housings.append(json.dumps(housing, cls=WoobEncoder))
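The lines removed here were the heart of the original change: iteration over a backend's results stops as soon as a post older than that backend's last fetch date shows up, which is what cut the number of requests to the housing websites. A standalone sketch of that early-exit logic, with simplified stand-in types; `housing.backend` and `housing.date` mirror the fields used above, everything else is illustrative and assumes posts arrive newest-first:

import datetime
from collections import namedtuple

# Stand-in for a Woob housing post; only the fields used by the check.
Housing = namedtuple("Housing", ["backend", "date", "title"])

def iter_until_last_fetch(housings, last_fetch_by_backend, force_fetch_all=False):
    """Yield housings, stopping once posts predate the backend's last fetch."""
    for housing in housings:
        if not force_fetch_all:
            last_fetch_datetime = last_fetch_by_backend.get(housing.backend)
            if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime:
                # Assuming results come newest-first, everything past this
                # point was already seen in a previous run.
                break
        yield housing

posts = [
    Housing("seloger", datetime.datetime(2021, 5, 3), "new flat"),
    Housing("seloger", datetime.datetime(2021, 4, 1), "already seen"),
]
last_fetch = {"seloger": datetime.datetime(2021, 4, 15)}
assert [h.title for h in iter_until_last_fetch(posts, last_fetch)] == ["new flat"]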
@@ -263,66 +240,19 @@ def fetch_flats(config):
     """
     fetched_flats = {}

-    # Get last fetch datetimes for all constraints / backends
-    get_session = database.init_db(config["database"], config["search_index"])
-    with get_session() as session:
-        last_fetch = collections.defaultdict(dict)
-        for item in session.query(last_fetch_model.LastFetch).all():
-            last_fetch[item.constraint_name][item.backend] = item.last_fetch
-
-    # Do the actual fetching
     for constraint_name, constraint in config["constraints"].items():
         LOGGER.info("Loading flats for constraint %s...", constraint_name)

         with WoobProxy(config) as woob_proxy:
             queries = woob_proxy.build_queries(constraint)
             housing_posts = []
             for query in queries:
-                housing_posts.extend(
-                    woob_proxy.query(
-                        query,
-                        config["max_entries"],
-                        config["store_personal_data"],
-                        config["force_fetch_all"],
-                        last_fetch[constraint_name]
-                    )
-                )
-
-        housing_posts = [json.loads(flat) for flat in housing_posts]
-
-        # Update last_fetch
-        last_fetch_by_backends = collections.defaultdict(lambda: None)
-        for flat in housing_posts:
-            backend = flat['id'].split('@')[-1]
-            if (
-                last_fetch_by_backends[backend] is None
-                or last_fetch_by_backends[backend] < flat['date']
-            ):
-                last_fetch_by_backends[backend] = flat['date']
-        for backend in last_fetch_by_backends:
-            last_fetch_in_db = session.query(last_fetch_model.LastFetch).where(
-                last_fetch_model.LastFetch.constraint_name == constraint_name,
-                last_fetch_model.LastFetch.backend == backend
-            ).first()
-            if last_fetch_in_db:
-                last_fetch_in_db.last_fetch = arrow.get(
-                    last_fetch_by_backends[backend]
-                ).date()
-            else:
-                last_fetch_in_db = last_fetch_model.LastFetch(
-                    constraint_name=constraint_name,
-                    backend=backend,
-                    last_fetch=arrow.get(last_fetch_by_backends[backend]).date()
-                )
-            session.add(last_fetch_in_db)
-        session.commit()
-
+                housing_posts.extend(woob_proxy.query(query, config["max_entries"], config["store_personal_data"]))
         housing_posts = housing_posts[: config["max_entries"]]
         LOGGER.info("Fetched %d flats.", len(housing_posts))

-        constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in housing_posts]
+        constraint_flats_list = [json.loads(flat) for flat in housing_posts]
+        constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
         fetched_flats[constraint_name] = constraint_flats_list

     return fetched_flats
-
-
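The other removed block recorded, per constraint and per backend, the newest housing date seen in a run, so the next run knows where to stop. A reduced sketch of that bookkeeping, decoupled from the database; dates stay plain strings as they appear in the dumped JSON, and the helper name is illustrative:

import collections

def newest_date_by_backend(housing_posts):
    """Return {backend: newest 'date' seen} for a list of decoded housing posts."""
    last_fetch_by_backends = collections.defaultdict(lambda: None)
    for flat in housing_posts:
        # Woob ids look like "<id>@<backend>", so the backend is the suffix.
        backend = flat["id"].split("@")[-1]
        if (
            last_fetch_by_backends[backend] is None
            or last_fetch_by_backends[backend] < flat["date"]
        ):
            last_fetch_by_backends[backend] = flat["date"]
    return dict(last_fetch_by_backends)

posts = [
    {"id": "123@seloger", "date": "2021-05-01T10:00:00"},
    {"id": "456@seloger", "date": "2021-05-03T08:00:00"},
    {"id": "789@leboncoin", "date": "2021-05-02T12:00:00"},
]
assert newest_date_by_backend(posts) == {
    "seloger": "2021-05-03T08:00:00",
    "leboncoin": "2021-05-02T12:00:00",
}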
@@ -1,31 +0,0 @@
-# coding: utf-8
-"""
-This modules defines an SQLAlchemy ORM model for a flat.
-"""
-# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
-from __future__ import absolute_import, print_function, unicode_literals
-
-import logging
-
-from sqlalchemy import (
-    Column,
-    DateTime,
-    String,
-)
-
-from flatisfy.database.base import BASE
-
-
-LOGGER = logging.getLogger(__name__)
-
-
-class LastFetch(BASE):
-    """
-    SQLAlchemy ORM model to store last timestamp of fetch by backend.
-    """
-
-    __tablename__ = "last_fetch"
-
-    backend = Column(String, primary_key=True)
-    last_fetch = Column(DateTime)
-    constraint_name = Column(String)
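The deleted module above is a small SQLAlchemy declarative model; note that only `backend` is a primary key while `constraint_name` is a plain column, so rows are effectively keyed by backend alone. For readers unfamiliar with the pattern, here is a self-contained sketch of an equivalent table plus a read-or-create update, using plain SQLAlchemy rather than Flatisfy's `BASE` and session helpers; everything outside the model definition is illustrative:

import datetime

# Requires SQLAlchemy 1.4+ for sqlalchemy.orm.declarative_base.
from sqlalchemy import Column, DateTime, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class LastFetch(Base):
    """Last fetch timestamp per backend, mirroring the deleted model."""
    __tablename__ = "last_fetch"

    backend = Column(String, primary_key=True)
    last_fetch = Column(DateTime)
    constraint_name = Column(String)

engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def record_fetch(session, constraint_name, backend, when):
    """Update the existing row for this backend, or create it."""
    row = session.query(LastFetch).filter_by(backend=backend).first()
    if row is None:
        row = LastFetch(backend=backend)
        session.add(row)
    row.constraint_name = constraint_name
    row.last_fetch = when
    session.commit()

record_fetch(session, "default", "seloger", datetime.datetime(2021, 5, 3))
assert session.query(LastFetch).one().backend == "seloger"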
@@ -9,80 +9,80 @@ export default {
     isLoading: (state) => state.loading > 0,

     inseeCodesFlatsBuckets: (state, getters) => (filter) => {
-        const buckets = {}
+        const buckets = {};

         state.flats.forEach((flat) => {
             if (!filter || filter(flat)) {
-                const insee = flat.flatisfy_postal_code.insee_code
+                const insee = flat.flatisfy_postal_code.insee_code;
                 if (!buckets[insee]) {
                     buckets[insee] = {
                         name: flat.flatisfy_postal_code.name,
-                        flats: []
-                    }
+                        flats: [],
+                    };
                 }
-                buckets[insee].flats.push(flat)
+                buckets[insee].flats.push(flat);
             }
-        })
+        });

-        return buckets
+        return buckets;
     },

     flatsMarkers: (state, getters) => (router, filter) => {
-        const markers = []
+        const markers = [];
         state.flats.forEach((flat) => {
             if (filter && filter(flat)) {
-                const gps = findFlatGPS(flat)
+                const gps = findFlatGPS(flat);

                 if (gps) {
                     const previousMarker = markers.find(
                         (marker) =>
                             marker.gps[0] === gps[0] && marker.gps[1] === gps[1]
-                    )
+                    );
                     if (previousMarker) {
                         // randomize position a bit
                         // gps[0] += (Math.random() - 0.5) / 500
                         // gps[1] += (Math.random() - 0.5) / 500
                     }
                     const href = router.resolve({
-                        name: 'details',
-                        params: { id: flat.id }
-                    }).href
+                        name: "details",
+                        params: { id: flat.id },
+                    }).href;
                     const cost = flat.cost
                         ? costFilter(flat.cost, flat.currency)
-                        : ''
+                        : "";
                     markers.push({
-                        title: '',
+                        title: "",
                         content:
                             '<a href="' +
                             href +
                             '">' +
                             flat.title +
-                            '</a>' +
+                            "</a>" +
                             cost,
                         gps: gps,
-                        flatId: flat.id
-                    })
+                        flatId: flat.id,
+                    });
                 }
             }
-        })
+        });

-        return markers
+        return markers;
     },

     allTimeToPlaces: (state) => {
-        const places = {}
+        const places = {};
         Object.keys(state.timeToPlaces).forEach((constraint) => {
-            const constraintTimeToPlaces = state.timeToPlaces[constraint]
+            const constraintTimeToPlaces = state.timeToPlaces[constraint];
             Object.keys(constraintTimeToPlaces).forEach((name) => {
-                places[name] = constraintTimeToPlaces[name]
-            })
-        })
-        return places
+                places[name] = constraintTimeToPlaces[name];
+            });
+        });
+        return places;
     },

     timeToPlaces: (state, getters) => (constraintName) => {
-        return state.timeToPlaces[constraintName]
+        return state.timeToPlaces[constraintName];
     },

-    metadata: (state) => state.metadata
-}
+    metadata: (state) => state.metadata,
+};