Revert "Reduce number of requests to housing websites"
This reverts commit 977e354646.
This commit is contained in:
parent
ebd031c047
commit
a7ee94653f
@ -135,11 +135,7 @@ List of configuration options:
|
||||
doc](http://bottlepy.org/docs/dev/deployment.html).
|
||||
* `backends` is a list of Woob backends to enable. It defaults to any
|
||||
available and supported Woob backend.
|
||||
* `force_fetch_all` is a boolean indicating whether or not Flatisfy should
|
||||
fetch all available flats or only the ones added since the last fetch (relying
|
||||
on last known housing date). By default, Flatisfy will only iterate on
|
||||
housings until the last known housing date.
|
||||
* `store_personal_data` is a boolean indicating whether or not Flatisfy should
|
||||
* `store_personal_data` is a boolean indicated whether or not Flatisfy should
|
||||
fetch personal data from housing posts and store them in database. Such
|
||||
personal data include contact phone number for instance. By default,
|
||||
Flatisfy does not store such personal data.
|
||||
|
@ -55,9 +55,6 @@ DEFAULT_CONFIG = {
|
||||
# Time is in seconds
|
||||
}
|
||||
},
|
||||
# Whether to force fetching all available flats at each time or only fetch
|
||||
# diff
|
||||
"force_fetch_all": False,
|
||||
# Whether or not to store personal data from housing posts (phone number
|
||||
# etc)
|
||||
"store_personal_data": False,
|
||||
@ -165,7 +162,6 @@ def validate_config(config, check_with_data):
|
||||
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
|
||||
assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)
|
||||
|
||||
assert isinstance(config["force_fetch_all"], bool)
|
||||
assert isinstance(config["store_personal_data"], bool)
|
||||
assert isinstance(config["max_distance_housing_station"], (int, float))
|
||||
assert isinstance(config["duplicate_threshold"], int)
|
||||
|
@ -5,9 +5,7 @@ This module contains all the code related to fetching and loading flats lists.
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
from builtins import str
|
||||
|
||||
import arrow
|
||||
import collections
|
||||
import datetime
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
@ -17,7 +15,6 @@ from flatisfy import database
|
||||
from flatisfy import tools
|
||||
from flatisfy.constants import BACKENDS_BY_PRECEDENCE
|
||||
from flatisfy.models import flat as flat_model
|
||||
from flatisfy.models import last_fetch as last_fetch_model
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
@ -164,11 +161,7 @@ class WoobProxy(object):
|
||||
|
||||
return queries
|
||||
|
||||
def query(
|
||||
self, query,
|
||||
max_entries=None, store_personal_data=False, force_fetch_all=False,
|
||||
last_fetch_by_backend=None
|
||||
):
|
||||
def query(self, query, max_entries=None, store_personal_data=False):
|
||||
"""
|
||||
Fetch the housings posts matching a given Woob query.
|
||||
|
||||
@ -176,18 +169,12 @@ class WoobProxy(object):
|
||||
:param max_entries: Maximum number of entries to fetch.
|
||||
:param store_personal_data: Whether personal data should be fetched
|
||||
from housing posts (phone number etc).
|
||||
:param force_fetch_all: Whether to force fetching all available flats
|
||||
or only diff from last fetch (based on timestamps).
|
||||
:param last_fetch_by_backend: A dict mapping all backends to last fetch
|
||||
datetimes.
|
||||
:return: The matching housing posts, dumped as a list of JSON objects.
|
||||
"""
|
||||
if last_fetch_by_backend is None:
|
||||
last_fetch_by_backend = {}
|
||||
|
||||
housings = []
|
||||
# List the useful backends for this specific query
|
||||
useful_backends = [x.backend for x in query.cities]
|
||||
# TODO: Handle max_entries better
|
||||
try:
|
||||
for housing in itertools.islice(
|
||||
self.webnip.do(
|
||||
@ -200,16 +187,6 @@ class WoobProxy(object):
|
||||
),
|
||||
max_entries,
|
||||
):
|
||||
if not force_fetch_all:
|
||||
# Check whether we should continue iterating or not
|
||||
last_fetch_datetime = last_fetch_by_backend.get(housing.backend)
|
||||
if last_fetch_datetime and housing.date and housing.date < last_fetch_datetime:
|
||||
LOGGER.info(
|
||||
'Done iterating till last fetch (housing.date=%s, last_fetch=%s). Stopping iteration.',
|
||||
housing.date,
|
||||
last_fetch_datetime
|
||||
)
|
||||
break
|
||||
if not store_personal_data:
|
||||
housing.phone = None
|
||||
housings.append(json.dumps(housing, cls=WoobEncoder))
|
||||
@ -263,66 +240,19 @@ def fetch_flats(config):
|
||||
"""
|
||||
fetched_flats = {}
|
||||
|
||||
# Get last fetch datetimes for all constraints / backends
|
||||
get_session = database.init_db(config["database"], config["search_index"])
|
||||
with get_session() as session:
|
||||
last_fetch = collections.defaultdict(dict)
|
||||
for item in session.query(last_fetch_model.LastFetch).all():
|
||||
last_fetch[item.constraint_name][item.backend] = item.last_fetch
|
||||
|
||||
# Do the actual fetching
|
||||
for constraint_name, constraint in config["constraints"].items():
|
||||
LOGGER.info("Loading flats for constraint %s...", constraint_name)
|
||||
|
||||
with WoobProxy(config) as woob_proxy:
|
||||
queries = woob_proxy.build_queries(constraint)
|
||||
housing_posts = []
|
||||
for query in queries:
|
||||
housing_posts.extend(
|
||||
woob_proxy.query(
|
||||
query,
|
||||
config["max_entries"],
|
||||
config["store_personal_data"],
|
||||
config["force_fetch_all"],
|
||||
last_fetch[constraint_name]
|
||||
)
|
||||
)
|
||||
|
||||
housing_posts = [json.loads(flat) for flat in housing_posts]
|
||||
|
||||
# Update last_fetch
|
||||
last_fetch_by_backends = collections.defaultdict(lambda: None)
|
||||
for flat in housing_posts:
|
||||
backend = flat['id'].split('@')[-1]
|
||||
if (
|
||||
last_fetch_by_backends[backend] is None
|
||||
or last_fetch_by_backends[backend] < flat['date']
|
||||
):
|
||||
last_fetch_by_backends[backend] = flat['date']
|
||||
for backend in last_fetch_by_backends:
|
||||
last_fetch_in_db = session.query(last_fetch_model.LastFetch).where(
|
||||
last_fetch_model.LastFetch.constraint_name == constraint_name,
|
||||
last_fetch_model.LastFetch.backend == backend
|
||||
).first()
|
||||
if last_fetch_in_db:
|
||||
last_fetch_in_db.last_fetch = arrow.get(
|
||||
last_fetch_by_backends[backend]
|
||||
).date()
|
||||
else:
|
||||
last_fetch_in_db = last_fetch_model.LastFetch(
|
||||
constraint_name=constraint_name,
|
||||
backend=backend,
|
||||
last_fetch=arrow.get(last_fetch_by_backends[backend]).date()
|
||||
)
|
||||
session.add(last_fetch_in_db)
|
||||
session.commit()
|
||||
|
||||
housing_posts.extend(woob_proxy.query(query, config["max_entries"], config["store_personal_data"]))
|
||||
housing_posts = housing_posts[: config["max_entries"]]
|
||||
LOGGER.info("Fetched %d flats.", len(housing_posts))
|
||||
|
||||
constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in housing_posts]
|
||||
constraint_flats_list = [json.loads(flat) for flat in housing_posts]
|
||||
constraint_flats_list = [WoobProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
|
||||
fetched_flats[constraint_name] = constraint_flats_list
|
||||
|
||||
return fetched_flats
|
||||
|
||||
|
||||
|
@ -1,31 +0,0 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module defines an SQLAlchemy ORM model for the last fetch timestamp of a backend.
|
||||
"""
|
||||
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
DateTime,
|
||||
String,
|
||||
)
|
||||
|
||||
from flatisfy.database.base import BASE
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LastFetch(BASE):
|
||||
"""
|
||||
SQLAlchemy ORM model storing the timestamp of the last fetch for each backend.
|
||||
"""
|
||||
|
||||
__tablename__ = "last_fetch"
|
||||
|
||||
backend = Column(String, primary_key=True)
|
||||
last_fetch = Column(DateTime)
|
||||
constraint_name = Column(String)
|
@ -9,80 +9,80 @@ export default {
|
||||
isLoading: (state) => state.loading > 0,
|
||||
|
||||
inseeCodesFlatsBuckets: (state, getters) => (filter) => {
|
||||
const buckets = {}
|
||||
const buckets = {};
|
||||
|
||||
state.flats.forEach((flat) => {
|
||||
if (!filter || filter(flat)) {
|
||||
const insee = flat.flatisfy_postal_code.insee_code
|
||||
const insee = flat.flatisfy_postal_code.insee_code;
|
||||
if (!buckets[insee]) {
|
||||
buckets[insee] = {
|
||||
name: flat.flatisfy_postal_code.name,
|
||||
flats: []
|
||||
flats: [],
|
||||
};
|
||||
}
|
||||
buckets[insee].flats.push(flat);
|
||||
}
|
||||
buckets[insee].flats.push(flat)
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
return buckets
|
||||
return buckets;
|
||||
},
|
||||
|
||||
flatsMarkers: (state, getters) => (router, filter) => {
|
||||
const markers = []
|
||||
const markers = [];
|
||||
state.flats.forEach((flat) => {
|
||||
if (filter && filter(flat)) {
|
||||
const gps = findFlatGPS(flat)
|
||||
const gps = findFlatGPS(flat);
|
||||
|
||||
if (gps) {
|
||||
const previousMarker = markers.find(
|
||||
(marker) =>
|
||||
marker.gps[0] === gps[0] && marker.gps[1] === gps[1]
|
||||
)
|
||||
);
|
||||
if (previousMarker) {
|
||||
// randomize position a bit
|
||||
// gps[0] += (Math.random() - 0.5) / 500
|
||||
// gps[1] += (Math.random() - 0.5) / 500
|
||||
}
|
||||
const href = router.resolve({
|
||||
name: 'details',
|
||||
params: { id: flat.id }
|
||||
}).href
|
||||
name: "details",
|
||||
params: { id: flat.id },
|
||||
}).href;
|
||||
const cost = flat.cost
|
||||
? costFilter(flat.cost, flat.currency)
|
||||
: ''
|
||||
: "";
|
||||
markers.push({
|
||||
title: '',
|
||||
title: "",
|
||||
content:
|
||||
'<a href="' +
|
||||
href +
|
||||
'">' +
|
||||
flat.title +
|
||||
'</a>' +
|
||||
"</a>" +
|
||||
cost,
|
||||
gps: gps,
|
||||
flatId: flat.id
|
||||
})
|
||||
flatId: flat.id,
|
||||
});
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
return markers
|
||||
return markers;
|
||||
},
|
||||
|
||||
allTimeToPlaces: (state) => {
|
||||
const places = {}
|
||||
const places = {};
|
||||
Object.keys(state.timeToPlaces).forEach((constraint) => {
|
||||
const constraintTimeToPlaces = state.timeToPlaces[constraint]
|
||||
const constraintTimeToPlaces = state.timeToPlaces[constraint];
|
||||
Object.keys(constraintTimeToPlaces).forEach((name) => {
|
||||
places[name] = constraintTimeToPlaces[name]
|
||||
})
|
||||
})
|
||||
return places
|
||||
places[name] = constraintTimeToPlaces[name];
|
||||
});
|
||||
});
|
||||
return places;
|
||||
},
|
||||
|
||||
timeToPlaces: (state, getters) => (constraintName) => {
|
||||
return state.timeToPlaces[constraintName]
|
||||
return state.timeToPlaces[constraintName];
|
||||
},
|
||||
|
||||
metadata: (state) => state.metadata
|
||||
}
|
||||
metadata: (state) => state.metadata,
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user