Add a config option to filter on terms in the description. Fix #77.

This commit is contained in:
Lucas Verney 2017-10-29 20:16:33 +01:00
parent f81deb9f13
commit 40e62baadf
3 changed files with 36 additions and 7 deletions

View File

@ -153,8 +153,12 @@ under the `constraints` key. The available constraints are:
`null`) from the place identified by the GPS coordinates `LAT` and `LNG`
(latitude and longitude), and we call this place `foobar` in human-readable
form. Beware that `time` constraints are in **seconds**.
* `minimum_nb_photos` let you filter out posts with less than this number of
* `minimum_nb_photos` lets you filter out posts with fewer than this number of
photos.
* `description_should_contain` lets you specify a list of terms that should
be present in the posts' descriptions. Typically, if you expect "parking" to
be in all the posts Flatisfy fetches for you, you can set
`description_should_contain: ["parking"]`.
You can think of constraints as "a set of criteria to filter out flats". You

View File

@ -35,6 +35,7 @@ DEFAULT_CONFIG = {
"rooms": (None, None), # (min, max)
"bedrooms": (None, None), # (min, max)
"minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max) }
# Time is in seconds
@ -144,6 +145,12 @@ def validate_config(config, check_with_data):
assert isinstance(constraint["minimum_nb_photos"], int)
assert constraint["minimum_nb_photos"] >= 0
assert "description_should_contain" in constraint
assert isinstance(constraint["description_should_contain"], list)
if constraint["description_should_contain"]:
for term in constraint["description_should_contain"]:
assert isinstance(term, str)
assert "house_types" in constraint
assert constraint["house_types"]
for house_type in constraint["house_types"]:

View File

@ -80,12 +80,15 @@ def refine_with_housing_criteria(flats_list, constraint):
)
def refine_with_minimum_nb_photos(flats_list, constraint):
def refine_with_details_criteria(flats_list, constraint):
"""
Filter a list of flats according to the minimum number of photos criterion.
Filter a list of flats according to the criteria which require the full
details to be fetched. These include minimum number of photos and terms
that should appear in description.
.. note :: This has to be done in a separate function and not with the
other criterias as photos are only fetched in the second pass.
other criterias as photos and full description are only fetched in the
second pass.
:param flats_list: A list of flats dict to filter.
:param constraint: The constraint that the ``flats_list`` should satisfy.
@ -111,6 +114,20 @@ def refine_with_minimum_nb_photos(flats_list, constraint):
)
is_ok[i] = False
has_terms_in_description = True
if constraint["description_should_contain"]:
has_terms_in_description = all(
term in flat['text']
for term in constraint["description_should_contain"]
)
if not has_terms_in_description:
LOGGER.info(
("Description for flat %s does not contain all the required "
"terms."),
flat["id"]
)
is_ok[i] = False
return (
[
flat
@ -202,8 +219,9 @@ def second_pass(flats_list, constraint, config):
flats_list, ignored_list = refine_with_housing_criteria(flats_list,
constraint)
# Remove return housing posts which do not have enough photos
flats_list, ignored_list = refine_with_minimum_nb_photos(flats_list,
# Remove returned housing posts which do not match criteria relying on
# fetched details.
flats_list, ignored_list = refine_with_details_criteria(flats_list,
constraint)
return {