From 40e62baadff9353ba9d0b0c8b4a04fac96172d47 Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Sun, 29 Oct 2017 20:16:33 +0100 Subject: [PATCH] Add a config option to filter on terms in the description. Fix #77. --- doc/0.getting_started.md | 6 +++++- flatisfy/config.py | 7 +++++++ flatisfy/filters/__init__.py | 30 ++++++++++++++++++++++++------ 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md index 695d49c..f607bae 100644 --- a/doc/0.getting_started.md +++ b/doc/0.getting_started.md @@ -153,8 +153,12 @@ under the `constraints` key. The available constraints are: `null`) from the place identified by the GPS coordinates `LAT` and `LNG` (latitude and longitude), and we call this place `foobar` in human-readable form. Beware that `time` constraints are in **seconds**. - * `minimum_nb_photos` let you filter out posts with less than this number of + * `minimum_nb_photos` lets you filter out posts with fewer than this number of photos. + * `description_should_contain` lets you specify a list of terms that should + be present in the posts' descriptions. Typically, if you expect "parking" to + be in all the posts Flatisfy fetches for you, you can set + `description_should_contain: ["parking"]`. You can think of constraints as "a set of criterias to filter out flats". 
You diff --git a/flatisfy/config.py b/flatisfy/config.py index c43e201..9d6171f 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -35,6 +35,7 @@ DEFAULT_CONFIG = { "rooms": (None, None), # (min, max) "bedrooms": (None, None), # (min, max) "minimum_nb_photos": None, # min number of photos + "description_should_contain": [], # list of terms "time_to": {} # Dict mapping names to {"gps": [lat, lng], # "time": (min, max) } # Time is in seconds @@ -144,6 +145,12 @@ def validate_config(config, check_with_data): assert isinstance(constraint["minimum_nb_photos"], int) assert constraint["minimum_nb_photos"] >= 0 + assert "description_should_contain" in constraint + assert isinstance(constraint["description_should_contain"], list) + if constraint["description_should_contain"]: + for term in constraint["description_should_contain"]: + assert isinstance(term, str) + assert "house_types" in constraint assert constraint["house_types"] for house_type in constraint["house_types"]: diff --git a/flatisfy/filters/__init__.py b/flatisfy/filters/__init__.py index 5028b96..6e86040 100644 --- a/flatisfy/filters/__init__.py +++ b/flatisfy/filters/__init__.py @@ -80,12 +80,15 @@ def refine_with_housing_criteria(flats_list, constraint): ) -def refine_with_minimum_nb_photos(flats_list, constraint): +def refine_with_details_criteria(flats_list, constraint): """ - Filter a list of flats according to the minimum number of photos criterion. + Filter a list of flats according to the criteria which require the full + details to be fetched. These include minimum number of photos and terms + that should appear in description. .. note :: This has to be done in a separate function and not with the - other criterias as photos are only fetched in the second pass. + other criterias as photos and full description are only fetched in the + second pass. :param flats_list: A list of flats dict to filter. :param constraint: The constraint that the ``flats_list`` should satisfy. 
@@ -111,6 +114,20 @@ def refine_with_minimum_nb_photos(flats_list, constraint): ) is_ok[i] = False + has_terms_in_description = True + if constraint["description_should_contain"]: + has_terms_in_description = all( + term in flat['text'] + for term in constraint["description_should_contain"] + ) + if not has_terms_in_description: + LOGGER.info( + ("Description for flat %s does not contain all the required " + "terms."), + flat["id"] + ) + is_ok[i] = False + return ( [ flat @@ -202,9 +219,10 @@ def second_pass(flats_list, constraint, config): flats_list, ignored_list = refine_with_housing_criteria(flats_list, constraint) - # Remove return housing posts which do not have enough photos - flats_list, ignored_list = refine_with_minimum_nb_photos(flats_list, - constraint) + # Remove returned housing posts which do not match criteria relying on + # fetched details. + flats_list, ignored_list = refine_with_details_criteria(flats_list, + constraint) return { "new": flats_list,