From a38cf0e9a8380e4ab8f8d6468fe479eaeb8de275 Mon Sep 17 00:00:00 2001 From: Adrien Guatto Date: Wed, 7 Nov 2018 15:47:19 +0100 Subject: [PATCH] Allow blacklisting words from flat descriptions This commit adds a new field "description_should_not_contain" to the configuration object. Any flat whose description contains a word appearing in this list will be filtered out. --- doc/0.getting_started.md | 4 ++++ flatisfy/config.py | 8 ++++++++ flatisfy/filters/__init__.py | 17 +++++++++++++---- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md index 45139aa..3afab9d 100644 --- a/doc/0.getting_started.md +++ b/doc/0.getting_started.md @@ -195,6 +195,10 @@ under the `constraints` key. The available constraints are: be present in the posts descriptions. Typically, if you expect "parking" to be in all the posts Flatisfy fetches for you, you can set `description_should_contain: ["parking"]`. + * `description_should_not_contain` lets you specify a list of terms that should + never occur in the posts descriptions. Typically, if you wish to avoid + "coloc" in the posts Flatisfy fetches for you, you can set + `description_should_not_contain: ["coloc"]`. You can think of constraints as "a set of criterias to filter out flats". You diff --git a/flatisfy/config.py b/flatisfy/config.py index b1f6c34..f233ce3 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -38,6 +38,7 @@ DEFAULT_CONFIG = { "bedrooms": (None, None), # (min, max) "minimum_nb_photos": None, # min number of photos "description_should_contain": [], # list of terms + "description_should_not_contain": [], # list of terms "time_to": {} # Dict mapping names to {"gps": [lat, lng], # "time": (min, max), # "mode": Valid mode } @@ -177,6 +178,13 @@ def validate_config(config, check_with_data): for term in constraint["description_should_contain"]: assert isinstance(term, str) + assert "description_should_not_contain" in constraint + assert isinstance(constraint["description_should_not_contain"], + list) + if constraint["description_should_not_contain"]: + for term in constraint["description_should_not_contain"]: + assert isinstance(term, str) + assert "house_types" in constraint assert constraint["house_types"] for house_type in constraint["house_types"]: diff --git a/flatisfy/filters/__init__.py b/flatisfy/filters/__init__.py index 2e9bf62..5b31075 100644 --- a/flatisfy/filters/__init__.py +++ b/flatisfy/filters/__init__.py @@ -117,16 +117,25 @@ def refine_with_details_criteria(flats_list, constraint): ) is_ok[i] = False - has_terms_in_description = True + has_all_good_terms_in_description = True if constraint["description_should_contain"]: - has_terms_in_description = all( + has_all_good_terms_in_description = all( term in flat['text'] for term in constraint["description_should_contain"] ) - if not has_terms_in_description: + + has_a_bad_term_in_description = False + if constraint["description_should_not_contain"]: + has_a_bad_term_in_description = any( + term in flat['text'] + for term in constraint["description_should_not_contain"] + ) + + if (not has_all_good_terms_in_description + or has_a_bad_term_in_description): LOGGER.info( ("Description for flat %s does not contain all the required " - "terms."), + "terms, or contains a blacklisted term."), flat["id"] ) is_ok[i] = False