Allow blacklisting words from flat descriptions

This commit adds a new field "description_should_not_contain" to the
constraints of the configuration object. Any flat whose description contains
at least one term from this list is filtered out. Terms are matched as plain
substrings of the description, not as whole words.
This commit is contained in:
Adrien Guatto 2018-11-07 15:47:19 +01:00
parent d87f2ec37d
commit a38cf0e9a8
3 changed files with 25 additions and 4 deletions

View File

@ -195,6 +195,10 @@ under the `constraints` key. The available constraints are:
be present in the posts descriptions. Typically, if you expect "parking" to be present in the posts descriptions. Typically, if you expect "parking" to
be in all the posts Flatisfy fetches for you, you can set be in all the posts Flatisfy fetches for you, you can set
`description_should_contain: ["parking"]`. `description_should_contain: ["parking"]`.
* `description_should_not_contain` lets you specify a list of terms that should
never occur in the posts' descriptions; any post whose description contains
one of them is filtered out. For example, if you wish to avoid "coloc" in the
posts Flatisfy fetches for you, you can set
`description_should_not_contain: ["coloc"]`.
You can think of constraints as "a set of criteria to filter out flats". You You can think of constraints as "a set of criteria to filter out flats". You

View File

@ -38,6 +38,7 @@ DEFAULT_CONFIG = {
"bedrooms": (None, None), # (min, max) "bedrooms": (None, None), # (min, max)
"minimum_nb_photos": None, # min number of photos "minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms "description_should_contain": [], # list of terms
"description_should_not_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng], "time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max), # "time": (min, max),
# "mode": Valid mode } # "mode": Valid mode }
@ -177,6 +178,13 @@ def validate_config(config, check_with_data):
for term in constraint["description_should_contain"]: for term in constraint["description_should_contain"]:
assert isinstance(term, str) assert isinstance(term, str)
assert "description_should_not_contain" in constraint
assert isinstance(constraint["description_should_not_contain"],
list)
if constraint["description_should_not_contain"]:
for term in constraint["description_should_not_contain"]:
assert isinstance(term, str)
assert "house_types" in constraint assert "house_types" in constraint
assert constraint["house_types"] assert constraint["house_types"]
for house_type in constraint["house_types"]: for house_type in constraint["house_types"]:

View File

@ -117,16 +117,25 @@ def refine_with_details_criteria(flats_list, constraint):
) )
is_ok[i] = False is_ok[i] = False
has_terms_in_description = True has_all_good_terms_in_description = True
if constraint["description_should_contain"]: if constraint["description_should_contain"]:
has_terms_in_description = all( has_all_good_terms_in_description = all(
term in flat['text'] term in flat['text']
for term in constraint["description_should_contain"] for term in constraint["description_should_contain"]
) )
if not has_terms_in_description:
has_a_bad_term_in_description = False
if constraint["description_should_not_contain"]:
has_a_bad_term_in_description = any(
term in flat['text']
for term in constraint["description_should_not_contain"]
)
if (not has_all_good_terms_in_description
or has_a_bad_term_in_description):
LOGGER.info( LOGGER.info(
("Description for flat %s does not contain all the required " ("Description for flat %s does not contain all the required "
"terms."), "terms, or contains a blacklisted term."),
flat["id"] flat["id"]
) )
is_ok[i] = False is_ok[i] = False