Allow blacklisting words from flat descriptions

This commit adds a new field "description_should_not_contain" to the
configuration object. Any flat whose description contains a word
appearing in this list will be filtered out.
This commit is contained in:
Adrien Guatto 2018-11-07 15:47:19 +01:00
parent d87f2ec37d
commit a38cf0e9a8
3 changed files with 25 additions and 4 deletions

View File

@ -195,6 +195,10 @@ under the `constraints` key. The available constraints are:
be present in the posts' descriptions. Typically, if you expect "parking" to
be in all the posts Flatisfy fetches for you, you can set
`description_should_contain: ["parking"]`.
* `description_should_not_contain` lets you specify a list of terms that should
never occur in the posts' descriptions. Typically, if you wish to avoid
"coloc" in the posts Flatisfy fetches for you, you can set
`description_should_not_contain: ["coloc"]`.
You can think of constraints as "a set of criteria to filter out flats". You

View File

@ -38,6 +38,7 @@ DEFAULT_CONFIG = {
"bedrooms": (None, None), # (min, max)
"minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms
"description_should_not_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max),
# "mode": Valid mode }
@ -177,6 +178,13 @@ def validate_config(config, check_with_data):
for term in constraint["description_should_contain"]:
assert isinstance(term, str)
assert "description_should_not_contain" in constraint
assert isinstance(constraint["description_should_not_contain"],
list)
if constraint["description_should_not_contain"]:
for term in constraint["description_should_not_contain"]:
assert isinstance(term, str)
assert "house_types" in constraint
assert constraint["house_types"]
for house_type in constraint["house_types"]:

View File

@ -117,16 +117,25 @@ def refine_with_details_criteria(flats_list, constraint):
)
is_ok[i] = False
has_terms_in_description = True
has_all_good_terms_in_description = True
if constraint["description_should_contain"]:
has_terms_in_description = all(
has_all_good_terms_in_description = all(
term in flat['text']
for term in constraint["description_should_contain"]
)
if not has_terms_in_description:
has_a_bad_term_in_description = False
if constraint["description_should_not_contain"]:
has_a_bad_term_in_description = any(
term in flat['text']
for term in constraint["description_should_not_contain"]
)
if (not has_all_good_terms_in_description
or has_a_bad_term_in_description):
LOGGER.info(
("Description for flat %s does not contain all the required "
"terms."),
"terms, or contains a blacklisted term."),
flat["id"]
)
is_ok[i] = False