Allow blacklisting words from flat descriptions
This commit adds a new field "description_should_not_contain" to the configuration object. Any flat whose description contains a word appearing in this list will be filtered out.
This commit is contained in:
parent
d87f2ec37d
commit
a38cf0e9a8
@ -195,6 +195,10 @@ under the `constraints` key. The available constraints are:
|
|||||||
be present in the posts descriptions. Typically, if you expect "parking" to
|
be present in the posts descriptions. Typically, if you expect "parking" to
|
||||||
be in all the posts Flatisfy fetches for you, you can set
|
be in all the posts Flatisfy fetches for you, you can set
|
||||||
`description_should_contain: ["parking"]`.
|
`description_should_contain: ["parking"]`.
|
||||||
|
* `description_should_not_contain` lets you specify a list of terms that should
|
||||||
|
never occur in the posts descriptions. Typically, if you wish to avoid
|
||||||
|
"coloc" in the posts Flatisfy fetches for you, you can set
|
||||||
|
`description_should_not_contain: ["coloc"]`.
|
||||||
|
|
||||||
|
|
||||||
You can think of constraints as "a set of criteria to filter out flats". You
|
You can think of constraints as "a set of criteria to filter out flats". You
|
||||||
|
@ -38,6 +38,7 @@ DEFAULT_CONFIG = {
|
|||||||
"bedrooms": (None, None), # (min, max)
|
"bedrooms": (None, None), # (min, max)
|
||||||
"minimum_nb_photos": None, # min number of photos
|
"minimum_nb_photos": None, # min number of photos
|
||||||
"description_should_contain": [], # list of terms
|
"description_should_contain": [], # list of terms
|
||||||
|
"description_should_not_contain": [], # list of terms
|
||||||
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
|
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
|
||||||
# "time": (min, max),
|
# "time": (min, max),
|
||||||
# "mode": Valid mode }
|
# "mode": Valid mode }
|
||||||
@ -177,6 +178,13 @@ def validate_config(config, check_with_data):
|
|||||||
for term in constraint["description_should_contain"]:
|
for term in constraint["description_should_contain"]:
|
||||||
assert isinstance(term, str)
|
assert isinstance(term, str)
|
||||||
|
|
||||||
|
assert "description_should_not_contain" in constraint
|
||||||
|
assert isinstance(constraint["description_should_not_contain"],
|
||||||
|
list)
|
||||||
|
if constraint["description_should_not_contain"]:
|
||||||
|
for term in constraint["description_should_not_contain"]:
|
||||||
|
assert isinstance(term, str)
|
||||||
|
|
||||||
assert "house_types" in constraint
|
assert "house_types" in constraint
|
||||||
assert constraint["house_types"]
|
assert constraint["house_types"]
|
||||||
for house_type in constraint["house_types"]:
|
for house_type in constraint["house_types"]:
|
||||||
|
@ -117,16 +117,25 @@ def refine_with_details_criteria(flats_list, constraint):
|
|||||||
)
|
)
|
||||||
is_ok[i] = False
|
is_ok[i] = False
|
||||||
|
|
||||||
has_terms_in_description = True
|
has_all_good_terms_in_description = True
|
||||||
if constraint["description_should_contain"]:
|
if constraint["description_should_contain"]:
|
||||||
has_terms_in_description = all(
|
has_all_good_terms_in_description = all(
|
||||||
term in flat['text']
|
term in flat['text']
|
||||||
for term in constraint["description_should_contain"]
|
for term in constraint["description_should_contain"]
|
||||||
)
|
)
|
||||||
if not has_terms_in_description:
|
|
||||||
|
has_a_bad_term_in_description = False
|
||||||
|
if constraint["description_should_not_contain"]:
|
||||||
|
has_a_bad_term_in_description = any(
|
||||||
|
term in flat['text']
|
||||||
|
for term in constraint["description_should_not_contain"]
|
||||||
|
)
|
||||||
|
|
||||||
|
if (not has_all_good_terms_in_description
|
||||||
|
or has_a_bad_term_in_description):
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
("Description for flat %s does not contain all the required "
|
("Description for flat %s does not contain all the required "
|
||||||
"terms."),
|
"terms, or contains a blacklisted term."),
|
||||||
flat["id"]
|
flat["id"]
|
||||||
)
|
)
|
||||||
is_ok[i] = False
|
is_ok[i] = False
|
||||||
|
Loading…
Reference in New Issue
Block a user