Add a config option to filter on terms in the description. Fix #77.
This commit is contained in:
parent
f81deb9f13
commit
40e62baadf
@ -153,8 +153,12 @@ under the `constraints` key. The available constraints are:
|
||||
`null`) from the place identified by the GPS coordinates `LAT` and `LNG`
|
||||
(latitude and longitude), and we call this place `foobar` in human-readable
|
||||
form. Beware that `time` constraints are in **seconds**.
|
||||
* `minimum_nb_photos` let you filter out posts with less than this number of
|
||||
* `minimum_nb_photos` lets you filter out posts with fewer than this number of
|
||||
photos.
|
||||
* `description_should_contain` lets you specify a list of terms that should
|
||||
be present in the posts' descriptions. Typically, if you expect "parking" to
|
||||
be in all the posts Flatisfy fetches for you, you can set
|
||||
`description_should_contain: ["parking"]`.
|
||||
|
||||
|
||||
You can think of constraints as "a set of criteria to filter out flats". You
|
||||
|
@ -35,6 +35,7 @@ DEFAULT_CONFIG = {
|
||||
"rooms": (None, None), # (min, max)
|
||||
"bedrooms": (None, None), # (min, max)
|
||||
"minimum_nb_photos": None, # min number of photos
|
||||
"description_should_contain": [], # list of terms
|
||||
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
|
||||
# "time": (min, max) }
|
||||
# Time is in seconds
|
||||
@ -144,6 +145,12 @@ def validate_config(config, check_with_data):
|
||||
assert isinstance(constraint["minimum_nb_photos"], int)
|
||||
assert constraint["minimum_nb_photos"] >= 0
|
||||
|
||||
assert "description_should_contain" in constraint
|
||||
assert isinstance(constraint["description_should_contain"], list)
|
||||
if constraint["description_should_contain"]:
|
||||
for term in constraint["description_should_contain"]:
|
||||
assert isinstance(term, str)
|
||||
|
||||
assert "house_types" in constraint
|
||||
assert constraint["house_types"]
|
||||
for house_type in constraint["house_types"]:
|
||||
|
@ -80,12 +80,15 @@ def refine_with_housing_criteria(flats_list, constraint):
|
||||
)
|
||||
|
||||
|
||||
def refine_with_minimum_nb_photos(flats_list, constraint):
|
||||
def refine_with_details_criteria(flats_list, constraint):
|
||||
"""
|
||||
Filter a list of flats according to the minimum number of photos criterion.
|
||||
Filter a list of flats according to the criteria which require the full
|
||||
details to be fetched. These include minimum number of photos and terms
|
||||
that should appear in description.
|
||||
|
||||
.. note :: This has to be done in a separate function and not with the
|
||||
other criterias as photos are only fetched in the second pass.
|
||||
other criteria as photos and full description are only fetched in the
|
||||
second pass.
|
||||
|
||||
:param flats_list: A list of flats dict to filter.
|
||||
:param constraint: The constraint that the ``flats_list`` should satisfy.
|
||||
@ -111,6 +114,20 @@ def refine_with_minimum_nb_photos(flats_list, constraint):
|
||||
)
|
||||
is_ok[i] = False
|
||||
|
||||
has_terms_in_description = True
|
||||
if constraint["description_should_contain"]:
|
||||
has_terms_in_description = all(
|
||||
term in flat['text']
|
||||
for term in constraint["description_should_contain"]
|
||||
)
|
||||
if not has_terms_in_description:
|
||||
LOGGER.info(
|
||||
("Description for flat %s does not contain all the required "
|
||||
"terms."),
|
||||
flat["id"]
|
||||
)
|
||||
is_ok[i] = False
|
||||
|
||||
return (
|
||||
[
|
||||
flat
|
||||
@ -202,9 +219,10 @@ def second_pass(flats_list, constraint, config):
|
||||
flats_list, ignored_list = refine_with_housing_criteria(flats_list,
|
||||
constraint)
|
||||
|
||||
# Remove return housing posts which do not have enough photos
|
||||
flats_list, ignored_list = refine_with_minimum_nb_photos(flats_list,
|
||||
constraint)
|
||||
# Remove returned housing posts which do not match criteria relying on
|
||||
# fetched details.
|
||||
flats_list, ignored_list = refine_with_details_criteria(flats_list,
|
||||
constraint)
|
||||
|
||||
return {
|
||||
"new": flats_list,
|
||||
|
Loading…
Reference in New Issue
Block a user