Add a config option to filter on terms in the description. Fix #77.

This commit is contained in:
Lucas Verney 2017-10-29 20:16:33 +01:00
parent f81deb9f13
commit 40e62baadf
3 changed files with 36 additions and 7 deletions

View File

@ -153,8 +153,12 @@ under the `constraints` key. The available constraints are:
`null`) from the place identified by the GPS coordinates `LAT` and `LNG` `null`) from the place identified by the GPS coordinates `LAT` and `LNG`
(latitude and longitude), and we call this place `foobar` in human-readable (latitude and longitude), and we call this place `foobar` in human-readable
form. Beware that `time` constraints are in **seconds**. form. Beware that `time` constraints are in **seconds**.
* `minimum_nb_photos` let you filter out posts with less than this number of * `minimum_nb_photos` lets you filter out posts with less than this number of
photos. photos.
* `description_should_contain` lets you specify a list of terms that should
be present in the posts' descriptions. Typically, if you expect "parking" to
be in all the posts Flatisfy fetches for you, you can set
`description_should_contain: ["parking"]`.
You can think of constraints as "a set of criteria to filter out flats". You You can think of constraints as "a set of criteria to filter out flats". You

View File

@ -35,6 +35,7 @@ DEFAULT_CONFIG = {
"rooms": (None, None), # (min, max) "rooms": (None, None), # (min, max)
"bedrooms": (None, None), # (min, max) "bedrooms": (None, None), # (min, max)
"minimum_nb_photos": None, # min number of photos "minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng], "time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max) } # "time": (min, max) }
# Time is in seconds # Time is in seconds
@ -144,6 +145,12 @@ def validate_config(config, check_with_data):
assert isinstance(constraint["minimum_nb_photos"], int) assert isinstance(constraint["minimum_nb_photos"], int)
assert constraint["minimum_nb_photos"] >= 0 assert constraint["minimum_nb_photos"] >= 0
assert "description_should_contain" in constraint
assert isinstance(constraint["description_should_contain"], list)
if constraint["description_should_contain"]:
for term in constraint["description_should_contain"]:
assert isinstance(term, str)
assert "house_types" in constraint assert "house_types" in constraint
assert constraint["house_types"] assert constraint["house_types"]
for house_type in constraint["house_types"]: for house_type in constraint["house_types"]:

View File

@ -80,12 +80,15 @@ def refine_with_housing_criteria(flats_list, constraint):
) )
def refine_with_minimum_nb_photos(flats_list, constraint): def refine_with_details_criteria(flats_list, constraint):
""" """
Filter a list of flats according to the minimum number of photos criterion. Filter a list of flats according to the criteria which require the full
details to be fetched. These include minimum number of photos and terms
that should appear in description.
.. note :: This has to be done in a separate function and not with the .. note :: This has to be done in a separate function and not with the
other criterias as photos are only fetched in the second pass. other criterias as photos and full description are only fetched in the
second pass.
:param flats_list: A list of flats dict to filter. :param flats_list: A list of flats dict to filter.
:param constraint: The constraint that the ``flats_list`` should satisfy. :param constraint: The constraint that the ``flats_list`` should satisfy.
@ -111,6 +114,20 @@ def refine_with_minimum_nb_photos(flats_list, constraint):
) )
is_ok[i] = False is_ok[i] = False
has_terms_in_description = True
if constraint["description_should_contain"]:
has_terms_in_description = all(
term in flat['text']
for term in constraint["description_should_contain"]
)
if not has_terms_in_description:
LOGGER.info(
("Description for flat %s does not contain all the required "
"terms."),
flat["id"]
)
is_ok[i] = False
return ( return (
[ [
flat flat
@ -202,9 +219,10 @@ def second_pass(flats_list, constraint, config):
flats_list, ignored_list = refine_with_housing_criteria(flats_list, flats_list, ignored_list = refine_with_housing_criteria(flats_list,
constraint) constraint)
# Remove return housing posts which do not have enough photos # Remove returned housing posts which do not match criteria relying on
flats_list, ignored_list = refine_with_minimum_nb_photos(flats_list, # fetched details.
constraint) flats_list, ignored_list = refine_with_details_criteria(flats_list,
constraint)
return { return {
"new": flats_list, "new": flats_list,