From 28270a15b9d09988e3d403f173e56fd78047a1ff Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Sun, 18 Apr 2021 22:55:19 +0200 Subject: [PATCH] Fix pap and seloger modules: skip listings lacking an id or classified URL --- modules/pap/pages.py | 11 ++++++++++- modules/seloger/pages.py | 6 +++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/modules/pap/pages.py b/modules/pap/pages.py index ecc1ac0..71b622e 100644 --- a/modules/pap/pages.py +++ b/modules/pap/pages.py @@ -49,6 +49,8 @@ class HousingPage(HTMLPage): @method class iter_housings(ListElement): item_xpath = '//div[has-class("search-list-item-alt")]' + # Prevent DataError on same ids + ignore_duplicate = True def next_page(self): return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self) @@ -61,6 +63,9 @@ class HousingPage(HTMLPage): isNotFurnishedOk = True if self.env['query_type'] == POSTS_TYPES.RENT: isNotFurnishedOk = 'meublé' not in title.lower() + id = self.obj_id(self) + if id is None: + return False return ( Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', default=None)(self) and isNotFurnishedOk @@ -94,7 +99,11 @@ class HousingPage(HTMLPage): )(item) self.env[name] = value - obj_id = Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)') + obj_id = Regexp( + Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', + default=None + ) + obj_type = Env('query_type') obj_advert_type = ADVERT_TYPES.PERSONAL diff --git a/modules/seloger/pages.py b/modules/seloger/pages.py index f40b4d2..d8510db 100644 --- a/modules/seloger/pages.py +++ b/modules/seloger/pages.py @@ -81,7 +81,11 @@ class SearchResultsPage(HTMLPage): klass = Housing def condition(self): - return Dict('cardType')(self) not in ['advertising', 'localExpert'] and Dict('id', default=False)(self) + return ( + Dict('cardType')(self) not in ['advertising', 'localExpert'] + and Dict('id', default=False)(self) + and Dict('classifiedURL', default=False)(self) + ) obj_id = Dict('id')