Restore SeLoger module

This commit is contained in:
Lucas Verney 2021-04-09 20:15:02 +02:00
parent b73dbdb10e
commit 76ef3a3879
8 changed files with 547 additions and 1 deletions

View File

@ -12,7 +12,7 @@ from enum import Enum
# housing.
BACKENDS_BY_PRECEDENCE = [
"foncia",
#"seloger",
"seloger",
"pap",
"leboncoin",
"explorimmo",

View File

@ -0,0 +1,3 @@
from .module import SeLogerModule
__all__ = ['SeLogerModule']

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from woob.browser import PagesBrowser, URL
from .pages import SearchResultsPage, HousingPage, CitiesPage, ErrorPage, HousingJsonPage
from woob.browser.profiles import Android
from .constants import TYPES, RET
__all__ = ['SeLogerBrowser']
class SeLogerBrowser(PagesBrowser):
    """Browser for seloger.com: city lookup, listing search and housing pages.

    Uses the ``Android`` browser profile.
    """

    BASEURL = 'https://www.seloger.com'
    PROFILE = Android()

    # City autocompletion endpoint (absolute URL).
    cities = URL(
        r'https://autocomplete.svc.groupe-seloger.com/auto/complete/0/Ville/6\?text=(?P<pattern>.*)',
        CitiesPage,
    )
    # Paginated search results.
    search = URL(
        r'/list.html\?(?P<query>.*)&LISTING-LISTpg=(?P<page_number>\d+)',
        SearchResultsPage,
    )
    # HTML page of a single listing.
    housing = URL(
        r'/(?P<_id>.+)/detail.htm',
        r'/annonces/.+',
        HousingPage,
    )
    # JSON characteristics of a single listing.
    housing_detail = URL(
        r'detail,json,caracteristique_bien.json\?idannonce=(?P<_id>\d+)',
        HousingJsonPage,
    )
    # Captcha page; loading it raises ActionNeeded (see ErrorPage).
    captcha = URL(r'http://validate.perfdrive.com', ErrorPage)

    def search_geo(self, pattern):
        """Return the cities proposed by the autocompletion endpoint for *pattern*."""
        cities_page = self.cities.open(pattern=pattern)
        return cities_page.iter_cities()

    def search_housings(self, _type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """Build the search query string and iterate over the matching housings.

        :param _type: post type; must be a key of ``TYPES``
        :param cities: iterable of city identifiers
        :param nb_rooms: number of rooms; values above 5 are sent as 5
        :param area_min: lower surface bound (falsy for none)
        :param area_max: upper surface bound (falsy for none)
        :param cost_min: lower price bound (falsy for none)
        :param cost_max: upper price bound (falsy for none)
        :param house_types: house types; those absent from ``RET`` are ignored
        :param advert_types: forwarded untouched to ``iter_housings``
        :raises TypeNotSupported: when *_type* is not a key of ``TYPES``
        """
        lowest_cost = cost_min or 'NaN'
        highest_cost = cost_max or 'Nan'
        price = '{}/{}'.format(lowest_cost, highest_cost)

        smallest_area = area_min or 'Nan'
        largest_area = area_max or 'Nan'
        surface = '{}/{}'.format(smallest_area, largest_area)

        rooms = '&rooms={}'.format(nb_rooms if nb_rooms <= 5 else 5) if nb_rooms else ''

        if _type not in TYPES:
            raise TypeNotSupported()

        # Every type except VIAGER is sent as text together with a "natures" filter.
        project = TYPES[_type]
        natures = ""
        if _type != POSTS_TYPES.VIAGER:
            project = '{}'.format(project)
            natures = "&natures=1,2,4"

        places = '[{}]'.format('|'.join('{{ci:{}}}'.format(city) for city in cities))
        ret = ','.join(RET[kind] for kind in house_types if kind in RET)

        query = "projects={}{}&places={}&types={}&price={}&surface={}{}&enterprise=0&qsVersion=1.0".format(
            project, natures, places, ret, price, surface, rooms)

        results_page = self.search.go(query=query, page_number=1)
        return results_page.iter_housings(query_type=project, advert_types=advert_types, ret=ret)

    def get_housing(self, _id, obj=None):
        """Load the HTML page of listing *_id* and fill (or create) *obj* from it."""
        housing_page = self.housing.go(_id=_id)
        return housing_page.get_housing(obj=obj)

    def get_housing_detail(self, obj):
        """Load the JSON characteristics of ``obj.id`` and fill *obj* from them."""
        detail_page = self.housing_detail.go(_id=obj.id)
        return detail_page.get_housing(obj=obj)

View File

@ -0,0 +1,12 @@
from woob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# SeLoger numeric code for each supported post type.
# RENT and FURNISHED_RENT share code 1, so entry order matters for any
# reverse (code -> type) lookup: RENT must stay before FURNISHED_RENT.
TYPES = {
    POSTS_TYPES.RENT: 1,
    POSTS_TYPES.SALE: 2,
    POSTS_TYPES.FURNISHED_RENT: 1,
    POSTS_TYPES.VIAGER: 5,
}

# SeLoger code (kept as a string) for each supported house type.
RET = {
    HOUSE_TYPES.HOUSE: '2',
    HOUSE_TYPES.APART: '1',
    HOUSE_TYPES.LAND: '4',
    HOUSE_TYPES.PARKING: '3',
    HOUSE_TYPES.OTHER: '10',
}

BIN
modules/seloger/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

80
modules/seloger/module.py Normal file
View File

@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import CapHousing, Housing, HousingPhoto
from woob.tools.backend import Module
from .browser import SeLogerBrowser
__all__ = ['SeLogerModule']
class SeLogerModule(Module, CapHousing):
    """Housing module backed by ``SeLogerBrowser``."""

    NAME = 'seloger'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '2.1'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    ICON = 'http://static.poliris.com/z/portail/svx/portals/sv6_gen/favicon.png'
    BROWSER = SeLogerBrowser

    def search_housings(self, query):
        """Search housings for the cities of *query* that belong to this backend.

        :param query: search query; only cities whose ``backend`` equals this
            module's name are used
        :return: an empty list when no city matches, otherwise whatever the
            browser's ``search_housings`` returns
        """
        cities = [c.id for c in query.cities if c.backend == self.name]
        if not cities:
            return []

        return self.browser.search_housings(query.type, cities, query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types,
                                            query.advert_types)

    def get_housing(self, housing):
        """Fetch a housing from either a ``Housing`` object or a bare identifier.

        :param housing: a ``Housing`` instance (filled in place by the browser)
            or the identifier of the listing
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            # A bare identifier was given: let the browser create the object.
            housing_id = housing
            housing = None

        return self.browser.get_housing(housing_id, housing)

    def search_city(self, pattern):
        """Return the cities matching *pattern*."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when ``'data'`` is requested and missing."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    def fill_housing(self, housing, fields):
        """Complete *housing* with the requested *fields*.

        ``DPE`` and ``GES`` come from the JSON detail endpoint; any other
        requested field triggers a load of the HTML housing page.

        :param housing: the ``Housing`` object to complete
        :param fields: mutable collection of field names; ``'DPE'`` and
            ``'GES'`` are removed from it once handled (as before)
        """
        energy_fields = ('DPE', 'GES')
        if any(name in fields for name in energy_fields):
            housing = self.browser.get_housing_detail(housing)
            # Remove only the names actually present: an unconditional
            # remove() raised ValueError when just one of the two was asked.
            for name in energy_fields:
                if name in fields:
                    fields.remove(name)

        if fields:
            housing = self.browser.get_housing(housing.id, housing)

        return housing

    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}

262
modules/seloger/pages.py Normal file
View File

@ -0,0 +1,262 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.browser.pages import JsonPage, pagination, HTMLPage
from woob.browser.elements import ItemElement, DictElement, method
from woob.browser.filters.json import Dict
from woob.browser.filters.html import XPath
from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
Env, Regexp, Field, BrowserURL)
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES)
from woob.capabilities.address import PostalAddress
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
from woob.tools.json import json
from woob.exceptions import ActionNeeded
from .constants import TYPES, RET
import codecs
class ErrorPage(HTMLPage):
    """Page whose loading always signals that a captcha must be solved."""

    def on_load(self):
        """Raise ``ActionNeeded`` as soon as the page is loaded."""
        message = "Please resolve the captcha"
        raise ActionNeeded(message)
class CitiesPage(JsonPage):
    """JSON page listing cities."""

    @method
    class iter_cities(DictElement):
        """Yield one ``City`` per entry of the JSON document."""

        ignore_duplicate = True

        class item(ItemElement):
            """Map a JSON entry to a ``City`` (``Params/ci`` -> id, ``Display`` -> name)."""

            klass = City

            obj_id = Dict('Params/ci')
            obj_name = Dict('Display')
class SearchResultsPage(HTMLPage):
    """Search results page; listings are read from JSON embedded in a script."""

    def __init__(self, *args, **kwargs):
        """Parse the HTML, then replace ``self.doc`` with the embedded JSON data."""
        HTMLPage.__init__(self, *args, **kwargs)

        # Capture the string literal handed to JSON.parse("...") in the scripts.
        extract_initial_data = Regexp(
            CleanText('//script'),
            r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]",
        )
        escaped = extract_initial_data(self.doc)

        # Resolve the backslash escapes, then round-trip through UTF-8.
        unescaped, _consumed = codecs.unicode_escape_decode(escaped)
        payload = unescaped.encode('utf-8', 'surrogatepass').decode('utf-8')

        self.doc = json.loads(payload)

    @pagination
    @method
    class iter_housings(DictElement):
        """Yield one ``Housing`` per card found under ``cards/list``."""

        item_xpath = 'cards/list'

        def next_page(self):
            """Return the URL of the next results page, or ``None`` on the last one."""
            page_nb = Dict('navigation/pagination/page')(self)
            max_results = Dict('navigation/pagination/maxResults')(self)
            results_per_page = Dict('navigation/pagination/resultsPerPage')(self)

            page_count = int(max_results) / int(results_per_page)
            current_page = int(page_nb)
            if page_count <= current_page:
                return None
            return BrowserURL('search', query=Env('query'), page_number=current_page + 1)(self)

        # TODO handle bellesdemeures
        class item(ItemElement):
            klass = Housing

            def condition(self):
                """Skip advertising / local-expert cards and cards without an id."""
                if Dict('cardType')(self) in ['advertising', 'localExpert']:
                    return False
                return Dict('id', default=False)(self)

            obj_id = Dict('id')

            def obj_type(self):
                """Map the queried SeLoger type code back to a post type."""
                wanted_code = int(Env('query_type')(self))
                post_type = next(key for key, code in TYPES.items() if code == wanted_code)
                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type

            def obj_title(self):
                """Compose the title from the estate type, the tags and the location."""
                estate_type = Dict('estateType')(self)
                tags = " / ".join(Dict('tags')(self))
                location = Field('location')(self)
                return "{} - {} - {}".format(estate_type, tags, location)

            def obj_advert_type(self):
                """A card carrying an agency id is professional, otherwise personal."""
                agency_id = Dict('contact/agencyId', default=False)(self)
                return ADVERT_TYPES.PROFESSIONAL if agency_id else ADVERT_TYPES.PERSONAL

            obj_utilities = UTILITIES.EXCLUDED

            def obj_photos(self):
                """Wrap every entry of ``photos`` in a ``HousingPhoto``."""
                return [HousingPhoto(photo) for photo in Dict('photos')(self)]

            def obj_location(self):
                """Format the location as ``"<district> <city> (<zip code>)"``."""
                district = Dict('districtLabel')(self) or ''
                city = Dict('cityLabel')(self) or ''
                zip_code = Dict('zipCode')(self) or ''
                return u'%s %s (%s)' % (district, city, zip_code)

            obj_url = Dict('classifiedURL')
            obj_text = Dict('description')
            obj_cost = CleanDecimal(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            obj_currency = Currency(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            # NOTE(review): the default is the PricePerMeterFilter class itself,
            # not an instance -- confirm the framework evaluates it as intended.
            obj_price_per_meter = CleanDecimal(Dict('pricing/squareMeterPrice'), default=PricePerMeterFilter)
class HousingPage(HTMLPage):
    """HTML page of a single listing."""

    @method
    class get_housing(ItemElement):
        """Build a ``Housing`` from the page's ``central`` form and inline scripts."""

        klass = Housing

        def parse(self, el):
            """Load the first ``products`` entry of the inline ``ava_data`` object.

            The result is stored in ``self.house_json_datas`` for the
            ``obj_rooms`` / ``obj_bedrooms`` extractors.
            """
            json_content = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)
            # Quote these identifiers so that json.loads() accepts the object
            # (plain textual replacement, applied in this order).
            for bare_name in ("logged", "lengthcarrousel", "products"):
                json_content = json_content.replace(bare_name, '"{}"'.format(bare_name))
            # Drop an inline JavaScript comment that is not valid JSON.
            json_content = json_content.replace("// // ANNONCES_SIMILAIRE / RECO", "")
            self.house_json_datas = json.loads(json_content)['products'][0]

        obj_id = CleanText('//form[@name="central"]/input[@name="idannonce"]/@value')

        def obj_house_type(self):
            """Return the house type matching ``naturebien``, or ``NotLoaded``."""
            naturebien = CleanText('//form[@name="central"]/input[@name="naturebien"]/@value')(self)
            return next((k for k, v in RET.items() if v == naturebien), NotLoaded)

        def obj_type(self):
            """Return the post type matching ``idtt``, or ``NotLoaded`` if unknown.

            An unknown code used to leak ``StopIteration`` out of a bare
            ``next()``; it now yields ``NotLoaded`` like ``obj_house_type``.
            """
            type_code = int(CleanText('//form[@name="central"]/input[@name="idtt"]/@value')(self))
            for post_type, code in TYPES.items():
                if code != type_code:
                    continue
                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type
            return NotLoaded

        def obj_advert_type(self):
            """Professional when any agency-related form input is filled in."""
            is_agency = (
                CleanText('//form[@name="central"]/input[@name="nomagance"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="urlagence"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="adresseagence"]/@value')(self)
            )
            return ADVERT_TYPES.PROFESSIONAL if is_agency else ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Collect carousel photos: eager ``<img>`` sources, then lazy slides."""
            photos = [
                HousingPhoto("https:{}".format(src))
                for src in XPath('//div[@class="carrousel_slide"]/img/@src')(self)
            ]
            # Lazy slides carry a JSON blob whose "url" key holds the image URL.
            for lazy_blob in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self):
                lazy = json.loads(lazy_blob)
                photos.append(HousingPhoto("https:{}".format(lazy['url'])))
            return photos

        obj_title = CleanText('//title[1]')

        def obj_location(self):
            """Format the location as ``"<district> <city> (<postal code>)"``."""
            quartier = Regexp(CleanText('//script'),
                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            ville = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self) or ''
            cp = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self) or ''
            return u'%s %s (%s)' % (quartier, ville, cp)

        def obj_address(self):
            """Build a ``PostalAddress``; the district name is stored as ``street``."""
            p = PostalAddress()
            p.street = Regexp(CleanText('//script'),
                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            p.postal_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
            p.city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
            p.full_address = Field('location')(self)
            return p

        obj_text = CleanText('//form[@name="central"]/input[@name="description"]/@value')
        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_currency = Currency(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_price_per_meter = PricePerMeterFilter()
        obj_area = CleanDecimal('//form[@name="central"]/input[@name="surface"]/@value', replace_dots=True)
        obj_url = CleanText('//form[@name="central"]/input[@name="urlannonce"]/@value')
        obj_phone = CleanText('//div[@class="data-action"]/a[@data-phone]/@data-phone')

        def obj_utilities(self):
            """``INCLUDED`` when the price mention says so, ``UNKNOWN`` otherwise."""
            mention = CleanText('//span[@class="detail_indice_prix"]', default="")(self)
            if "(CC) Loyer mensuel charges comprises" in mention:
                return UTILITIES.INCLUDED
            return UTILITIES.UNKNOWN

        def obj_bedrooms(self):
            """Read ``nb_chambres`` from the JSON data loaded by ``parse``."""
            return CleanDecimal(Dict('nb_chambres', default=NotLoaded))(self.house_json_datas)

        def obj_rooms(self):
            """Read ``nb_pieces`` from the JSON data loaded by ``parse``."""
            return CleanDecimal(Dict('nb_pieces', default=NotLoaded))(self.house_json_datas)
def _energy_class(entry):
    """Translate an energy rating entry into an ``ENERGY_CLASS`` member.

    :param entry: value read from the JSON document; normally a mapping with
        ``status`` and ``lettre`` keys, but it may be the empty-string default
        used when the key is absent
    :return: the matching ``ENERGY_CLASS`` attribute, or ``NotAvailable`` when
        the entry is not a mapping, has a positive ``status``, or carries no
        known letter
    """
    if not isinstance(entry, dict):
        # Covers the "" default: indexing it with a string key raised TypeError.
        return NotAvailable
    if (entry.get('status') or 0) > 0:
        return NotAvailable
    letter = entry.get('lettre')
    if not letter:
        return NotAvailable
    return getattr(ENERGY_CLASS, letter, NotAvailable)


class HousingJsonPage(JsonPage):
    """JSON page holding the characteristics of a single listing."""

    @method
    class get_housing(ItemElement):
        """Fill the energy ratings and free-form details of a ``Housing``."""

        klass = Housing

        def obj_DPE(self):
            """Energy class read from the ``energie`` entry."""
            return _energy_class(Dict("energie", default="")(self))

        def obj_GES(self):
            """Energy class read from the ``ges`` entry."""
            return _energy_class(Dict("ges", default="")(self))

        def obj_details(self):
            """Flatten ``categories`` and ``infos_acquereur`` into one dict.

            Each category with criteria becomes ``name -> "v1 / v2 / ..."``;
            every key/value pair of each ``infos_acquereur`` section is then
            copied in, overriding earlier keys on collision.
            """
            details = {}
            for category in Dict('categories')(self):
                criteria = category['criteria']
                if criteria:
                    details[category['name']] = ' / '.join(
                        [criterion['value'] for criterion in criteria])

            for section in Dict('infos_acquereur')(self).values():
                for key, value in section.items():
                    details[key] = value
            return details

104
modules/seloger/test.py Normal file
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from woob.tools.test import BackendTest
from woob.tools.capabilities.housing.housing_test import HousingTest
class SeLogerTest(BackendTest, HousingTest):
    """Live tests of the ``seloger`` module, driven by ``HousingTest`` checks."""

    MODULE = 'seloger'

    # Fields every housing of a search listing must provide.
    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "utilities", "date", "location", "text"
    ]
    # Fields at least one housing of a search listing must provide.
    FIELDS_ANY_HOUSINGS_LIST = [
        "cost",  # Some posts don't have cost in seloger
        "currency",  # Same
        "photos",
    ]
    # Fields a single fetched housing must provide.
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "utilities", "date", "location", "text", "phone", "details"
    ]
    # Fields a single fetched housing may provide.
    FIELDS_ANY_SINGLE_HOUSING = [
        "cost",  # Some posts don't have cost in seloger
        "currency",  # Same
        "photos",
        "rooms",
        "bedrooms",
        "station",
        "DPE",
        "GES"
    ]
    DO_NOT_DISTINGUISH_FURNISHED_RENT = True

    def _backend_cities(self, pattern):
        """Return the cities matching *pattern*, each tagged with this backend's name."""
        found = []
        for city in self.backend.search_city(pattern):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_seloger_rent(self):
        """Rentals in Paris: at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._backend_cities('paris')
        self.check_against_query(query)

    def test_seloger_sale(self):
        """Sales in Paris: at least 20 m2."""
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._backend_cities('paris')
        self.check_against_query(query)

    def test_seloger_furnished_rent(self):
        """Furnished rentals in Paris: at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._backend_cities('paris')
        self.check_against_query(query)

    def test_seloger_viager(self):
        """Viager listings for the cities matching '85'."""
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._backend_cities('85')
        self.check_against_query(query)

    def test_seloger_rent_personal(self):
        """Rentals in Paris restricted by advert type."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        # NOTE(review): the test name says "personal" but the filter is
        # PROFESSIONAL -- confirm which one is intended.
        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
        query.cities = self._backend_cities('paris')
        self.check_against_query(query)