diff --git a/flatisfy/constants.py b/flatisfy/constants.py
index 90c6613..6372599 100644
--- a/flatisfy/constants.py
+++ b/flatisfy/constants.py
@@ -12,7 +12,7 @@ from enum import Enum
# housing.
BACKENDS_BY_PRECEDENCE = [
"foncia",
- #"seloger",
+ "seloger",
"pap",
"leboncoin",
"explorimmo",
diff --git a/modules/seloger/__init__.py b/modules/seloger/__init__.py
new file mode 100644
index 0000000..0c57692
--- /dev/null
+++ b/modules/seloger/__init__.py
@@ -0,0 +1,3 @@
+from .module import SeLogerModule
+
+__all__ = ['SeLogerModule']
diff --git a/modules/seloger/browser.py b/modules/seloger/browser.py
new file mode 100644
index 0000000..a89e53e
--- /dev/null
+++ b/modules/seloger/browser.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a woob module.
+#
+# This woob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This woob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
+
+from woob.capabilities.housing import TypeNotSupported, POSTS_TYPES
+
+from woob.browser import PagesBrowser, URL
+from .pages import SearchResultsPage, HousingPage, CitiesPage, ErrorPage, HousingJsonPage
+from woob.browser.profiles import Android
+
+from .constants import TYPES, RET
+
+__all__ = ['SeLogerBrowser']
+
+
+class SeLogerBrowser(PagesBrowser):
+    """Browser for seloger.com, using the Android browser profile.
+
+    Each ``URL`` attribute binds one or more URL patterns to the page class
+    that parses them. The named groups of a pattern are the keyword
+    arguments given to ``.open()`` / ``.go()`` in the methods below.
+    """
+
+    BASEURL = 'https://www.seloger.com'
+    PROFILE = Android()
+
+    # ``pattern`` is filled in by search_geo().
+    cities = URL(r'https://autocomplete.svc.groupe-seloger.com/auto/complete/0/Ville/6\?text=(?P<pattern>.*)',
+                 CitiesPage)
+    # ``query`` and ``page_number`` are filled in by search_housings() and by
+    # the pagination of the result page.
+    search = URL(r'/list.html\?(?P<query>.*)&LISTING-LISTpg=(?P<page_number>\d+)', SearchResultsPage)
+    housing = URL(r'/(?P<_id>.+)/detail.htm',
+                  r'/annonces/.+',
+                  HousingPage)
+    housing_detail = URL(r'detail,json,caracteristique_bien.json\?idannonce=(?P<_id>\d+)', HousingJsonPage)
+    captcha = URL(r'http://validate.perfdrive.com', ErrorPage)
+
+    def search_geo(self, pattern):
+        """Open the autocompletion URL for ``pattern`` and iterate its cities."""
+        return self.cities.open(pattern=pattern).iter_cities()
+
+    def search_housings(self, _type, cities, nb_rooms, area_min, area_max,
+                        cost_min, cost_max, house_types, advert_types):
+        """Build the search query string and iterate the matching housings.
+
+        :param _type: a key of ``TYPES``; anything else is rejected.
+        :param cities: city identifiers, each rendered as ``{ci:<id>}``.
+        :param nb_rooms: room count; falsy values add no room filter.
+        :param area_min: lower surface bound, or a falsy value for none.
+        :param area_max: upper surface bound, or a falsy value for none.
+        :param cost_min: lower price bound, or a falsy value for none.
+        :param cost_max: upper price bound, or a falsy value for none.
+        :param house_types: house types; those absent from ``RET`` are dropped.
+        :param advert_types: forwarded unchanged to ``iter_housings``.
+        :raises TypeNotSupported: when ``_type`` is not a key of ``TYPES``.
+        """
+        if _type not in TYPES:
+            raise TypeNotSupported()
+
+        # NOTE(review): the "no bound" placeholder is spelled both 'NaN' and
+        # 'Nan'; kept exactly as it was -- confirm the site accepts both.
+        price = '{}/{}'.format(cost_min or 'NaN', cost_max or 'Nan')
+        surface = '{}/{}'.format(area_min or 'Nan', area_max or 'Nan')
+
+        # Room counts above 5 are clamped to 5.
+        rooms = '&rooms={}'.format(min(nb_rooms, 5)) if nb_rooms else ''
+
+        project = TYPES[_type]
+        # Every project except viager carries an extra ``natures`` filter.
+        natures = '' if _type == POSTS_TYPES.VIAGER else '&natures=1,2,4'
+
+        places = '[{}]'.format('|'.join('{{ci:{}}}'.format(c) for c in cities))
+        ret = ','.join(RET[t] for t in house_types if t in RET)
+
+        query = (
+            "projects={}{}&places={}&types={}&price={}&surface={}{}&enterprise=0&qsVersion=1.0"
+        ).format(project, natures, places, ret, price, surface, rooms)
+
+        results_page = self.search.go(query=query, page_number=1)
+        return results_page.iter_housings(query_type=project, advert_types=advert_types, ret=ret)
+
+    def get_housing(self, _id, obj=None):
+        """Load the advert page for ``_id`` and fill ``obj`` (or a new housing)."""
+        return self.housing.go(_id=_id).get_housing(obj=obj)
+
+    def get_housing_detail(self, obj):
+        """Load the JSON detail document for ``obj.id`` and fill ``obj`` from it."""
+        return self.housing_detail.go(_id=obj.id).get_housing(obj=obj)
diff --git a/modules/seloger/constants.py b/modules/seloger/constants.py
new file mode 100644
index 0000000..a62f20a
--- /dev/null
+++ b/modules/seloger/constants.py
@@ -0,0 +1,12 @@
+from woob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
+
+# Numeric project code for each supported post type. RENT and FURNISHED_RENT
+# share code 1; code that looks a post type up by value takes the first
+# match, so RENT must stay listed before FURNISHED_RENT.
+TYPES = {
+    POSTS_TYPES.RENT: 1,
+    POSTS_TYPES.SALE: 2,
+    POSTS_TYPES.FURNISHED_RENT: 1,
+    POSTS_TYPES.VIAGER: 5,
+}
+
+# String code for each supported house type.
+RET = {
+    HOUSE_TYPES.HOUSE: '2',
+    HOUSE_TYPES.APART: '1',
+    HOUSE_TYPES.LAND: '4',
+    HOUSE_TYPES.PARKING: '3',
+    HOUSE_TYPES.OTHER: '10',
+}
diff --git a/modules/seloger/favicon.png b/modules/seloger/favicon.png
new file mode 100644
index 0000000..4677f10
Binary files /dev/null and b/modules/seloger/favicon.png differ
diff --git a/modules/seloger/module.py b/modules/seloger/module.py
new file mode 100644
index 0000000..1e642a4
--- /dev/null
+++ b/modules/seloger/module.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a woob module.
+#
+# This woob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This woob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from woob.capabilities.housing import CapHousing, Housing, HousingPhoto
+from woob.tools.backend import Module
+
+from .browser import SeLogerBrowser
+
+
+__all__ = ['SeLogerModule']
+
+
+class SeLogerModule(Module, CapHousing):
+    """Housing module that delegates all site access to ``SeLogerBrowser``."""
+
+    NAME = 'seloger'
+    MAINTAINER = u'Romain Bignon'
+    EMAIL = 'romain@weboob.org'
+    VERSION = '2.1'
+    DESCRIPTION = 'French housing website'
+    LICENSE = 'AGPLv3+'
+    ICON = 'http://static.poliris.com/z/portail/svx/portals/sv6_gen/favicon.png'
+    BROWSER = SeLogerBrowser
+
+    def search_housings(self, query):
+        """Search housings for the cities of ``query`` that belong to this backend.
+
+        Returns an empty list when none of the query's cities has
+        ``backend == self.name``.
+        """
+        cities = [c.id for c in query.cities if c.backend == self.name]
+        if not cities:
+            return []
+
+        return self.browser.search_housings(query.type, cities, query.nb_rooms,
+                                            query.area_min, query.area_max,
+                                            query.cost_min, query.cost_max,
+                                            query.house_types,
+                                            query.advert_types)
+
+    def get_housing(self, housing):
+        """Fetch one housing, given either a ``Housing`` object or a bare id.
+
+        A ``Housing`` instance is filled in place; for a bare id the browser
+        is called with no object to fill.
+        """
+        if isinstance(housing, Housing):
+            return self.browser.get_housing(housing.id, housing)
+        return self.browser.get_housing(housing, None)
+
+    def search_city(self, pattern):
+        """Return the cities matching ``pattern``."""
+        return self.browser.search_geo(pattern)
+
+    def fill_photo(self, photo, fields):
+        """Download the photo bytes when 'data' is requested and not yet loaded."""
+        if 'data' in fields and photo.url and not photo.data:
+            photo.data = self.browser.open(photo.url).content
+        return photo
+
+    def fill_housing(self, housing, fields):
+        """Fill ``housing`` with the requested ``fields``.
+
+        'DPE' and 'GES' come from the JSON detail document; any other
+        requested field triggers a load of the advert page. ``fields`` is
+        not modified, and asking for only one of 'DPE' / 'GES' is supported.
+        """
+        energy_fields = ('DPE', 'GES')
+
+        if any(name in fields for name in energy_fields):
+            housing = self.browser.get_housing_detail(housing)
+
+        # Work on a filtered copy: the previous in-place ``remove`` calls
+        # raised ValueError when only one of the two names was present.
+        remaining = [name for name in fields if name not in energy_fields]
+        if remaining:
+            housing = self.browser.get_housing(housing.id, housing)
+
+        return housing
+
+    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
diff --git a/modules/seloger/pages.py b/modules/seloger/pages.py
new file mode 100644
index 0000000..f40b4d2
--- /dev/null
+++ b/modules/seloger/pages.py
@@ -0,0 +1,262 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a woob module.
+#
+# This woob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This woob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from woob.browser.pages import JsonPage, pagination, HTMLPage
+from woob.browser.elements import ItemElement, DictElement, method
+from woob.browser.filters.json import Dict
+from woob.browser.filters.html import XPath
+from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
+ Env, Regexp, Field, BrowserURL)
+from woob.capabilities.base import NotAvailable, NotLoaded
+from woob.capabilities.housing import (Housing, HousingPhoto, City,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES,
+ ADVERT_TYPES)
+from woob.capabilities.address import PostalAddress
+from woob.tools.capabilities.housing.housing import PricePerMeterFilter
+from woob.tools.json import json
+from woob.exceptions import ActionNeeded
+from .constants import TYPES, RET
+import codecs
+
+
+class ErrorPage(HTMLPage):
+    """Page whose mere loading means the user has to solve a captcha."""
+
+    def on_load(self):
+        """Raise ``ActionNeeded`` as soon as the page is loaded."""
+        raise ActionNeeded("Please resolve the captcha")
+
+
+class CitiesPage(JsonPage):
+    """JSON city-autocompletion response."""
+
+    @method
+    class iter_cities(DictElement):
+        # Duplicate entries are ignored rather than reported.
+        ignore_duplicate = True
+
+        class item(ItemElement):
+            # Each JSON entry becomes a City: id from 'Params/ci', name from
+            # 'Display'.
+            klass = City
+
+            obj_id = Dict('Params/ci')
+            obj_name = Dict('Display')
+
+
+class SearchResultsPage(HTMLPage):
+    """Search result page whose listings are read from an embedded JSON blob."""
+
+    def __init__(self, *args, **kwargs):
+        HTMLPage.__init__(self, *args, **kwargs)
+        # The data is the escaped string handed to JSON.parse() in a script
+        # tag: extract it, undo the escaping, round-trip it through UTF-8
+        # and replace the HTML document with the parsed JSON.
+        blob_pattern = r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
+        escaped = Regexp(CleanText('//script'), blob_pattern)(self.doc)
+        unescaped, _ = codecs.unicode_escape_decode(escaped)
+        unescaped = unescaped.encode('utf-8', 'surrogatepass').decode('utf-8')
+        self.doc = json.loads(unescaped)
+
+    @pagination
+    @method
+    class iter_housings(DictElement):
+        item_xpath = 'cards/list'
+
+        def next_page(self):
+            # Request another page while the pages seen so far do not cover
+            # maxResults.
+            current = Dict('navigation/pagination/page')(self)
+            total = Dict('navigation/pagination/maxResults')(self)
+            per_page = Dict('navigation/pagination/resultsPerPage')(self)
+
+            has_more = int(total) / int(per_page) > int(current)
+            if not has_more:
+                return None
+            return BrowserURL('search', query=Env('query'), page_number=int(current) + 1)(self)
+
+        # TODO handle bellesdemeures
+
+        class item(ItemElement):
+            klass = Housing
+
+            def condition(self):
+                # Skip advertising / local-expert cards and cards without id.
+                if Dict('cardType')(self) in ['advertising', 'localExpert']:
+                    return False
+                return Dict('id', default=False)(self)
+
+            obj_id = Dict('id')
+
+            def obj_type(self):
+                # Map the numeric query type back to its POSTS_TYPES member.
+                code = int(Env('query_type')(self))
+                post_type = next(k for k, v in TYPES.items() if v == code)
+                # SeLoger does not let us discriminate between furnished and not furnished.
+                return POSTS_TYPES.RENT if post_type == POSTS_TYPES.FURNISHED_RENT else post_type
+
+            def obj_title(self):
+                estate = Dict('estateType')(self)
+                tags = " / ".join(Dict('tags')(self))
+                location = Field('location')(self)
+                return "{} - {} - {}".format(estate, tags, location)
+
+            def obj_advert_type(self):
+                # A card with an agency id is treated as a professional advert.
+                if Dict('contact/agencyId', default=False)(self):
+                    return ADVERT_TYPES.PROFESSIONAL
+                return ADVERT_TYPES.PERSONAL
+
+            obj_utilities = UTILITIES.EXCLUDED
+
+            def obj_photos(self):
+                return [HousingPhoto(url) for url in Dict('photos')(self)]
+
+            def obj_location(self):
+                # Missing parts are rendered as empty strings.
+                district = Dict('districtLabel')(self) or ''
+                city = Dict('cityLabel')(self) or ''
+                zip_code = Dict('zipCode')(self) or ''
+                return u'%s %s (%s)' % (district, city, zip_code)
+
+            obj_url = Dict('classifiedURL')
+
+            obj_text = Dict('description')
+
+            obj_cost = CleanDecimal(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
+            obj_currency = Currency(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
+            # NOTE(review): the default here is the PricePerMeterFilter class
+            # itself, not an instance -- confirm this is intended.
+            obj_price_per_meter = CleanDecimal(Dict('pricing/squareMeterPrice'), default=PricePerMeterFilter)
+
+
+class HousingPage(HTMLPage):
+    """Advert page; fields come from the ``central`` form and inline scripts."""
+
+    @method
+    class get_housing(ItemElement):
+        klass = Housing
+
+        def parse(self, el):
+            # Extract the ``ava_data`` object from the scripts, quote three
+            # bare identifiers and drop one comment so json.loads() accepts
+            # the text, then keep the first product for the room counters.
+            raw = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)
+            substitutions = (
+                ("logged", "\"logged\""),
+                ("lengthcarrousel", "\"lengthcarrousel\""),
+                ("products", "\"products\""),
+                ("// // ANNONCES_SIMILAIRE / RECO", ""),
+            )
+            for old, new in substitutions:
+                raw = raw.replace(old, new)
+            self.house_json_datas = json.loads(raw)['products'][0]
+
+        obj_id = CleanText('//form[@name="central"]/input[@name="idannonce"]/@value')
+
+        def obj_house_type(self):
+            # Reverse lookup in RET; unknown codes give NotLoaded.
+            code = CleanText('//form[@name="central"]/input[@name="naturebien"]/@value')(self)
+            return next((k for k, v in RET.items() if v == code), NotLoaded)
+
+        def obj_type(self):
+            code = int(CleanText('//form[@name="central"]/input[@name="idtt"]/@value')(self))
+            post_type = next(k for k, v in TYPES.items() if v == code)
+            # SeLoger does not let us discriminate between furnished and not furnished.
+            return POSTS_TYPES.RENT if post_type == POSTS_TYPES.FURNISHED_RENT else post_type
+
+        def obj_advert_type(self):
+            # Any non-empty agency field marks the advert as professional.
+            agency_fields = (
+                '//form[@name="central"]/input[@name="nomagance"]/@value',
+                '//form[@name="central"]/input[@name="urlagence"]/@value',
+                '//form[@name="central"]/input[@name="adresseagence"]/@value',
+            )
+            if any(CleanText(xpath)(self) for xpath in agency_fields):
+                return ADVERT_TYPES.PROFESSIONAL
+            return ADVERT_TYPES.PERSONAL
+
+        def obj_photos(self):
+            # Eagerly loaded slides first, then lazily loaded ones whose URL
+            # sits in a JSON ``data-lazy`` attribute.
+            photos = [
+                HousingPhoto("https:{}".format(src))
+                for src in XPath('//div[@class="carrousel_slide"]/img/@src')(self)
+            ]
+            photos += [
+                HousingPhoto("https:{}".format(json.loads(lazy)['url']))
+                for lazy in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self)
+            ]
+            return photos
+
+        obj_title = CleanText('//title[1]')
+
+        def obj_location(self):
+            district = Regexp(CleanText('//script'),
+                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
+            city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self) or ''
+            zip_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self) or ''
+            return u'%s %s (%s)' % (district, city, zip_code)
+
+        def obj_address(self):
+            address = PostalAddress()
+
+            # The district name is what gets stored as the street.
+            address.street = Regexp(CleanText('//script'),
+                                    r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
+            address.postal_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
+            address.city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
+            address.full_address = Field('location')(self)
+            return address
+
+        obj_text = CleanText('//form[@name="central"]/input[@name="description"]/@value')
+
+        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'), default=NotLoaded)
+        obj_currency = Currency(CleanText('//a[@id="price"]'), default=NotLoaded)
+        obj_price_per_meter = PricePerMeterFilter()
+
+        obj_area = CleanDecimal('//form[@name="central"]/input[@name="surface"]/@value', replace_dots=True)
+        obj_url = CleanText('//form[@name="central"]/input[@name="urlannonce"]/@value')
+        obj_phone = CleanText('//div[@class="data-action"]/a[@data-phone]/@data-phone')
+
+        def obj_utilities(self):
+            # Only the explicit "charges comprises" mention yields INCLUDED.
+            mention = CleanText('//span[@class="detail_indice_prix"]', default="")(self)
+            included = "(CC) Loyer mensuel charges comprises" in mention
+            return UTILITIES.INCLUDED if included else UTILITIES.UNKNOWN
+
+        def obj_bedrooms(self):
+            bedrooms = CleanDecimal(Dict('nb_chambres', default=NotLoaded))
+            return bedrooms(self.house_json_datas)
+
+        def obj_rooms(self):
+            rooms = CleanDecimal(Dict('nb_pieces', default=NotLoaded))
+            return rooms(self.house_json_datas)
+
+
+class HousingJsonPage(JsonPage):
+    """JSON detail document: energy ratings plus extra characteristics."""
+
+    @method
+    class get_housing(ItemElement):
+        klass = Housing
+
+        def _energy_class(self, key):
+            """Return the ``ENERGY_CLASS`` member stored under ``key``.
+
+            Gives ``NotAvailable`` when the entry is missing or empty, when
+            its ``status`` is above 0, or when its ``lettre`` is not an
+            ``ENERGY_CLASS`` attribute.
+            """
+            rating = Dict(key, default="")(self)
+            # The empty default used to be indexed like a dict and crash with
+            # TypeError; treat a missing entry as "not available" instead.
+            if not rating or rating['status'] > 0:
+                return NotAvailable
+            return getattr(ENERGY_CLASS, rating['lettre'], NotAvailable)
+
+        def obj_DPE(self):
+            return self._energy_class("energie")
+
+        def obj_GES(self):
+            return self._energy_class("ges")
+
+        def obj_details(self):
+            """Collect category criteria and buyer information into one dict."""
+            details = {}
+
+            # One entry per non-empty category: its criteria values joined.
+            for category in Dict('categories')(self):
+                criteria = category['criteria']
+                if criteria:
+                    details[category['name']] = ' / '.join(entry['value'] for entry in criteria)
+
+            # Every key/value pair of each 'infos_acquereur' section is
+            # copied as is; later keys overwrite earlier ones.
+            for section in Dict('infos_acquereur')(self).values():
+                details.update(section)
+
+            return details
diff --git a/modules/seloger/test.py b/modules/seloger/test.py
new file mode 100644
index 0000000..37303c8
--- /dev/null
+++ b/modules/seloger/test.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a woob module.
+#
+# This woob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This woob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
+
+from woob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
+from woob.tools.test import BackendTest
+from woob.tools.capabilities.housing.housing_test import HousingTest
+
+
+class SeLogerTest(BackendTest, HousingTest):
+    """Backend tests running housing queries against the seloger module."""
+
+    MODULE = 'seloger'
+
+    FIELDS_ALL_HOUSINGS_LIST = [
+        "id", "type", "advert_type", "house_type", "url", "title", "area",
+        "utilities", "date", "location", "text"
+    ]
+    FIELDS_ANY_HOUSINGS_LIST = [
+        "cost",  # Some posts don't have cost in seloger
+        "currency",  # Same
+        "photos",
+    ]
+    FIELDS_ALL_SINGLE_HOUSING = [
+        "id", "url", "type", "advert_type", "house_type", "title", "area",
+        "utilities", "date", "location", "text", "phone", "details"
+    ]
+    FIELDS_ANY_SINGLE_HOUSING = [
+        "cost",  # Some posts don't have cost in seloger
+        "currency",  # Same
+        "photos",
+        "rooms",
+        "bedrooms",
+        "station",
+        "DPE",
+        "GES"
+    ]
+    DO_NOT_DISTINGUISH_FURNISHED_RENT = True
+
+    def _check(self, post_type, city_pattern, **criteria):
+        """Build a Query from the arguments and run ``check_against_query``.
+
+        ``criteria`` are set as attributes on the query; the cities are those
+        returned by ``search_city(city_pattern)``, tagged with this backend.
+        """
+        query = Query()
+        for name, value in criteria.items():
+            setattr(query, name, value)
+        query.type = post_type
+        query.cities = []
+        for city in self.backend.search_city(city_pattern):
+            city.backend = self.backend.name
+            query.cities.append(city)
+        self.check_against_query(query)
+
+    def test_seloger_rent(self):
+        self._check(POSTS_TYPES.RENT, 'paris', area_min=20, cost_max=1500)
+
+    def test_seloger_sale(self):
+        self._check(POSTS_TYPES.SALE, 'paris', area_min=20)
+
+    def test_seloger_furnished_rent(self):
+        self._check(POSTS_TYPES.FURNISHED_RENT, 'paris', area_min=20, cost_max=1500)
+
+    def test_seloger_viager(self):
+        self._check(POSTS_TYPES.VIAGER, '85')
+
+    def test_seloger_rent_personal(self):
+        # NOTE(review): the test is named "personal" but filters on
+        # PROFESSIONAL adverts -- confirm which one is intended.
+        self._check(POSTS_TYPES.RENT, 'paris', area_min=20, cost_max=1500,
+                    advert_types=[ADVERT_TYPES.PROFESSIONAL])