diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md
index dcfd1c5..7bb5412 100644
--- a/doc/0.getting_started.md
+++ b/doc/0.getting_started.md
@@ -27,6 +27,13 @@ your disk, to point `modules_path` configuration option to
`path_to_weboob_git/modules` (see the configuration section below) and to run
a `git pull; python setup.py install` in the WebOOB git repo often.
+A copy of the WebOOB modules is available in the `modules` directory at the
+root of this repository. Set `"modules_path": "/path/to/flatisfy/modules"` to use them.
+This copy may or may not be more up to date than the current state of the official
+WebOOB modules. Some changes made there are not backported
+upstream, and the official WebOOB modules are not synced into the `modules` folder on a
+regular basis, so try both and see which ones match your needs! :)
+
## TL;DR
diff --git a/modules/explorimmo/__init__.py b/modules/explorimmo/__init__.py
new file mode 100644
index 0000000..e38012f
--- /dev/null
+++ b/modules/explorimmo/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .module import ExplorimmoModule
+
+
+__all__ = ['ExplorimmoModule']
diff --git a/modules/explorimmo/browser.py b/modules/explorimmo/browser.py
new file mode 100644
index 0000000..d308aee
--- /dev/null
+++ b/modules/explorimmo/browser.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.browser import PagesBrowser, URL
+from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
+ HOUSE_TYPES)
+from weboob.tools.compat import urlencode
+from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage
+
+
+class ExplorimmoBrowser(PagesBrowser):
+ BASEURL = 'https://immobilier.lefigaro.fr'
+
+ cities = URL('/rest/locations\?q=(?P.*)', CitiesPage)
+ search = URL('/annonces/resultat/annonces.html\?(?P.*)', SearchPage)
+ housing_html = URL('/annonces/annonce-(?P<_id>.*).html', HousingPage)
+ phone = URL('/rest/classifieds/(?P<_id>.*)/phone', PhonePage)
+ housing = URL('/rest/classifieds/(?P<_id>.*)',
+ '/rest/classifieds/\?(?P.*)', HousingPage2)
+
+ TYPES = {POSTS_TYPES.RENT: 'location',
+ POSTS_TYPES.SALE: 'vente',
+ POSTS_TYPES.FURNISHED_RENT: 'location',
+ POSTS_TYPES.VIAGER: 'vente'}
+
+ RET = {HOUSE_TYPES.HOUSE: 'Maison',
+ HOUSE_TYPES.APART: 'Appartement',
+ HOUSE_TYPES.LAND: 'Terrain',
+ HOUSE_TYPES.PARKING: 'Parking',
+ HOUSE_TYPES.OTHER: 'Divers'}
+
+ def get_cities(self, pattern):
+ return self.cities.open(city=pattern).get_cities()
+
+ def search_housings(self, type, cities, nb_rooms, area_min, area_max,
+ cost_min, cost_max, house_types, advert_types):
+
+ if type not in self.TYPES:
+ raise TypeNotSupported()
+
+ ret = []
+ if type == POSTS_TYPES.VIAGER:
+ ret = ['Viager']
+ else:
+ for house_type in house_types:
+ if house_type in self.RET:
+ ret.append(self.RET.get(house_type))
+
+ data = {'location': ','.join(cities).encode('iso 8859-1'),
+ 'furnished': type == POSTS_TYPES.FURNISHED_RENT,
+ 'areaMin': area_min or '',
+ 'areaMax': area_max or '',
+ 'priceMin': cost_min or '',
+ 'priceMax': cost_max or '',
+ 'transaction': self.TYPES.get(type, 'location'),
+ 'recherche': '',
+ 'mode': '',
+ 'proximity': '0',
+ 'roomMin': nb_rooms or '',
+ 'page': '1'}
+
+ query = u'%s%s%s' % (urlencode(data), '&type=', '&type='.join(ret))
+
+ return self.search.go(query=query).iter_housings(
+ query_type=type,
+ advert_types=advert_types
+ )
+
+ def get_housing(self, _id, housing=None):
+ return self.housing.go(_id=_id).get_housing(obj=housing)
+
+ def get_phone(self, _id):
+ return self.phone.go(_id=_id).get_phone()
+
+ def get_total_page(self, js_datas):
+ return self.housing.open(js_datas=js_datas).get_total_page()
diff --git a/modules/explorimmo/module.py b/modules/explorimmo/module.py
new file mode 100644
index 0000000..856d38f
--- /dev/null
+++ b/modules/explorimmo/module.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.backend import Module
+from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
+
+from .browser import ExplorimmoBrowser
+
+
+__all__ = ['ExplorimmoModule']
+
+
+class ExplorimmoModule(Module, CapHousing):  # weboob housing backend for the explorimmo website
+ NAME = 'explorimmo'
+ DESCRIPTION = u'explorimmo website'
+ MAINTAINER = u'Bezleputh'
+ EMAIL = 'carton_ben@yahoo.fr'
+ LICENSE = 'AGPLv3+'
+ VERSION = '2.1'
+
+ BROWSER = ExplorimmoBrowser
+
+ def get_housing(self, housing):  # accepts either a Housing object or a bare id
+ if isinstance(housing, Housing):
+ id = housing.id
+ else:
+ id = housing
+ housing = None  # no existing object to fill: the browser receives housing=None
+ housing = self.browser.get_housing(id, housing)
+ return housing
+
+ def search_city(self, pattern):  # delegates the city lookup to the browser
+ return self.browser.get_cities(pattern)
+
+ def search_housings(self, query):  # runs the query for the cities that belong to this backend
+ cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
+ if len(cities) == 0:
+ return list()  # no city for this backend: nothing to search
+
+ return self.browser.search_housings(query.type, cities, query.nb_rooms,
+ query.area_min, query.area_max,
+ query.cost_min, query.cost_max,
+ query.house_types,
+ query.advert_types)
+
+ def fill_housing(self, housing, fields):  # fills the requested fields of `housing` and returns it
+ if 'phone' in fields:
+ housing.phone = self.browser.get_phone(housing.id)  # the phone comes from its own endpoint
+ fields.remove('phone')  # NOTE(review): this mutates the list passed by the caller
+
+ if len(fields) > 0:  # any other field is filled from the full classified
+ self.browser.get_housing(housing.id, housing)
+
+ return housing
+
+ def fill_photo(self, photo, fields):  # downloads the image only when asked for and not already loaded
+ if 'data' in fields and photo.url and not photo.data:
+ photo.data = self.browser.open(photo.url).content
+ return photo
+
+ OBJECTS = {Housing: fill_housing,  # object class -> fill function; presumably consumed by weboob's fillobj, confirm
+ HousingPhoto: fill_photo,
+ }
diff --git a/modules/explorimmo/pages.py b/modules/explorimmo/pages.py
new file mode 100644
index 0000000..35631b8
--- /dev/null
+++ b/modules/explorimmo/pages.py
@@ -0,0 +1,455 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import unicode_literals
+
+import json
+import math
+import re
+from decimal import Decimal
+from datetime import datetime
+from weboob.browser.filters.json import Dict
+from weboob.browser.elements import ItemElement, ListElement, DictElement, method
+from weboob.browser.pages import JsonPage, HTMLPage, pagination
+from weboob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
+ Regexp, Env, BrowserURL, Filter,
+ Format)
+from weboob.browser.filters.html import Attr, CleanHTML, XPath
+from weboob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency
+from weboob.capabilities.housing import (Housing, HousingPhoto, City,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES,
+ ADVERT_TYPES, HOUSE_TYPES)
+from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
+from weboob.tools.compat import unquote
+
+
+class CitiesPage(JsonPage):  # JSON page listing the locations returned for a search pattern
+
+ ENCODING = 'UTF-8'
+
+ def build_doc(self, content):  # substitutes an empty result when the parsed document is falsy
+ content = super(CitiesPage, self).build_doc(content)
+ if content:
+ return content
+ else:
+ return [{"locations": []}]  # same shape as the '0/locations' path read by get_cities
+
+ @method
+ class get_cities(DictElement):  # builds one City per entry found under '0/locations'
+ item_xpath = '0/locations'
+
+ class item(ItemElement):
+ klass = City
+
+ obj_id = Dict('label')  # the label is used as both the id and the name
+ obj_name = Dict('label')
+
+
+class SearchPage(HTMLPage):
+ @pagination
+ @method
+ class iter_housings(ListElement):
+ item_xpath = '//div[starts-with(@id, "bloc-vue-")]'
+
+ def next_page(self):
+ js_datas = CleanText(
+ '//div[@id="js-data"]/@data-rest-search-request'
+ )(self).split('?')[-1].split('&')
+
+ try:
+ resultsPerPage = next(
+ x for x in js_datas if 'resultsPerPage' in x
+ ).split('=')[-1]
+ currentPageNumber = next(
+ x for x in js_datas if 'currentPageNumber' in x
+ ).split('=')[-1]
+ resultCount = CleanText(
+ '(//div[@id="js-data"]/@data-result-count)[1]'
+ )(self)
+ totalPageNumber = math.ceil(
+ int(resultCount) / int(resultsPerPage)
+ )
+
+ next_page = int(currentPageNumber) + 1
+ if next_page <= totalPageNumber:
+ return self.page.url.replace(
+ 'page=%s' % currentPageNumber,
+ 'page=%d' % next_page
+ )
+ except StopIteration:
+ pass
+
+ class item(ItemElement):
+ klass = Housing
+ price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'
+
+ def is_agency(self):
+ agency = CleanText('.//span[has-class("item-agency-name")]')(self.el)
+ return 'annonce de particulier' not in agency.lower()
+
+ def condition(self):
+ if len(self.env['advert_types']) == 1:
+ is_agency = self.is_agency()
+ if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
+ return not is_agency
+ elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
+ return is_agency
+ return Attr('.', 'data-classified-id', default=False)(self)
+
+ obj_id = Attr('.', 'data-classified-id')
+ obj_type = Env('query_type')
+ obj_title = CleanText('./div/h2[@class="item-type"]')
+
+ def obj_advert_type(self):
+ if self.is_agency():
+ return ADVERT_TYPES.PROFESSIONAL
+ else:
+ return ADVERT_TYPES.PERSONAL
+
+ def obj_house_type(self):
+ type = self.obj_title(self).split()[0].lower()
+ if type == "appartement" or type == "studio" or type == "chambre":
+ return HOUSE_TYPES.APART
+ elif type == "maison" or type == "villa":
+ return HOUSE_TYPES.HOUSE
+ elif type == "parking":
+ return HOUSE_TYPES.PARKING
+ elif type == "terrain":
+ return HOUSE_TYPES.LAND
+ else:
+ return HOUSE_TYPES.OTHER
+
+ def obj_location(self):
+ script = CleanText('./script')(self)
+ try:
+ # Should be standard JSON+LD data
+ script = json.loads(script)
+ except ValueError:
+ try:
+ # But explorimmo can't write JSON correctly and there
+ # is a trailing "}"
+ script = json.loads(script.strip().rstrip('}'))
+ except ValueError:
+ script = None
+ if not script:
+ return NotLoaded
+
+ try:
+ return '%s (%s)' % (
+ script['address']['addressLocality'],
+ script['address']['postalCode']
+ )
+ except (KeyError):
+ return NotLoaded
+
+ def obj_cost(self):
+ cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
+ r'de (.*) à .*',
+ default=0))(self)
+ if cost == 0:
+ return CleanDecimal(self.price_selector, default=NotAvailable)(self)
+ else:
+ return cost
+
+ obj_currency = Currency(price_selector)
+
+ def obj_utilities(self):
+ utilities = CleanText(
+ './div/div/span[@class="price-label"]|'
+ './div/div[@class="item-price-pdf"]|'
+ './div/div/span[@class="item-price"]'
+ )(self)
+ if "CC" in utilities:
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.UNKNOWN
+
+ obj_text = CleanText('./div/p[@itemprop="description"]')
+ obj_area = CleanDecimal(
+ Regexp(
+ obj_title,
+ r'(.*?)([\d,\.]*) m2(.*?)',
+ '\\2',
+ default=None
+ ),
+ replace_dots=True,
+ default=NotLoaded
+ )
+
+ obj_url = Format(
+ "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
+ CleanText('./@data-classified-id')
+ )
+
+ obj_price_per_meter = PricePerMeterFilter()
+
+ def obj_phone(self):
+ phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
+ replace=[('Téléphoner : ', '')],
+ default=NotLoaded)(self)
+
+ if '...' in phone:
+ return NotLoaded
+
+ return phone
+
+ def obj_details(self):
+ charges = CleanText('.//span[@class="price-fees"]',
+ default=None)(self)
+ if charges:
+ return {
+ "fees": charges.split(":")[1].strip()
+ }
+ else:
+ return NotLoaded
+
+ def obj_photos(self):
+ url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
+ if url:
+ url = unquote(url)
+ if "http://" in url[3:]:
+ rindex = url.rfind("?")
+ if rindex == -1:
+ rindex = None
+ url = url[url.find("http://", 3):rindex]
+ return [HousingPhoto(url)]
+ else:
+ return NotLoaded
+
+
+class TypeDecimal(Filter):  # filter converting the selected value to a Decimal
+ def filter(self, el):
+ return Decimal(el)
+
+
+class FromTimestamp(Filter):  # filter converting a numeric timestamp to a datetime
+ def filter(self, el):
+ return datetime.fromtimestamp(el / 1000.0)  # divided by 1000: presumably milliseconds since the epoch, confirm; result is a naive local-time datetime
+
+
+class PhonePage(JsonPage):  # JSON page carrying the phone number of a classified
+ def get_phone(self):
+ return self.doc.get('phoneNumber')  # None when the key is absent
+
+
+class HousingPage2(JsonPage):
+ @method
+ class get_housing(ItemElement):
+ klass = Housing
+
+ def is_agency(self):
+ return Dict('agency/isParticulier')(self) == 'false'
+
+ obj_id = Env('_id')
+
+ def obj_type(self):
+ transaction = Dict('characteristics/transaction')(self)
+ if transaction == 'location':
+ if Dict('characteristics/isFurnished')(self):
+ return POSTS_TYPES.FURNISHED_RENT
+ else:
+ return POSTS_TYPES.RENT
+ elif transaction == 'vente':
+ type = Dict('characteristics/estateType')(self).lower()
+ if 'viager' in type:
+ return POSTS_TYPES.VIAGER
+ else:
+ return POSTS_TYPES.SALE
+ else:
+ return NotAvailable
+
+ def obj_advert_type(self):
+ if self.is_agency:
+ return ADVERT_TYPES.PROFESSIONAL
+ else:
+ return ADVERT_TYPES.PERSONAL
+
+ def obj_house_type(self):
+ type = Dict('characteristics/estateType')(self).lower()
+ if 'appartement' in type:
+ return HOUSE_TYPES.APART
+ elif 'maison' in type:
+ return HOUSE_TYPES.HOUSE
+ elif 'parking' in type:
+ return HOUSE_TYPES.PARKING
+ elif 'terrain' in type:
+ return HOUSE_TYPES.LAND
+ else:
+ return HOUSE_TYPES.OTHER
+
+ obj_title = Dict('characteristics/titleWithTransaction')
+ obj_location = Format('%s %s %s', Dict('location/address'),
+ Dict('location/cityLabel'),
+ Dict('location/postalCode'))
+
+ def obj_cost(self):
+ cost = TypeDecimal(Dict('characteristics/price'))(self)
+ if cost == 0:
+ cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
+ return cost
+
+ obj_currency = BaseCurrency.get_currency('€')
+
+ def obj_utilities(self):
+ are_fees_included = Dict('characteristics/areFeesIncluded',
+ default=None)(self)
+ if are_fees_included:
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.EXCLUDED
+
+ obj_text = CleanHTML(Dict('characteristics/description'))
+ obj_url = BrowserURL('housing_html', _id=Env('_id'))
+
+ def obj_area(self):
+ area = TypeDecimal(Dict('characteristics/area'))(self)
+ if area == 0:
+ area = TypeDecimal(Dict('characteristics/areaMin'))(self)
+ return area
+
+ obj_date = FromTimestamp(Dict('characteristics/date'))
+ obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))
+
+ def obj_rooms(self):
+ # TODO: Why is roomCount a list?
+ rooms = Dict('characteristics/roomCount', default=[])(self)
+ if rooms:
+ return TypeDecimal(rooms[0])(self)
+ return NotAvailable
+
+ obj_price_per_meter = PricePerMeterFilter()
+
+ def obj_photos(self):
+ photos = []
+ for img in Dict('characteristics/images')(self):
+ m = re.search('http://thbr\.figarocms\.net.*(http://.*)', img.get('xl'))
+ if m:
+ photos.append(HousingPhoto(m.group(1)))
+ else:
+ photos.append(HousingPhoto(img.get('xl')))
+ return photos
+
+ def obj_DPE(self):
+ DPE = Dict(
+ 'characteristics/energyConsumptionCategory',
+ default=""
+ )(self)
+ return getattr(ENERGY_CLASS, DPE, NotAvailable)
+
+ def obj_GES(self):
+ GES = Dict(
+ 'characteristics/greenhouseGasEmissionCategory',
+ default=""
+ )(self)
+ return getattr(ENERGY_CLASS, GES, NotAvailable)
+
+ def obj_details(self):
+ details = {}
+ details['fees'] = Dict(
+ 'characteristics/fees', default=NotAvailable
+ )(self)
+ details['agencyFees'] = Dict(
+ 'characteristics/agencyFees', default=NotAvailable
+ )(self)
+ details['guarantee'] = Dict(
+ 'characteristics/guarantee', default=NotAvailable
+ )(self)
+ details['bathrooms'] = Dict(
+ 'characteristics/bathroomCount', default=NotAvailable
+ )(self)
+ details['creationDate'] = FromTimestamp(
+ Dict(
+ 'characteristics/creationDate', default=NotAvailable
+ ),
+ default=NotAvailable
+ )(self)
+ details['availabilityDate'] = Dict(
+ 'characteristics/estateAvailabilityDate', default=NotAvailable
+ )(self)
+ details['exposure'] = Dict(
+ 'characteristics/exposure', default=NotAvailable
+ )(self)
+ details['heatingType'] = Dict(
+ 'characteristics/heatingType', default=NotAvailable
+ )(self)
+ details['floor'] = Dict(
+ 'characteristics/floor', default=NotAvailable
+ )(self)
+ details['bedrooms'] = Dict(
+ 'characteristics/bedroomCount', default=NotAvailable
+ )(self)
+ details['isFurnished'] = Dict(
+ 'characteristics/isFurnished', default=NotAvailable
+ )(self)
+ rooms = Dict('characteristics/roomCount', default=[])(self)
+ if len(rooms):
+ details['rooms'] = rooms[0]
+ details['available'] = Dict(
+ 'characteristics/isAvailable', default=NotAvailable
+ )(self)
+ agency = Dict('agency', default=NotAvailable)(self)
+ details['agency'] = ', '.join([
+ x for x in [
+ agency.get('corporateName', ''),
+ agency.get('corporateAddress', ''),
+ agency.get('corporatePostalCode', ''),
+ agency.get('corporateCity', '')
+ ] if x
+ ])
+ return details
+
+ def get_total_page(self):
+ return self.doc.get('pagination').get('total') if 'pagination' in self.doc else 0
+
+
+class HousingPage(HTMLPage):  # HTML page of a single classified
+ @method
+ class get_housing(ItemElement):  # builds a Housing from the page markup
+ klass = Housing
+
+ obj_id = Env('_id')
+ obj_title = CleanText('//h1[@itemprop="name"]')
+ obj_location = CleanText('//span[@class="informations-localisation"]')
+ obj_cost = CleanDecimal('//span[@itemprop="price"]')
+ obj_currency = Currency('//span[@itemprop="price"]')
+ obj_text = CleanHTML('//div[@itemprop="description"]')
+ obj_url = BrowserURL('housing', _id=Env('_id'))
+ obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),  # digits preceding "m2" in the title
+ r'(.*?)(\d*) m2(.*?)', '\\2'), default=NotAvailable)
+ obj_price_per_meter = PricePerMeterFilter()
+
+ def obj_photos(self):  # one HousingPhoto per thumbnail image
+ photos = []
+ for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
+ url = Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')(img)  # keeps the inner http:// URL; no default is given for a non-matching src
+ photos.append(HousingPhoto(url))
+ return photos
+
+ def obj_details(self):  # name/value pairs of the features list, plus the energy consumption block
+ details = dict()
+ for item in XPath('//div[@class="features clearfix"]/ul/li')(self):
+ key = CleanText('./span[@class="name"]')(item)
+ value = CleanText('./span[@class="value"]')(item)
+ if value and key:  # skip entries with an empty name or value
+ details[key] = value
+
+ key = CleanText('//div[@class="title-dpe clearfix"]')(self)
+ value = CleanText('//div[@class="energy-consumption"]')(self)
+ if value and key:
+ details[key] = value
+ return details
diff --git a/modules/explorimmo/test.py b/modules/explorimmo/test.py
new file mode 100644
index 0000000..dfa8cdf
--- /dev/null
+++ b/modules/explorimmo/test.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.housing import Query, ADVERT_TYPES, POSTS_TYPES
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+from weboob.tools.test import BackendTest
+
+
+class ExplorimmoTest(BackendTest, HousingTest):  # backend tests for the explorimmo module
+ MODULE = 'explorimmo'
+
+ FIELDS_ALL_HOUSINGS_LIST = [  # field lists presumably consumed by HousingTest.check_against_query, confirm
+ "id", "type", "advert_type", "house_type", "title", "location",
+ "utilities", "text", "area", "url"
+ ]
+ FIELDS_ANY_HOUSINGS_LIST = [
+ "photos", "cost", "currency"
+ ]
+ FIELDS_ALL_SINGLE_HOUSING = [
+ "id", "url", "type", "advert_type", "house_type", "title", "area",
+ "cost", "currency", "utilities", "date", "location", "text", "rooms",
+ "details"
+ ]
+ FIELDS_ANY_SINGLE_HOUSING = [
+ "bedrooms",
+ "photos",
+ "DPE",
+ "GES",
+ "phone"
+ ]
+
+ def test_explorimmo_rent(self):  # rentals of at least 20 m2 costing at most 1500, cities matching 'paris'
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name  # the module only searches cities tagged with its own backend name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_explorimmo_sale(self):  # sales of at least 20 m2, cities matching 'paris'
+ query = Query()
+ query.area_min = 20
+ query.type = POSTS_TYPES.SALE
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_explorimmo_furnished_rent(self):  # same criteria as the rent test, furnished rentals
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.FURNISHED_RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_explorimmo_viager(self):  # viager adverts for the cities matching '85'
+ query = Query()
+ query.type = POSTS_TYPES.VIAGER
+ query.cities = []
+ for city in self.backend.search_city('85'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_explorimmo_personal(self):  # personal (non-agency) rentals only
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 900
+ query.type = POSTS_TYPES.RENT
+ query.advert_types = [ADVERT_TYPES.PERSONAL]
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+
+ results = list(self.backend.search_housings(query))
+ self.assertEqual(len(results), 0)  # the test expects no personal advert for this query
diff --git a/modules/foncia/__init__.py b/modules/foncia/__init__.py
new file mode 100644
index 0000000..4b71602
--- /dev/null
+++ b/modules/foncia/__init__.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Phyks (Lucas Verney)
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+
+from .module import FonciaModule
+
+
+__all__ = ['FonciaModule']
diff --git a/modules/foncia/browser.py b/modules/foncia/browser.py
new file mode 100644
index 0000000..ce12558
--- /dev/null
+++ b/modules/foncia/browser.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Phyks (Lucas Verney)
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+
+from weboob.browser import PagesBrowser, URL
+
+from .constants import QUERY_TYPES
+from .pages import CitiesPage, HousingPage, SearchPage, SearchResultsPage
+
+
+class FonciaBrowser(PagesBrowser):
+ BASEURL = 'https://fr.foncia.com'
+
+ cities = URL(r'/recherche/autocomplete\?term=(?P.+)', CitiesPage)
+ housing = URL(r'/(?P[^/]+)/.*\d+.htm', HousingPage)
+ search_results = URL(r'/(?P[^/]+)/.*', SearchResultsPage)
+ search = URL(r'/(?P.+)', SearchPage)
+
+ def get_cities(self, pattern):
+ """
+ Get cities matching a given pattern.
+ """
+ return self.cities.open(term=pattern).iter_cities()
+
+ def search_housings(self, query, cities):
+ """
+ Search for housings matching given query.
+ """
+ try:
+ query_type = QUERY_TYPES[query.type]
+ except KeyError:
+ return []
+
+ self.search.go(type=query_type).do_search(query, cities)
+ return self.page.iter_housings(query_type=query.type)
+
+ def get_housing(self, housing):
+ """
+ Get specific housing.
+ """
+ query_type, housing = housing.split(':')
+ self.search.go(type=query_type).find_housing(query_type, housing)
+ return self.page.get_housing()
diff --git a/modules/foncia/constants.py b/modules/foncia/constants.py
new file mode 100644
index 0000000..404f2af
--- /dev/null
+++ b/modules/foncia/constants.py
@@ -0,0 +1,24 @@
+from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
+
+QUERY_TYPES = {  # post type -> site-side type given to the search URL by FonciaBrowser.search_housings
+ POSTS_TYPES.RENT: 'location',
+ POSTS_TYPES.SALE: 'achat',
+ POSTS_TYPES.FURNISHED_RENT: 'location'
+}
+
+QUERY_HOUSE_TYPES = {  # house type -> site-side property type names; usage is not visible here
+ HOUSE_TYPES.APART: ['appartement', 'appartement-meuble'],
+ HOUSE_TYPES.HOUSE: ['maison'],
+ HOUSE_TYPES.PARKING: ['parking'],
+ HOUSE_TYPES.LAND: ['terrain'],
+ HOUSE_TYPES.OTHER: ['chambre', 'programme-neuf',
+ 'local-commercial', 'immeuble']
+}
+
+AVAILABLE_TYPES = {  # post type -> property type names; presumably those the site offers for it, confirm
+ POSTS_TYPES.RENT: ['appartement', 'maison', 'parking', 'chambre',
+ 'local-commercial'],
+ POSTS_TYPES.SALE: ['appartement', 'maison', 'parking', 'local-commercial',
+ 'terrain', 'immeuble', 'programme-neuf'],
+ POSTS_TYPES.FURNISHED_RENT: ['appartement-meuble']
+}
diff --git a/modules/foncia/favicon.png b/modules/foncia/favicon.png
new file mode 100644
index 0000000..bdda286
Binary files /dev/null and b/modules/foncia/favicon.png differ
diff --git a/modules/foncia/module.py b/modules/foncia/module.py
new file mode 100644
index 0000000..7364ab3
--- /dev/null
+++ b/modules/foncia/module.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Phyks (Lucas Verney)
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+
+from weboob.tools.backend import Module
+from weboob.capabilities.housing import CapHousing, Housing, ADVERT_TYPES, HousingPhoto
+
+from .browser import FonciaBrowser
+
+
+__all__ = ['FonciaModule']
+
+
+class FonciaModule(Module, CapHousing):
+ NAME = 'foncia'
+ DESCRIPTION = u'Foncia housing website.'
+ MAINTAINER = u'Phyks (Lucas Verney)'
+ EMAIL = 'phyks@phyks.me'
+ LICENSE = 'AGPLv3+'
+ VERSION = '2.1'
+
+ BROWSER = FonciaBrowser
+
+ def get_housing(self, housing):
+ return self.browser.get_housing(housing)
+
+ def search_city(self, pattern):
+ return self.browser.get_cities(pattern)
+
+ def search_housings(self, query):
+ if (
+ len(query.advert_types) == 1 and
+ query.advert_types[0] == ADVERT_TYPES.PERSONAL
+ ):
+ # Foncia is pro only
+ return list()
+
+ cities = ','.join(
+ ['%s' % c.name for c in query.cities if c.backend == self.name]
+ )
+ if len(cities) == 0:
+ return []
+
+ return self.browser.search_housings(query, cities)
+
+ def fill_housing(self, housing, fields):
+ if len(fields) > 0:
+ self.browser.get_housing(housing)
+ return housing
+
+ def fill_photo(self, photo, fields):
+ if 'data' in fields and photo.url and not photo.data:
+ photo.data = self.browser.open(photo.url).content
+ return photo
+
+ OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}
diff --git a/modules/foncia/pages.py b/modules/foncia/pages.py
new file mode 100644
index 0000000..9c881e8
--- /dev/null
+++ b/modules/foncia/pages.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Phyks (Lucas Verney)
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+import datetime
+
+from weboob.browser.pages import JsonPage, HTMLPage, pagination
+from weboob.browser.filters.standard import (
+ CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
+)
+from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.capabilities.base import NotAvailable, NotLoaded
+from weboob.capabilities.housing import (
+ City, Housing, HousingPhoto,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
+)
+from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
+
+from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES
+
+
+class CitiesPage(JsonPage):
+ def iter_cities(self):
+ cities_list = self.doc
+ if isinstance(self.doc, dict):
+ cities_list = self.doc.values()
+
+ for city in cities_list:
+ city_obj = City()
+ city_obj.id = city
+ city_obj.name = city
+ yield city_obj
+
+
+class HousingPage(HTMLPage):
+ @method
+ class get_housing(ItemElement):
+ klass = Housing
+
+ obj_id = Format(
+ '%s:%s',
+ Env('type'),
+ Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
+ )
+ obj_advert_type = ADVERT_TYPES.PROFESSIONAL
+
+ def obj_type(self):
+ type = Env('type')(self)
+ if type == 'location':
+ if 'appartement-meuble' in self.page.url:
+ return POSTS_TYPES.FURNISHED_RENT
+ else:
+ return POSTS_TYPES.RENT
+ elif type == 'achat':
+ return POSTS_TYPES.SALE
+ else:
+ return NotAvailable
+
+ def obj_url(self):
+ return self.page.url
+
+ def obj_house_type(self):
+ url = self.obj_url()
+ for house_type, types in QUERY_HOUSE_TYPES.items():
+ for type in types:
+ if ('/%s/' % type) in url:
+ return house_type
+ return NotAvailable
+
+ obj_title = CleanText('//h1[has-class("OfferTop-title")]')
+ obj_area = CleanDecimal(
+ Regexp(
+ CleanText(
+ '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
+ ),
+ r'(\d*\.*\d*) .*',
+ default=NotAvailable
+ ),
+ default=NotAvailable
+ )
+ obj_cost = CleanDecimal(
+ '//span[has-class("OfferTop-price")]',
+ default=NotAvailable
+ )
+ obj_price_per_meter = PricePerMeterFilter()
+ obj_currency = Currency(
+ '//span[has-class("OfferTop-price")]'
+ )
+ obj_location = Format(
+ '%s - %s',
+ CleanText('//p[@data-behat="adresseBien"]'),
+ CleanText('//p[has-class("OfferTop-loc")]')
+ )
+ obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
+ obj_phone = Regexp(
+ Link(
+ '//a[has-class("OfferContact-btn--tel")]'
+ ),
+ r'tel:(.*)'
+ )
+
+ def obj_photos(self):
+ photos = []
+ for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
+ photo_url = Attr('.', 'src')(photo)
+ photo_url = photo_url.replace('640/480', '800/600')
+ photos.append(HousingPhoto(photo_url))
+ return photos
+
+ obj_date = datetime.date.today()
+
+ def obj_utilities(self):
+ price = CleanText(
+ '//p[has-class("OfferTop-price")]'
+ )(self)
+ if "charges comprises" in price.lower():
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.EXCLUDED
+
+ obj_rooms = CleanDecimal(
+ '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
+ default=NotAvailable
+ )
+ obj_bedrooms = CleanDecimal(
+ '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
+ default=NotAvailable
+ )
+
+ def obj_DPE(self):
+ try:
+ electric_consumption = CleanDecimal(Regexp(
+ Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
+ r'https://dpe.foncia.net\/(\d+)\/.*'
+ ))(self)
+ except (RegexpError, XPathNotFound):
+ electric_consumption = None
+
+ DPE = ""
+ if electric_consumption is not None:
+ if electric_consumption <= 50:
+ DPE = "A"
+ elif 50 < electric_consumption <= 90:
+ DPE = "B"
+ elif 90 < electric_consumption <= 150:
+ DPE = "C"
+ elif 150 < electric_consumption <= 230:
+ DPE = "D"
+ elif 230 < electric_consumption <= 330:
+ DPE = "E"
+ elif 330 < electric_consumption <= 450:
+ DPE = "F"
+ else:
+ DPE = "G"
+ return getattr(ENERGY_CLASS, DPE, NotAvailable)
+ return NotAvailable
+
+ def obj_details(self):
+ details = {}
+
+ dispo = Date(
+ Regexp(
+ CleanText('//p[has-class("OfferTop-dispo")]'),
+ r'.* (\d\d\/\d\d\/\d\d\d\d)',
+ default=datetime.date.today().isoformat()
+ )
+ )(self)
+ if dispo is not None:
+ details["dispo"] = dispo
+
+ priceMentions = CleanText(
+ '//p[has-class("OfferTop-mentions")]',
+ default=None
+ )(self)
+ if priceMentions is not None:
+ details["priceMentions"] = priceMentions
+
+ agency = CleanText(
+ '//p[has-class("OfferContact-address")]',
+ default=None
+ )(self)
+ if agency is not None:
+ details["agency"] = agency
+
+ for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
+ category = CleanText(
+ './h3[has-class("OfferDetails-title--2")]',
+ default=None
+ )(item)
+ if not category:
+ continue
+
+ details[category] = {}
+
+ for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
+ detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
+ detail_value = CleanText('.//*[has-class("List-value")]')(detail_item)
+ details[category][detail_title] = detail_value
+
+ for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
+ detail_title = CleanText('.')(detail_item)
+ details[category][detail_title] = True
+
+ try:
+ electric_consumption = CleanDecimal(Regexp(
+ Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
+ r'https://dpe.foncia.net\/(\d+)\/.*'
+ ))(self)
+ details["electric_consumption"] = (
+ '{} kWhEP/m².an'.format(electric_consumption)
+ )
+ except (RegexpError, XPathNotFound):
+ pass
+
+ return details
+
+
+class SearchPage(HTMLPage):
+ def do_search(self, query, cities):
+ form = self.get_form('//form[@name="searchForm"]')
+
+ form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
+ form['searchForm[localisation]'] = cities
+ form['searchForm[type_bien][]'] = []
+ for house_type in query.house_types:
+ try:
+ form['searchForm[type_bien][]'].extend(
+ QUERY_HOUSE_TYPES[house_type]
+ )
+ except KeyError:
+ pass
+ form['searchForm[type_bien][]'] = [
+ x for x in form['searchForm[type_bien][]']
+ if x in AVAILABLE_TYPES.get(query.type, [])
+ ]
+ if query.area_min:
+ form['searchForm[surface_min]'] = query.area_min
+ if query.area_max:
+ form['searchForm[surface_max]'] = query.area_max
+ if query.cost_min:
+ form['searchForm[prix_min]'] = query.cost_min
+ if query.cost_max:
+ form['searchForm[prix_max]'] = query.cost_max
+ if query.nb_rooms:
+ form['searchForm[pieces]'] = [i for i in range(1, query.nb_rooms + 1)]
+ form.submit()
+
+ def find_housing(self, query_type, housing):
+ form = self.get_form('//form[@name="searchForm"]')
+ form['searchForm[type]'] = query_type
+ form['searchForm[reference]'] = housing
+ form.submit()
+
+
+class SearchResultsPage(HTMLPage):
+ @pagination
+ @method
+ class iter_housings(ListElement):
+ item_xpath = '//article[has-class("TeaserOffer")]'
+
+ next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')
+
+ class item(ItemElement):
+ klass = Housing
+
+ obj_id = Format(
+ '%s:%s',
+ Env('type'),
+ Attr('.//span[boolean(@data-reference)]', 'data-reference')
+ )
+ obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
+ obj_type = Env('query_type')
+ obj_advert_type = ADVERT_TYPES.PROFESSIONAL
+
+ def obj_house_type(self):
+ url = self.obj_url(self)
+ for house_type, types in QUERY_HOUSE_TYPES.items():
+ for type in types:
+ if ('/%s/' % type) in url:
+ return house_type
+ return NotLoaded
+
+ obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
+ obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
+ obj_area = CleanDecimal(
+ Regexp(
+ CleanText(
+ './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
+ ),
+ r'(\d*\.*\d*) .*',
+ default=NotAvailable
+ ),
+ default=NotAvailable
+ )
+ obj_cost = CleanDecimal(
+ './/strong[has-class("TeaserOffer-price-num")]',
+ default=NotAvailable
+ )
+ obj_price_per_meter = PricePerMeterFilter()
+ obj_currency = Currency(
+ './/strong[has-class("TeaserOffer-price-num")]'
+ )
+ obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
+ obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')
+
+ def obj_photos(self):
+ url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
+ # If the used photo is a default no photo, the src is on the same domain.
+ if url[0] == '/':
+ return []
+ else:
+ return [HousingPhoto(url)]
+
+ obj_date = datetime.date.today()
+
+ def obj_utilities(self):
+ price = CleanText(
+ './/strong[has-class("TeaserOffer-price-num")]'
+ )(self)
+ if "charges comprises" in price.lower():
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.EXCLUDED
+
+ obj_rooms = CleanDecimal(
+ './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
+ default=NotLoaded
+ )
+ obj_bedrooms = CleanDecimal(
+ './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
+ default=NotLoaded
+ )
+
+ def obj_details(self):
+ return {
+ "dispo": Date(
+ Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
+ default=datetime.date.today().isoformat())
+ )(self),
+ "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
+ }
diff --git a/modules/foncia/test.py b/modules/foncia/test.py
new file mode 100644
index 0000000..1206523
--- /dev/null
+++ b/modules/foncia/test.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Phyks (Lucas Verney)
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from weboob.capabilities.housing import (
+ Query, POSTS_TYPES, ADVERT_TYPES
+)
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+from weboob.tools.test import BackendTest
+
+
+class FonciaTest(BackendTest, HousingTest):
+ MODULE = 'foncia'
+
+ FIELDS_ALL_HOUSINGS_LIST = [
+ "id", "type", "advert_type", "house_type", "url", "title", "area",
+ "cost", "currency", "date", "location", "text", "details"
+ ]
+ FIELDS_ANY_HOUSINGS_LIST = [
+ "photos",
+ "rooms"
+ ]
+ FIELDS_ALL_SINGLE_HOUSING = [
+ "id", "url", "type", "advert_type", "house_type", "title", "area",
+ "cost", "currency", "utilities", "date", "location", "text", "phone",
+ "DPE", "details"
+ ]
+ FIELDS_ANY_SINGLE_HOUSING = [
+ "bedrooms",
+ "photos",
+ "rooms"
+ ]
+
+ def test_foncia_rent(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_foncia_sale(self):
+ query = Query()
+ query.area_min = 20
+ query.type = POSTS_TYPES.SALE
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_foncia_furnished_rent(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.FURNISHED_RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
+
+ def test_foncia_personal(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 900
+ query.type = POSTS_TYPES.RENT
+ query.advert_types = [ADVERT_TYPES.PERSONAL]
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+
+ results = list(self.backend.search_housings(query))
+ self.assertEqual(len(results), 0)
diff --git a/modules/leboncoin/__init__.py b/modules/leboncoin/__init__.py
new file mode 100644
index 0000000..2206442
--- /dev/null
+++ b/modules/leboncoin/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .module import LeboncoinModule
+
+
+__all__ = ['LeboncoinModule']
diff --git a/modules/leboncoin/browser.py b/modules/leboncoin/browser.py
new file mode 100644
index 0000000..fcfe6cb
--- /dev/null
+++ b/modules/leboncoin/browser.py
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.json import json
+
+from weboob.browser import PagesBrowser, URL
+from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
+ HOUSE_TYPES, ADVERT_TYPES)
+from .pages import CityListPage, HousingListPage, HousingPage, PhonePage, HomePage
+
+
+class LeboncoinBrowser(PagesBrowser):
+ BASEURL = 'https://www.leboncoin.fr/'
+ city = URL('ajax/location_list.html\?city=(?P.*)&zipcode=(?P.*)', CityListPage)
+ housing = URL('ventes_immobilieres/(?P<_id>.*).htm', HousingPage)
+
+ home = URL('annonces/offres', HomePage)
+ api = URL('https://api.leboncoin.fr/finder/search', HousingListPage)
+ phone = URL('https://api.leboncoin.fr/api/utils/phonenumber.json', PhonePage)
+
+ TYPES = {POSTS_TYPES.RENT: '10',
+ POSTS_TYPES.FURNISHED_RENT: '10',
+ POSTS_TYPES.SALE: '9',
+ POSTS_TYPES.SHARING: '11', }
+
+ RET = {HOUSE_TYPES.HOUSE: '1',
+ HOUSE_TYPES.APART: '2',
+ HOUSE_TYPES.LAND: '3',
+ HOUSE_TYPES.PARKING: '4',
+ HOUSE_TYPES.OTHER: '5'}
+
+ def __init__(self, *args, **kwargs):
+ super(LeboncoinBrowser, self).__init__(*args, **kwargs)
+
+ def get_cities(self, pattern):
+ city = ''
+ zip_code = ''
+ if pattern.isdigit():
+ zip_code = pattern
+ else:
+ city = pattern.replace(" ", "_")
+
+ return self.city.go(city=city, zip=zip_code).get_cities()
+
+ def search_housings(self, query, module_name):
+
+ if query.type not in self.TYPES.keys():
+ return TypeNotSupported()
+
+ data = {}
+ data['filters'] = {}
+ data['filters']['category'] = {}
+ data['filters']['category']['id'] = self.TYPES.get(query.type)
+ data['filters']['enums'] = {}
+ data['filters']['enums']['ad_type'] = ['offer']
+
+ data['filters']['enums']['real_estate_type'] = []
+ for t in query.house_types:
+ t = self.RET.get(t)
+ if t:
+ data['filters']['enums']['real_estate_type'].append(t)
+
+ if query.type == POSTS_TYPES.FURNISHED_RENT:
+ data['filters']['enums']['furnished'] = ['1']
+ elif query.type == POSTS_TYPES.RENT:
+ data['filters']['enums']['furnished'] = ['2']
+
+ data['filters']['keywords'] = {}
+ data['filters']['ranges'] = {}
+
+ if query.cost_max or query.cost_min:
+ data['filters']['ranges']['price'] = {}
+
+ if query.cost_max:
+ data['filters']['ranges']['price']['max'] = query.cost_max
+
+ if query.cost_min:
+ data['filters']['ranges']['price']['min'] = query.cost_min
+
+ if query.area_max or query.area_min:
+ data['filters']['ranges']['square'] = {}
+ if query.area_max:
+ data['filters']['ranges']['square']['max'] = query.area_max
+
+ if query.area_min:
+ data['filters']['ranges']['square']['min'] = query.area_min
+
+ if query.nb_rooms:
+ data['filters']['ranges']['rooms'] = {}
+ data['filters']['ranges']['rooms']['min'] = query.nb_rooms
+
+ data['filters']['location'] = {}
+ data['filters']['location']['city_zipcodes'] = []
+
+ for c in query.cities:
+ if c.backend == module_name:
+ _c = c.id.split(' ')
+ __c = {}
+ __c['city'] = _c[0]
+ __c['zipcode'] = _c[1]
+ __c['label'] = c.name
+
+ data['filters']['location']['city_zipcodes'].append(__c)
+
+ if len(query.advert_types) == 1:
+ if query.advert_types[0] == ADVERT_TYPES.PERSONAL:
+ data['owner_type'] = 'private'
+ elif query.advert_types[0] == ADVERT_TYPES.PROFESSIONAL:
+ data['owner_type'] = 'pro'
+ else:
+ data['owner_type'] = 'all'
+
+ data['limit'] = 100
+ data['limit_alu'] = 3
+ data['offset'] = 0
+
+ self.session.headers.update({"api_key": self.home.go().get_api_key()})
+ return self.api.go(data=json.dumps(data)).get_housing_list(query_type=query.type, data=data)
+
+ def get_housing(self, _id, obj=None):
+ return self.housing.go(_id=_id).get_housing(obj=obj)
+
+ def get_phone(self, _id):
+ api_key = self.housing.stay_or_go(_id=_id).get_api_key()
+ data = {'list_id': _id,
+ 'app_id': 'leboncoin_web_utils',
+ 'key': api_key,
+ 'text': 1, }
+ return self.phone.go(data=data).get_phone()
diff --git a/modules/leboncoin/favicon.png b/modules/leboncoin/favicon.png
new file mode 100644
index 0000000..bb70200
Binary files /dev/null and b/modules/leboncoin/favicon.png differ
diff --git a/modules/leboncoin/module.py b/modules/leboncoin/module.py
new file mode 100644
index 0000000..6a9a311
--- /dev/null
+++ b/modules/leboncoin/module.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.backend import Module
+from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto)
+from .browser import LeboncoinBrowser
+
+
+__all__ = ['LeboncoinModule']
+
+
+class LeboncoinModule(Module, CapHousing):
+ NAME = 'leboncoin'
+ DESCRIPTION = u'search house on leboncoin website'
+ MAINTAINER = u'Bezleputh'
+ EMAIL = 'carton_ben@yahoo.fr'
+ LICENSE = 'AGPLv3+'
+ VERSION = '2.1'
+
+ BROWSER = LeboncoinBrowser
+
+ def create_default_browser(self):
+ return self.create_browser()
+
+ def get_housing(self, _id):
+ return self.browser.get_housing(_id)
+
+ def fill_housing(self, housing, fields):
+ if 'phone' in fields:
+ housing.phone = self.browser.get_phone(housing.id)
+ fields.remove('phone')
+
+ if len(fields) > 0:
+ self.browser.get_housing(housing.id, housing)
+
+ return housing
+
+ def fill_photo(self, photo, fields):
+ if 'data' in fields and photo.url and not photo.data:
+ photo.data = self.browser.open(photo.url).content
+ return photo
+
+ def search_city(self, pattern):
+ return self.browser.get_cities(pattern)
+
+ def search_housings(self, query):
+ return self.browser.search_housings(query, self.name)
+
+ OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}
diff --git a/modules/leboncoin/pages.py b/modules/leboncoin/pages.py
new file mode 100644
index 0000000..6079d23
--- /dev/null
+++ b/modules/leboncoin/pages.py
@@ -0,0 +1,301 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import unicode_literals
+
+import requests
+
+from weboob.browser.pages import HTMLPage, JsonPage, pagination
+from weboob.browser.elements import ItemElement, ListElement, method, DictElement
+from weboob.capabilities.base import Currency as BaseCurrency
+from weboob.browser.filters.standard import (CleanText, CleanDecimal, _Filter,
+ Env, DateTime, Format)
+from weboob.browser.filters.json import Dict
+from weboob.capabilities.housing import (City, Housing, HousingPhoto,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES,
+ ADVERT_TYPES, HOUSE_TYPES)
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
+
+from decimal import Decimal
+from lxml import etree
+import json
+
+
+class PopDetail(_Filter):
+ def __init__(self, name, default=NotAvailable):
+ super(PopDetail, self).__init__(default)
+ self.name = name
+
+ def __call__(self, item):
+ return item.env['details'].pop(self.name, self.default)
+
+
+class CityListPage(HTMLPage):
+
+ def build_doc(self, content):
+ content = super(CityListPage, self).build_doc(content)
+ if content.getroot() is not None:
+ return content
+ return etree.Element("html")
+
+ @method
+ class get_cities(ListElement):
+ item_xpath = '//li'
+
+ class item(ItemElement):
+ klass = City
+
+ obj_id = Format('%s %s',
+ CleanText('./span[has-class("city")]'),
+ CleanText('./span[@class="zipcode"]'))
+
+ obj_name = Format('%s %s',
+ CleanText('./span[has-class("city")]'),
+ CleanText('./span[@class="zipcode"]'))
+
+
+class HomePage(HTMLPage):
+ def __init__(self, *args, **kwargs):
+ HTMLPage.__init__(self, *args, **kwargs)
+
+ add_content = CleanText('(//body/script)[4]', replace=[('window.FLUX_STATE = ', '')])(self.doc) or '{}'
+ api_content = CleanText('(//body/script[@id="__NEXT_DATA__"])')(self.doc)
+
+ self.htmldoc = self.doc
+ self.api_content = json.loads(api_content)
+ self.doc = json.loads(add_content)
+
+ def get_api_key(self):
+ return Dict('runtimeConfig/API/KEY')(self.api_content)
+
+
+class HousingListPage(JsonPage):
+
+ def __init__(self, *args, **kwargs):
+ JsonPage.__init__(self, *args, **kwargs)
+ if 'ads' not in self.doc:
+ self.doc['ads'] = []
+
+ @pagination
+ @method
+ class get_housing_list(DictElement):
+ item_xpath = 'ads'
+
+ def next_page(self):
+ data = Env('data')(self)
+ if data['offset'] > self.page.doc['total_all']:
+ return
+
+ data['offset'] = data['offset'] + data['limit']
+ return requests.Request("POST", self.page.url, data=json.dumps(data))
+
+ class item(ItemElement):
+ klass = Housing
+
+ def parse(self, el):
+ self.env['details'] = {obj['key']: obj['value_label'] for obj in self.el['attributes']}
+
+ obj_id = Dict('list_id')
+ obj_url = Dict('url')
+ obj_type = Env('query_type')
+
+ obj_area = CleanDecimal(PopDetail('square',
+ default=0),
+ default=NotAvailable)
+ obj_rooms = CleanDecimal(PopDetail('rooms',
+ default=0),
+ default=NotAvailable)
+
+ def obj_GES(self):
+ ges = CleanText(PopDetail('ges', default='|'))(self)
+ return getattr(ENERGY_CLASS, ges[0], NotAvailable)
+
+ def obj_DPE(self):
+ dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
+ return getattr(ENERGY_CLASS, dpe[0], NotAvailable)
+
+ def obj_house_type(self):
+ value = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower()
+ if value == 'parking':
+ return HOUSE_TYPES.PARKING
+ elif value == 'appartement':
+ return HOUSE_TYPES.APART
+ elif value == 'maison':
+ return HOUSE_TYPES.HOUSE
+ elif value == 'terrain':
+ return HOUSE_TYPES.LAND
+ else:
+ return HOUSE_TYPES.OTHER
+
+ def obj_utilities(self):
+ value = CleanText(PopDetail('charges_included',
+ default='Non'),
+ default=NotAvailable)(self)
+ if value == "Oui":
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.EXCLUDED
+
+ def obj_advert_type(self):
+ line_pro = Dict('owner/type')(self)
+ if line_pro == u'pro':
+ return ADVERT_TYPES.PROFESSIONAL
+ else:
+ return ADVERT_TYPES.PERSONAL
+
+ obj_title = Dict('subject')
+ obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0))
+ obj_currency = BaseCurrency.get_currency(u'€')
+ obj_text = Dict('body')
+ obj_location = Dict('location/city_label')
+ obj_date = DateTime(Dict('first_publication_date'))
+
+ def obj_photos(self):
+ photos = []
+ for img in Dict('images/urls_large', default=[])(self):
+ photos.append(HousingPhoto(img))
+ return photos
+
+ def obj_type(self):
+ try:
+ breadcrumb = int(Dict('category_id')(self))
+ except ValueError:
+ breadcrumb = None
+
+ if breadcrumb == 11:
+ return POSTS_TYPES.SHARING
+ elif breadcrumb == 10:
+
+ isFurnished = CleanText(PopDetail('furnished', default=' '))(self)
+
+ if isFurnished.lower() == u'meublé':
+ return POSTS_TYPES.FURNISHED_RENT
+ else:
+ return POSTS_TYPES.RENT
+ else:
+ return POSTS_TYPES.SALE
+
+ obj_price_per_meter = PricePerMeterFilter()
+ obj_details = Env('details')
+
+
+class HousingPage(HomePage):
+ def __init__(self, *args, **kwargs):
+ HomePage.__init__(self, *args, **kwargs)
+ self.doc = self.api_content["props"]["pageProps"]["ad"]
+
+ def get_api_key(self):
+ return Dict('runtimeConfig/API/KEY_JSON')(self.api_content)
+
+ @method
+ class get_housing(ItemElement):
+ klass = Housing
+
+ def parse(self, el):
+ self.env['details'] = {obj['key']: obj['value_label'] for obj in el['attributes']}
+
+ obj_id = Env('_id')
+
+ obj_area = CleanDecimal(PopDetail('square',
+ default=0),
+ default=NotAvailable)
+ obj_rooms = CleanDecimal(PopDetail('rooms',
+ default=0),
+ default=NotAvailable)
+
+ def obj_GES(self):
+ ges = CleanText(PopDetail('ges', default='|'))(self)
+ return getattr(ENERGY_CLASS, ges[0], NotAvailable)
+
+ def obj_DPE(self):
+ dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
+ return getattr(ENERGY_CLASS, dpe[0], NotAvailable)
+
+ def obj_house_type(self):
+ value = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower()
+ if value == 'parking':
+ return HOUSE_TYPES.PARKING
+ elif value == 'appartement':
+ return HOUSE_TYPES.APART
+ elif value == 'maison':
+ return HOUSE_TYPES.HOUSE
+ elif value == 'terrain':
+ return HOUSE_TYPES.LAND
+ else:
+ return HOUSE_TYPES.OTHER
+
+ def obj_utilities(self):
+ value = CleanText(PopDetail('charges_included',
+ default='Non'),
+ default=NotAvailable)(self)
+ if value == "Oui":
+ return UTILITIES.INCLUDED
+ else:
+ return UTILITIES.EXCLUDED
+
+ obj_title = Dict('subject')
+ obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0))
+ obj_currency = BaseCurrency.get_currency(u'€')
+ obj_text = Dict('body')
+ obj_location = Dict('location/city_label')
+
+ def obj_advert_type(self):
+ line_pro = Dict('owner/type')(self)
+ if line_pro == u'pro':
+ return ADVERT_TYPES.PROFESSIONAL
+ else:
+ return ADVERT_TYPES.PERSONAL
+
+ obj_date = DateTime(Dict('first_publication_date'))
+
+ def obj_photos(self):
+ photos = []
+ for img in Dict('images/urls_large', default=[])(self):
+ photos.append(HousingPhoto(img))
+ return photos
+
+ def obj_type(self):
+ try:
+ breadcrumb = int(Dict('category_id')(self))
+ except ValueError:
+ breadcrumb = None
+
+ if breadcrumb == 11:
+ return POSTS_TYPES.SHARING
+ elif breadcrumb == 10:
+
+ isFurnished = CleanText(PopDetail('furnished', default=' '))(self)
+
+ if isFurnished.lower() == u'meublé':
+ return POSTS_TYPES.FURNISHED_RENT
+ else:
+ return POSTS_TYPES.RENT
+ else:
+ return POSTS_TYPES.SALE
+
+ obj_price_per_meter = PricePerMeterFilter()
+ obj_url = Dict('url')
+ obj_details = Env('details')
+
+
+class PhonePage(JsonPage):
+ def get_phone(self):
+ if Dict('utils/status')(self.doc) == u'OK':
+ return Dict('utils/phonenumber')(self.doc)
+ return NotAvailable
diff --git a/modules/leboncoin/test.py b/modules/leboncoin/test.py
new file mode 100644
index 0000000..6367705
--- /dev/null
+++ b/modules/leboncoin/test.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.test import BackendTest
+from weboob.tools.value import Value
+from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+
+
+class LeboncoinTest(BackendTest, HousingTest):
+ MODULE = 'leboncoin'
+
+ FIELDS_ALL_HOUSINGS_LIST = [
+ "id", "type", "advert_type", "url", "title",
+ "currency", "utilities", "date", "location", "text"
+ ]
+ FIELDS_ANY_HOUSINGS_LIST = [
+ "area",
+ "cost",
+ "price_per_meter",
+ "photos"
+ ]
+ FIELDS_ALL_SINGLE_HOUSING = [
+ "id", "url", "type", "advert_type", "house_type", "title",
+ "cost", "currency", "utilities", "date", "location", "text",
+ "rooms", "details"
+ ]
+ FIELDS_ANY_SINGLE_HOUSING = [
+ "area",
+ "GES",
+ "DPE",
+ "photos",
+ # Don't test phone as leboncoin API is strongly rate-limited
+ ]
+
+ def setUp(self):
+ if not self.is_backend_configured():
+ self.backend.config['advert_type'] = Value(value='a')
+ self.backend.config['region'] = Value(value='ile_de_france')
+
+ def test_leboncoin_rent(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ if len(query.cities) == 3:
+ break
+ self.check_against_query(query)
+
+ def test_leboncoin_sale(self):
+ query = Query()
+ query.area_min = 20
+ query.type = POSTS_TYPES.SALE
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ if len(query.cities) == 3:
+ break
+ self.check_against_query(query)
+
+ def test_leboncoin_furnished_rent(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 1500
+ query.type = POSTS_TYPES.FURNISHED_RENT
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ if len(query.cities) == 3:
+ break
+ self.check_against_query(query)
+
+ def test_leboncoin_professional(self):
+ query = Query()
+ query.area_min = 20
+ query.cost_max = 900
+ query.type = POSTS_TYPES.RENT
+ query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
+ query.cities = []
+ for city in self.backend.search_city('paris'):
+ city.backend = self.backend.name
+ query.cities.append(city)
+ self.check_against_query(query)
diff --git a/modules/logicimmo/__init__.py b/modules/logicimmo/__init__.py
new file mode 100644
index 0000000..b052736
--- /dev/null
+++ b/modules/logicimmo/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .module import LogicimmoModule
+
+
+__all__ = ['LogicimmoModule']
diff --git a/modules/logicimmo/browser.py b/modules/logicimmo/browser.py
new file mode 100644
index 0000000..ab8d2fb
--- /dev/null
+++ b/modules/logicimmo/browser.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.browser import PagesBrowser, URL
+from weboob.browser.profiles import Firefox
+from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
+ HOUSE_TYPES)
+from .pages import CitiesPage, SearchPage, HousingPage, PhonePage
+
+
+class LogicimmoBrowser(PagesBrowser):
+    """Browser for logic-immo.com: city lookup, housing search, advert details and phone."""
+
+    BASEURL = 'https://www.logic-immo.com/'
+    PROFILE = Firefox()
+
+    # The named groups below must match the keyword arguments passed to
+    # ``.go()`` in the methods of this class (and ``Env('type')`` in pages.py).
+    # Raw strings keep the regex escapes (``\?``) free of invalid-escape warnings.
+    city = URL(r'asset/t9/getLocalityT9.php\?site=fr&lang=fr&json=%22(?P<pattern>.*)%22',
+               CitiesPage)
+    search = URL(r'(?P<type>location-immobilier|vente-immobilier|recherche-colocation)-(?P<cities>.*)/options/(?P<options>.*)', SearchPage)
+    housing = URL(r'detail-(?P<_id>.*).htm', HousingPage)
+    phone = URL(r'(?P<urlcontact>.*)', PhonePage)
+
+    # Post type -> first segment of the search URL.
+    TYPES = {POSTS_TYPES.RENT: 'location-immobilier',
+             POSTS_TYPES.SALE: 'vente-immobilier',
+             POSTS_TYPES.SHARING: 'recherche-colocation',
+             POSTS_TYPES.FURNISHED_RENT: 'location-immobilier',
+             POSTS_TYPES.VIAGER: 'vente-immobilier'}
+
+    # House type -> identifier sent in the ``groupprptypesids`` search option.
+    RET = {HOUSE_TYPES.HOUSE: '2',
+           HOUSE_TYPES.APART: '1',
+           HOUSE_TYPES.LAND: '3',
+           HOUSE_TYPES.PARKING: '10',
+           HOUSE_TYPES.OTHER: '14'}
+
+    def __init__(self, *args, **kwargs):
+        """Build the browser and send ``X-Requested-With: XMLHttpRequest`` on every request."""
+        super(LogicimmoBrowser, self).__init__(*args, **kwargs)
+        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
+
+    def get_cities(self, pattern):
+        """Return the cities matching ``pattern``.
+
+        An empty pattern triggers no request and yields an empty list, so
+        that callers can always iterate over the result.
+        """
+        if not pattern:
+            return []
+        return self.city.go(pattern=pattern).get_cities()
+
+    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
+        """Run a search and return the housings listed on the result page.
+
+        The criteria are encoded as ``key=value`` segments joined with '/'
+        and passed as the ``options`` part of the search URL.
+
+        :raises TypeNotSupported: if ``type`` has no entry in ``TYPES``.
+        """
+        if type not in self.TYPES:
+            raise TypeNotSupported()
+
+        options = []
+
+        # Viager searches use a fixed property group; otherwise translate the
+        # requested house types, ignoring those unknown to RET.
+        if type == POSTS_TYPES.VIAGER:
+            ret = ['15']
+        else:
+            ret = [self.RET[house_type] for house_type in house_types
+                   if house_type in self.RET]
+
+        if ret:
+            options.append('groupprptypesids=%s' % ','.join(ret))
+
+        if type == POSTS_TYPES.FURNISHED_RENT:
+            options.append('searchoptions=4')
+
+        # Minimum price and area are always sent, defaulting to '0'.
+        options.append('pricemin=%s' % (cost_min if cost_min else '0'))
+
+        if cost_max:
+            options.append('pricemax=%s' % cost_max)
+
+        options.append('areamin=%s' % (area_min if area_min else '0'))
+
+        if area_max:
+            options.append('areamax=%s' % area_max)
+
+        if nb_rooms:
+            # Every count from nb_rooms up to 6 is listed explicitly.
+            rooms = ','.join(str(i) for i in range(nb_rooms, 7))
+            rooms_key = 'nbbedrooms' if type == POSTS_TYPES.SHARING else 'nbrooms'
+            options.append('%s=%s' % (rooms_key, rooms))
+
+        self.search.go(type=self.TYPES[type],
+                       cities=cities,
+                       options='/'.join(options))
+
+        if type == POSTS_TYPES.SHARING:
+            return self.page.iter_sharing()
+
+        return self.page.iter_housings(query_type=type)
+
+    def get_housing(self, _id, housing=None):
+        """Open the advert ``_id`` and return it, filling ``housing`` when one is given."""
+        return self.housing.go(_id=_id).get_housing(obj=housing)
+
+    def get_phone(self, _id):
+        """Return the phone number of advert ``_id``.
+
+        Only ids starting with 'location' or 'vente' are handled; any other
+        id yields ``None``.
+        """
+        if _id.startswith(('location', 'vente')):
+            urlcontact, params = self.housing.stay_or_go(_id=_id).get_phone_url_datas()
+            return self.phone.go(urlcontact=urlcontact, params=params).get_phone()
+        return None
diff --git a/modules/logicimmo/favicon.png b/modules/logicimmo/favicon.png
new file mode 100644
index 0000000..bd25006
Binary files /dev/null and b/modules/logicimmo/favicon.png differ
diff --git a/modules/logicimmo/module.py b/modules/logicimmo/module.py
new file mode 100644
index 0000000..c714ed3
--- /dev/null
+++ b/modules/logicimmo/module.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.backend import Module
+from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
+ ADVERT_TYPES)
+from weboob.capabilities.base import UserError
+from .browser import LogicimmoBrowser
+
+
+__all__ = ['LogicimmoModule']
+
+
+class LogicImmoCitiesError(UserError):
+    """Error signalling that a search targets more than three cities."""
+
+    def __init__(self, msg='You cannot select more than three cities'):
+        super(LogicImmoCitiesError, self).__init__(msg)
+
+
+class LogicimmoModule(Module, CapHousing):
+    """Housing module for logic-immo, delegating all requests to LogicimmoBrowser."""
+
+    NAME = 'logicimmo'
+    DESCRIPTION = u'logicimmo website'
+    MAINTAINER = u'Bezleputh'
+    EMAIL = 'carton_ben@yahoo.fr'
+    LICENSE = 'AGPLv3+'
+    VERSION = '2.1'
+
+    BROWSER = LogicimmoBrowser
+
+    def get_housing(self, housing):
+        """Fetch a housing.
+
+        :param housing: either a ``Housing`` object (it is passed to the
+            browser to be filled) or a bare housing id.
+        """
+        if isinstance(housing, Housing):
+            housing_id = housing.id
+        else:
+            housing_id, housing = housing, None
+        return self.browser.get_housing(housing_id, housing)
+
+    def search_city(self, pattern):
+        """Return the cities the browser finds for ``pattern``."""
+        return self.browser.get_cities(pattern)
+
+    def search_housings(self, query):
+        """Search housings for ``query``, using only the cities owned by this backend.
+
+        :raises LogicImmoCitiesError: if more than three cities are selected.
+        """
+        if (len(query.advert_types) == 1 and
+                query.advert_types[0] == ADVERT_TYPES.PERSONAL):
+            # Logic-immo is pro only
+            return list()
+
+        own_cities = [c for c in query.cities if c.backend == self.name]
+        cities_names = ['%s' % c.name.replace(' ', '-') for c in own_cities]
+        cities_ids = ['%s' % c.id for c in own_cities]
+
+        if not cities_names:
+            return list()
+
+        if len(cities_names) > 3:
+            raise LogicImmoCitiesError()
+
+        # The URL segment lists every city name first, then every city id.
+        cities = ','.join(cities_names + cities_ids)
+        return self.browser.search_housings(query.type, cities.lower(), query.nb_rooms,
+                                            query.area_min, query.area_max,
+                                            query.cost_min, query.cost_max,
+                                            query.house_types)
+
+    def fill_housing(self, housing, fields):
+        """Load the requested ``fields`` of ``housing`` and return it.
+
+        The phone number needs its own request; any other requested field
+        triggers a reload of the advert page. ``fields`` is not modified.
+        """
+        # Filter into a new list instead of calling fields.remove(), so the
+        # caller's sequence is left untouched (and may be a tuple).
+        other_fields = [field for field in fields if field != 'phone']
+
+        if 'phone' in fields:
+            housing.phone = self.browser.get_phone(housing.id)
+
+        if other_fields:
+            self.browser.get_housing(housing.id, housing)
+
+        return housing
+
+    def fill_photo(self, photo, fields):
+        """Download the photo content when 'data' is requested and not loaded yet."""
+        if 'data' in fields and photo.url and not photo.data:
+            photo.data = self.browser.open(photo.url).content
+        return photo
+
+    # Object type -> function used to fill it.
+    OBJECTS = {Housing: fill_housing,
+               HousingPhoto: fill_photo,
+               }
diff --git a/modules/logicimmo/pages.py b/modules/logicimmo/pages.py
new file mode 100644
index 0000000..403ccde
--- /dev/null
+++ b/modules/logicimmo/pages.py
@@ -0,0 +1,377 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from weboob.browser.pages import HTMLPage, JsonPage
+from weboob.browser.elements import ItemElement, ListElement, DictElement, method
+from weboob.browser.filters.json import Dict
+from weboob.browser.filters.standard import (Currency, Format, CleanText,
+ Regexp, CleanDecimal, Date, Env,
+ BrowserURL)
+from weboob.browser.filters.html import Attr, XPath, CleanHTML
+from weboob.capabilities.housing import (Housing, HousingPhoto, City,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES,
+ ADVERT_TYPES, HOUSE_TYPES)
+from weboob.capabilities.base import NotAvailable, NotLoaded
+from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
+from weboob.tools.compat import urljoin
+
+
+# JSON page whose entries are converted into City objects.
+class CitiesPage(JsonPage):
+ @method
+ class get_cities(DictElement):
+ # Path, inside the JSON document, of the entries to iterate over.
+ item_xpath = '*/children'
+
+ class item(ItemElement):
+ klass = City
+
+ def condition(self):
+ # Only keep entries whose 'lct_parent_id' differs from the string '0'.
+ return Dict('lct_parent_id')(self) != '0'
+
+ # id is "<lct_id>_<lct_level>"; name is "<lct_name> <lct_post_code>".
+ obj_id = Format('%s_%s', Dict('lct_id'), Dict('lct_level'))
+ obj_name = Format('%s %s', Dict('lct_name'), Dict('lct_post_code'))
+
+
+class PhonePage(HTMLPage):
+    """HTML page from which the phone number is read."""
+
+    def get_phone(self):
+        """Return the cleaned text extracted from the ``div`` carrying the ``phone`` class."""
+        extract_phone = CleanText('//div[has-class("phone")]', children=False)
+        return extract_phone(self.doc)
+
+
+class HousingPage(HTMLPage):
+    """Detail page of a single logic-immo advert."""
+
+    @method
+    class get_housing(ItemElement):
+        # Builds (or fills) a Housing object from the advert detail page.
+        klass = Housing
+
+        obj_id = Env('_id')
+
+        def obj_type(self):
+            """Derive the post type from keywords found in the advert URL."""
+            url = BrowserURL('housing', _id=Env('_id'))(self)
+            # 'colocation' has to be tested first since it contains 'location'.
+            if 'colocation' in url:
+                return POSTS_TYPES.SHARING
+            elif 'location' in url:
+                # A rental is furnished when its "meublé" criterion reads "oui".
+                isFurnished = False
+                for li in XPath('//ul[@itemprop="description"]/li')(self):
+                    label = CleanText('./span[has-class("criteria-label")]')(li)
+                    if label.lower() == "meublé":
+                        isFurnished = (
+                            CleanText('./span[has-class("criteria-value")]')(li).lower() == 'oui'
+                        )
+                if isFurnished:
+                    return POSTS_TYPES.FURNISHED_RENT
+                else:
+                    return POSTS_TYPES.RENT
+            elif 'vente' in url:
+                return POSTS_TYPES.SALE
+            return NotAvailable
+        obj_advert_type = ADVERT_TYPES.PROFESSIONAL
+
+        def obj_house_type(self):
+            """Map the (lowercased) main feature label to a HOUSE_TYPES value; OTHER by default."""
+            house_type = CleanText('.//h2[@class="offerMainFeatures"]/div')(self).lower()
+            if house_type == "appartement":
+                return HOUSE_TYPES.APART
+            elif house_type == "maison":
+                return HOUSE_TYPES.HOUSE
+            elif house_type == "terrain":
+                return HOUSE_TYPES.LAND
+            elif house_type == "parking":
+                return HOUSE_TYPES.PARKING
+            else:
+                return HOUSE_TYPES.OTHER
+
+        obj_title = Attr('//meta[@property="og:title"]', 'content')
+        obj_area = CleanDecimal(
+            CleanText(
+                '//p[@class="offerArea"]/span',
+            ),
+            default=NotAvailable
+        )
+        # Raw strings: '\d' is a regex escape, not a string escape.
+        obj_rooms = CleanDecimal(
+            Regexp(
+                CleanText('//p[@class="offerRooms"]/span'),
+                r'(\d) p.',
+                default=NotAvailable
+            ),
+            default=NotAvailable
+        )
+        obj_bedrooms = CleanDecimal(
+            Regexp(
+                CleanText('//p[@class="offerRooms"]/span'),
+                r'(\d) ch.',
+                default=NotAvailable
+            ),
+            default=NotAvailable
+        )
+        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
+        obj_currency = Currency(
+            '//*[@itemprop="price"]'
+        )
+
+        def obj_utilities(self):
+            """Return INCLUDED when the notes state charges are included, UNKNOWN otherwise."""
+            notes = CleanText('//p[@class="offer-description-notes"]')(self)
+            if "Loyer mensuel charges comprises" in notes:
+                return UTILITIES.INCLUDED
+            else:
+                return UTILITIES.UNKNOWN
+
+        obj_price_per_meter = PricePerMeterFilter()
+        obj_date = Date(Regexp(CleanText('//div[@class="offer-description-notes"]'),
+                               r'.* Mis à jour: (\d{2}/\d{2}/\d{4}).*'),
+                        dayfirst=True)
+        obj_text = CleanHTML('//p[@class="descrProperty"]')
+        obj_location = CleanText('//em[@class="infoAdresse"]')
+        obj_station = CleanText(
+            '//div[has-class("offer-description-metro")]',
+            default=NotAvailable
+        )
+
+        obj_url = BrowserURL('housing', _id=Env('_id'))
+
+        def obj_photos(self):
+            """Return the advert photos as 800x600 absolute URLs, skipping SVG images."""
+            photos = []
+            for img in XPath('//ul[@class="thumbsContainer"]//img/@src')(self):
+                if img.endswith('.svg'):
+                    continue
+                url = u'%s' % img.replace('182x136', '800x600')
+                url = urljoin(self.page.url, url)  # Ensure URL is absolute
+                photos.append(HousingPhoto(url))
+            return photos
+
+        def obj_DPE(self):
+            """Return the ENERGY_CLASS named by the DPE marker, or NotAvailable."""
+            energy_value = CleanText(
+                '//ul[@class="energyInfosDPE"]//li[@class="energyInfos"]/span/@data-class',
+                default=""
+            )(self)
+            # [:1] instead of [0]: a marker with nothing after "DPE" yields ''
+            # (hence NotAvailable) rather than raising IndexError.
+            energy_letter = energy_value.replace("DPE", "").strip()[:1]
+            return getattr(ENERGY_CLASS, energy_letter, NotAvailable)
+
+        def obj_GES(self):
+            """Return the ENERGY_CLASS named by the GES marker, or NotAvailable."""
+            greenhouse_value = CleanText(
+                '//ul[@class="energyInfosGES"]//li[@class="energyInfos"]/span/@data-class',
+                default=""
+            )(self)
+            # Same safeguard as obj_DPE against an empty remainder.
+            greenhouse_letter = greenhouse_value.replace("GES", "").strip()[:1]
+            return getattr(ENERGY_CLASS, greenhouse_letter, NotAvailable)
+
+        def obj_details(self):
+            """Collect the creation date, the agency fees and every listed criterion."""
+            details = {}
+
+            details["creationDate"] = Date(
+                Regexp(
+                    CleanText(
+                        '//div[@class="offer-description-notes"]'
+                    ),
+                    r'.*Mis en ligne: (\d{2}/\d{2}/\d{4}).*'
+                ),
+                dayfirst=True
+            )(self)
+
+            honoraires = CleanText(
+                (
+                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
+                ),
+                default=None
+            )(self)
+            if honoraires:
+                # Keep what follows the first ':'; when the label has no ':'
+                # use it as a whole instead of raising IndexError.
+                parts = honoraires.split(":")
+                fees = parts[1] if len(parts) > 1 else parts[0]
+                details["Honoraires"] = (
+                    "{} (TTC, en sus)".format(fees.strip())
+                )
+
+            for li in XPath('//ul[@itemprop="description"]/li')(self):
+                label = CleanText('./span[has-class("criteria-label")]')(li)
+                value = CleanText('./span[has-class("criteria-value")]')(li)
+                details[label] = value
+
+            return details
+
+    def get_phone_url_datas(self):
+        """Return ``(url, params)`` for the phone request.
+
+        The parameters are copied from the ``data-*`` attributes of the
+        first "show phone" button found on the page.
+        """
+        a = XPath('//button[has-class("js-show-phone-offer-sale-bottom")]')(self.doc)[0]
+        urlcontact = 'http://www.logic-immo.com/modalMail'
+        # (request parameter, button attribute) pairs, in sending order.
+        attributes = (
+            ('universe', 'data-univers'),
+            ('source', 'data-source'),
+            ('pushcontact', 'data-pushcontact'),
+            ('mapper', 'data-mapper'),
+            ('offerid', 'data-offerid'),
+            ('offerflag', 'data-offerflag'),
+            ('campaign', 'data-campaign'),
+            ('xtpage', 'data-xtpage'),
+            ('offertransactiontype', 'data-offertransactiontype'),
+            ('aeisource', 'data-aeisource'),
+            ('shownumber', 'data-shownumber'),
+        )
+        params = {}
+        for key, attribute in attributes:
+            params[key] = CleanText('./@%s' % attribute)(a)
+        params['corail'] = 1
+        return urlcontact, params
+
+
+class SearchPage(HTMLPage):
+    """Search results page listing several adverts."""
+
+    @method
+    class iter_sharing(ListElement):
+        # One Housing per flat-sharing advert block.
+        item_xpath = '//article[has-class("offer-block")]'
+
+        class item(ItemElement):
+            klass = Housing
+
+            obj_id = Format('colocation-%s', CleanText('./div/header/@id', replace=[('header-offer-', '')]))
+            obj_type = POSTS_TYPES.SHARING
+            obj_advert_type = ADVERT_TYPES.PROFESSIONAL
+            obj_title = CleanText(CleanHTML('./div/header/section/p[@class="property-type"]/span/@title'))
+
+            obj_area = CleanDecimal('./div/header/section/p[@class="offer-attributes"]/a/span[@class="offer-area-number"]',
+                                    default=0)
+
+            obj_cost = CleanDecimal('./div/header/section/p[@class="price"]', default=0)
+            obj_currency = Currency(
+                './div/header/section/p[@class="price"]'
+            )
+            obj_utilities = UTILITIES.UNKNOWN
+
+            obj_text = CleanText(
+                './div/div[@class="content-offer"]/section[has-class("content-desc")]/p/span[has-class("offer-text")]/@title',
+                default=NotLoaded
+            )
+
+            # dayfirst=True, as for the same date format on HousingPage, so
+            # that e.g. 05/03 is read as 5 March rather than 3 May.
+            obj_date = Date(Regexp(CleanText('./div/header/section/p[has-class("update-date")]'),
+                                   r".*(\d{2}/\d{2}/\d{4}).*"),
+                            dayfirst=True)
+
+            obj_location = CleanText(
+                '(./div/div[@class="content-offer"]/section[has-class("content-desc")]/p)[1]/span/@title',
+                default=NotLoaded
+            )
+
+    @method
+    class iter_housings(ListElement):
+        # One Housing per advert block of the offer list.
+        item_xpath = '//div[has-class("offer-list")]//div[has-class("offer-block")]'
+
+        class item(ItemElement):
+            # Common XPath prefix shared by most of the filters below.
+            offer_details_wrapper = (
+                './/div[has-class("offer-details-wrapper")]'
+            )
+            klass = Housing
+
+            # "<first part of the URL 'type' group>-<block id>".
+            obj_id = Format(
+                '%s-%s',
+                Regexp(Env('type'), '(.*)-.*'),
+                CleanText('./@id', replace=[('header-offer-', '')])
+            )
+            obj_type = Env('query_type')
+            obj_advert_type = ADVERT_TYPES.PROFESSIONAL
+
+            def obj_house_type(self):
+                """Map the (lowercased) advert type name to a HOUSE_TYPES value; OTHER by default."""
+                house_type = CleanText('.//div[has-class("offer-details-caracteristik")]/meta[@itemprop="name"]/@content')(self).lower()
+                if house_type == "appartement":
+                    return HOUSE_TYPES.APART
+                elif house_type == "maison":
+                    return HOUSE_TYPES.HOUSE
+                elif house_type == "terrain":
+                    return HOUSE_TYPES.LAND
+                elif house_type == "parking":
+                    return HOUSE_TYPES.PARKING
+                else:
+                    return HOUSE_TYPES.OTHER
+
+            obj_title = CleanText('.//div[has-class("offer-details-type")]/a/@title')
+
+            # Link target followed by the optional 'data-orpi' attribute.
+            obj_url = Format(u'%s%s',
+                             CleanText('.//div/a[@class="offer-link"]/@href'),
+                             CleanText('.//div/a[@class="offer-link"]/@data-orpi', default=""))
+
+            obj_area = CleanDecimal(
+                (
+                    offer_details_wrapper +
+                    '/div/div/div[has-class("offer-details-second")]' +
+                    '/div/h3[has-class("offer-attributes")]/span' +
+                    '/span[has-class("offer-area-number")]'
+                ),
+                default=NotLoaded
+            )
+            obj_rooms = CleanDecimal(
+                (
+                    offer_details_wrapper +
+                    '/div/div/div[has-class("offer-details-second")]' +
+                    '/div/h3[has-class("offer-attributes")]' +
+                    '/span[has-class("offer-rooms")]' +
+                    '/span[has-class("offer-rooms-number")]'
+                ),
+                default=NotAvailable
+            )
+            obj_cost = CleanDecimal(
+                Regexp(
+                    CleanText(
+                        (
+                            offer_details_wrapper +
+                            '/div/p[@class="offer-price"]/span'
+                        ),
+                        default=NotLoaded
+                    ),
+                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
+                    default=NotLoaded
+                ),
+                default=NotLoaded
+            )
+            obj_currency = Currency(
+                offer_details_wrapper + '/div/p[has-class("offer-price")]/span'
+            )
+            obj_price_per_meter = PricePerMeterFilter()
+            obj_utilities = UTILITIES.UNKNOWN
+            obj_text = CleanText(
+                offer_details_wrapper + '/div/div/div/p[has-class("offer-description")]/span'
+            )
+            obj_location = CleanText(
+                offer_details_wrapper + '/div[@class="offer-details-location"]',
+                replace=[('Voir sur la carte', '')]
+            )
+
+            def obj_photos(self):
+                """Return a list holding the advert picture (800x600, absolute URL), if any."""
+                photos = []
+                url = None
+                try:
+                    url = Attr(
+                        './/div[has-class("offer-picture")]//img',
+                        'src'
+                    )(self)
+                except Exception:
+                    # Best effort: an advert whose picture cannot be read
+                    # simply gets no photo. KeyboardInterrupt and SystemExit
+                    # are no longer swallowed.
+                    pass
+
+                if url:
+                    url = url.replace('335x253', '800x600')
+                    url = urljoin(self.page.url, url)  # Ensure URL is absolute
+                    photos.append(HousingPhoto(url))
+                return photos
+
+            def obj_details(self):
+                """Return the details dict, holding the agency fees when they are displayed."""
+                details = {}
+                honoraires = CleanText(
+                    (
+                        self.offer_details_wrapper +
+                        '/div/div/p[@class="offer-agency-fees"]'
+                    ),
+                    default=None
+                )(self)
+                if honoraires:
+                    # Keep what follows the first ':'; when the label has no
+                    # ':' use it as a whole instead of raising IndexError.
+                    parts = honoraires.split(":")
+                    fees = parts[1] if len(parts) > 1 else parts[0]
+                    details["Honoraires"] = (
+                        "{} (TTC, en sus)".format(fees.strip())
+                    )
+                return details
diff --git a/modules/logicimmo/test.py b/modules/logicimmo/test.py
new file mode 100644
index 0000000..46f9ef3
--- /dev/null
+++ b/modules/logicimmo/test.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Bezleputh
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
+from weboob.tools.test import BackendTest
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+
+
+class LogicimmoTest(BackendTest, HousingTest):
+    """Live tests of the logicimmo backend, built on the shared HousingTest checks."""
+
+    MODULE = 'logicimmo'
+
+    # Field name lists read by HousingTest (its checks are not defined here).
+    FIELDS_ALL_HOUSINGS_LIST = [
+        "id",
+        "type",
+        "advert_type",
+        "house_type",
+        "url",
+        "title",
+        "area",
+        "cost",
+        "currency",
+        "utilities",
+        "date",
+        "location",
+        "text",
+        "details",
+        "rooms",
+    ]
+    FIELDS_ANY_HOUSINGS_LIST = ["photos"]
+    FIELDS_ALL_SINGLE_HOUSING = [
+        "id",
+        "url",
+        "type",
+        "advert_type",
+        "house_type",
+        "title",
+        "area",
+        "cost",
+        "currency",
+        "utilities",
+        "date",
+        "location",
+        "text",
+        "phone",
+        "details",
+    ]
+    FIELDS_ANY_SINGLE_HOUSING = ["photos", "station", "rooms", "phone", "DPE", "GES"]
+    DO_NOT_DISTINGUISH_FURNISHED_RENT = True
+
+    def test_logicimmo_rent(self):
+        """Check a RENT query (area_min 20, cost_max 1500) on the first three 'paris' cities."""
+        query = Query()
+        query.type = POSTS_TYPES.RENT
+        query.area_min = 20
+        query.cost_max = 1500
+        query.cities = []
+        for rank, town in enumerate(self.backend.search_city('paris'), 1):
+            town.backend = self.backend.name
+            query.cities.append(town)
+            if rank == 3:
+                break
+        self.check_against_query(query)
+
+    def test_logicimmo_sale(self):
+        """Check a SALE query (area_min 20) on the first three 'paris' cities."""
+        query = Query()
+        query.type = POSTS_TYPES.SALE
+        query.area_min = 20
+        query.cities = []
+        for rank, town in enumerate(self.backend.search_city('paris'), 1):
+            town.backend = self.backend.name
+            query.cities.append(town)
+            if rank == 3:
+                break
+        self.check_against_query(query)
+
+    def test_logicimmo_furnished_rent(self):
+        """Check a FURNISHED_RENT query (area_min 20, cost_max 1500) on the first three 'paris' cities."""
+        query = Query()
+        query.type = POSTS_TYPES.FURNISHED_RENT
+        query.area_min = 20
+        query.cost_max = 1500
+        query.cities = []
+        for rank, town in enumerate(self.backend.search_city('paris'), 1):
+            town.backend = self.backend.name
+            query.cities.append(town)
+            if rank == 3:
+                break
+        self.check_against_query(query)
+
+    def test_logicimmo_viager(self):
+        """Check a VIAGER query on the first three 'paris' cities."""
+        query = Query()
+        query.type = POSTS_TYPES.VIAGER
+        query.cities = []
+        for rank, town in enumerate(self.backend.search_city('paris'), 1):
+            town.backend = self.backend.name
+            query.cities.append(town)
+            if rank == 3:
+                break
+        self.check_against_query(query)
+
+    def test_logicimmo_personal(self):
+        """A query restricted to personal adverts must return no result."""
+        query = Query()
+        query.type = POSTS_TYPES.RENT
+        query.advert_types = [ADVERT_TYPES.PERSONAL]
+        query.area_min = 20
+        query.cost_max = 900
+        query.cities = []
+        matches = self.backend.search_city('paris')
+        for town in matches:
+            town.backend = self.backend.name
+            query.cities.append(town)
+
+        found = list(self.backend.search_housings(query))
+        self.assertEqual(len(found), 0)
diff --git a/modules/pap/__init__.py b/modules/pap/__init__.py
new file mode 100644
index 0000000..efb6685
--- /dev/null
+++ b/modules/pap/__init__.py
@@ -0,0 +1,3 @@
+from .module import PapModule
+
+__all__ = ['PapModule']
diff --git a/modules/pap/browser.py b/modules/pap/browser.py
new file mode 100644
index 0000000..9d23f95
--- /dev/null
+++ b/modules/pap/browser.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.browser import PagesBrowser, URL
+from weboob.capabilities.housing import TypeNotSupported, POSTS_TYPES
+from weboob.tools.compat import urlencode
+
+from .pages import HousingPage, CitiesPage
+from .constants import TYPES, RET
+
+
+__all__ = ['PapBrowser']
+
+
+class PapBrowser(PagesBrowser):
+    """Browser for pap.fr: city lookup, housing search and advert details."""
+
+    BASEURL = 'https://www.pap.fr'
+    housing = URL('/annonces/(?P<_id>.*)', HousingPage)
+    search_page = URL('/recherche')
+    search_result_page = URL('/annonce/.*', HousingPage)
+    # The group name must match the ``pattern=`` keyword used in search_geo();
+    # the raw string keeps the ``\?`` regex escape intact.
+    cities = URL(r'/json/ac-geo\?q=(?P<pattern>.*)', CitiesPage)
+
+    def search_geo(self, pattern):
+        """Return the cities matching ``pattern``."""
+        return self.cities.open(pattern=pattern).iter_cities()
+
+    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
+        """Submit the search form and return the housings of the result page.
+
+        :raises TypeNotSupported: if ``type`` has no entry in ``TYPES``.
+        """
+        if type not in TYPES:
+            raise TypeNotSupported()
+
+        self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})
+
+        # Unset bounds are sent as empty strings.
+        data = {'geo_objets_ids': ','.join(cities),
+                'surface[min]': area_min or '',
+                'surface[max]': area_max or '',
+                'prix[min]': cost_min or '',
+                'prix[max]': cost_max or '',
+                'produit': TYPES.get(type, 'location'),
+                'nb_resultats_par_page': 40,
+                'action': 'submit'
+                }
+
+        if nb_rooms:
+            # Minimum and maximum are both set to nb_rooms.
+            data['nb_pieces[min]'] = nb_rooms
+            data['nb_pieces[max]'] = nb_rooms
+
+        if type == POSTS_TYPES.FURNISHED_RENT:
+            data['tags[]'] = 'meuble'
+
+        # Viager searches use a fixed type; otherwise translate the requested
+        # house types, ignoring those unknown to RET.
+        if type == POSTS_TYPES.VIAGER:
+            ret = ['viager']
+        else:
+            ret = [RET[house_type] for house_type in house_types
+                   if house_type in RET]
+
+        # 'typesbien[]' is repeated once per type, so it is appended by hand
+        # after the url-encoded form. With no type at all, one empty
+        # 'typesbien[]=' is still sent.
+        types_key = '&typesbien%5B%5D='
+        _data = '%s%s%s' % (urlencode(data), types_key, types_key.join(ret))
+        return self.search_page.go(data=_data).iter_housings(
+            query_type=type
+        )
+
+    def get_housing(self, _id, housing=None):
+        """Open the advert ``_id`` and return it, filling ``housing`` when one is given."""
+        return self.housing.go(_id=_id).get_housing(obj=housing)
diff --git a/modules/pap/constants.py b/modules/pap/constants.py
new file mode 100644
index 0000000..83795f7
--- /dev/null
+++ b/modules/pap/constants.py
@@ -0,0 +1,12 @@
+from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
+
+# Post type -> value of the 'produit' search parameter.
+TYPES = {
+    POSTS_TYPES.RENT: 'location',
+    POSTS_TYPES.FURNISHED_RENT: 'location',
+    POSTS_TYPES.SALE: 'vente',
+    POSTS_TYPES.VIAGER: 'vente',
+}
+
+# House type -> value of the 'typesbien[]' search parameter.
+RET = {
+    HOUSE_TYPES.HOUSE: 'maison',
+    HOUSE_TYPES.APART: 'appartement',
+    HOUSE_TYPES.LAND: 'terrain',
+    HOUSE_TYPES.PARKING: 'garage-parking',
+    HOUSE_TYPES.OTHER: 'divers',
+}
diff --git a/modules/pap/favicon.png b/modules/pap/favicon.png
new file mode 100644
index 0000000..a4c7a50
Binary files /dev/null and b/modules/pap/favicon.png differ
diff --git a/modules/pap/module.py b/modules/pap/module.py
new file mode 100644
index 0000000..89f0887
--- /dev/null
+++ b/modules/pap/module.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
+ ADVERT_TYPES)
+from weboob.tools.backend import Module
+
+from .browser import PapBrowser
+
+
+__all__ = ['PapModule']
+
+
+class PapModule(Module, CapHousing):
+    """Housing module for pap.fr, delegating all requests to PapBrowser."""
+
+    NAME = 'pap'
+    MAINTAINER = u'Romain Bignon'
+    EMAIL = 'romain@weboob.org'
+    VERSION = '2.1'
+    DESCRIPTION = 'French housing website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = PapBrowser
+
+    def search_housings(self, query):
+        """Search housings for ``query``, using only the cities owned by this backend."""
+        advert_types = query.advert_types
+        if len(advert_types) == 1 and advert_types[0] == ADVERT_TYPES.PROFESSIONAL:
+            # Pap is personal only
+            return []
+
+        cities = []
+        for city in query.cities:
+            if city.backend == self.name:
+                cities.append('%s' % city.id)
+        if not cities:
+            return []
+
+        return self.browser.search_housings(
+            query.type, cities, query.nb_rooms,
+            query.area_min, query.area_max,
+            query.cost_min, query.cost_max,
+            query.house_types,
+        )
+
+    def get_housing(self, housing):
+        """Fetch a housing given either a ``Housing`` object (which gets filled) or a bare id."""
+        if isinstance(housing, Housing):
+            return self.browser.get_housing(housing.id, housing)
+        return self.browser.get_housing(housing, None)
+
+    def search_city(self, pattern):
+        """Return the cities the browser finds for ``pattern``."""
+        return self.browser.search_geo(pattern)
+
+    def fill_photo(self, photo, fields):
+        """Download the photo content when 'data' is requested and not loaded yet."""
+        if 'data' not in fields or not photo.url or photo.data:
+            return photo
+        photo.data = self.browser.open(photo.url).content
+        return photo
+
+    def fill_housing(self, housing, fields):
+        """Reload the advert page into ``housing``, whatever ``fields`` contains."""
+        return self.browser.get_housing(housing.id, housing)
+
+    # Object type -> function used to fill it.
+    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
diff --git a/modules/pap/pages.py b/modules/pap/pages.py
new file mode 100644
index 0000000..1a137fe
--- /dev/null
+++ b/modules/pap/pages.py
@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import unicode_literals
+from decimal import Decimal
+
+from weboob.tools.date import parse_french_date
+from weboob.browser.pages import HTMLPage, JsonPage, pagination
+from weboob.browser.elements import ItemElement, ListElement, DictElement, method
+from weboob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
+ Env, BrowserURL, Format, Currency)
+from weboob.browser.filters.html import Attr, Link, XPath, CleanHTML
+from weboob.browser.filters.json import Dict
+from weboob.capabilities.base import NotAvailable, NotLoaded
+from weboob.capabilities.housing import (Housing, City, HousingPhoto,
+ UTILITIES, ENERGY_CLASS, POSTS_TYPES,
+ ADVERT_TYPES, HOUSE_TYPES)
+from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
+
+
+# JSON page scraper for city lookups: ``iter_cities`` builds one ``City``
+# object per entry of the JSON document.
+class CitiesPage(JsonPage):
+ @method
+ class iter_cities(DictElement):
+
+ # Each entry becomes a City whose fields are read from the entry's keys.
+ class item(ItemElement):
+ klass = City
+
+ # The 'id' and 'name' keys of the entry map directly to the City fields.
+ obj_id = Dict('id')
+ obj_name = Dict('name')
+
+
+class HousingPage(HTMLPage):
+    """Page object holding the listing scraper and the single-advert scraper.
+
+    ``iter_housings`` yields one ``Housing`` per search result (following the
+    pagination link) and ``get_housing`` builds a ``Housing`` from an advert
+    page.
+    """
+
+    @pagination
+    @method
+    class iter_housings(ListElement):
+        item_xpath = '//div[has-class("search-list-item-alt")]'
+
+        def next_page(self):
+            """Return the target of the pagination "next" link."""
+            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)
+
+        class item(ItemElement):
+            klass = Housing
+
+            def condition(self):
+                """Tell whether this result must be kept.
+
+                The title link has to point to ``/annonces/...`` and, when the
+                query type is RENT, the title must not contain "meublé".
+                """
+                title = self.obj_title(self)
+                is_not_furnished_ok = True
+                if self.env['query_type'] == POSTS_TYPES.RENT:
+                    is_not_furnished_ok = 'meublé' not in title.lower()
+                return (
+                    Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', default=None)(self) and
+                    is_not_furnished_ok
+                )
+
+            def parse(self, el):
+                """Read the tag list of the result into the element env.
+
+                ``rooms``, ``bedrooms`` and ``area`` default to ``NotLoaded``.
+                A tag containing "chambre" sets bedrooms, one containing
+                "pièce" sets rooms, and any other tag is read as the area.
+                """
+                rooms_bedrooms_area = el.xpath(
+                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
+                )
+                self.env['rooms'] = NotLoaded
+                self.env['bedrooms'] = NotLoaded
+                self.env['area'] = NotLoaded
+
+                for item in rooms_bedrooms_area:
+                    name = CleanText('.')(item)
+                    if 'chambre' in name.lower():
+                        name = 'bedrooms'
+                        value = CleanDecimal('.')(item)
+                    elif 'pièce' in name.lower():
+                        name = 'rooms'
+                        value = CleanDecimal('.')(item)
+                    else:
+                        name = 'area'
+                        # Keep only the leading number of the tag text.
+                        value = CleanDecimal(
+                            Regexp(
+                                CleanText(
+                                    '.'
+                                ),
+                                r'(\d*\.*\d*) .*'
+                            )
+                        )(item)
+                    self.env[name] = value
+
+            obj_id = Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)')
+            obj_type = Env('query_type')
+            obj_advert_type = ADVERT_TYPES.PERSONAL
+
+            def obj_house_type(self):
+                """Guess the house type from the first word of the link slug."""
+                # Same has-class() selector as condition()/obj_id, so an item
+                # accepted by condition() cannot fail here on extra CSS classes.
+                item_link = Link('./div/a[has-class("item-title")]')(self)
+                house_type = item_link.split('/')[-1].split('-')[0]
+                if 'parking' in house_type:
+                    return HOUSE_TYPES.PARKING
+                elif 'appartement' in house_type:
+                    return HOUSE_TYPES.APART
+                elif 'terrain' in house_type:
+                    return HOUSE_TYPES.LAND
+                elif 'maison' in house_type:
+                    return HOUSE_TYPES.HOUSE
+                else:
+                    return HOUSE_TYPES.OTHER
+
+            obj_title = CleanText('./div/a[has-class("item-title")]')
+            obj_area = Env('area')
+            obj_cost = CleanDecimal(CleanText('./div/a[has-class("item-title")]/span[@class="item-price"]'),
+                                    replace_dots=True, default=Decimal(0))
+            obj_currency = Currency(
+                './div/a[has-class("item-title")]/span[@class="item-price"]'
+            )
+            obj_utilities = UTILITIES.UNKNOWN
+
+            obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)
+
+            def obj_location(self):
+                """Return the description text up to its first full stop."""
+                return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]
+
+            obj_text = CleanText('./div/p[@class="item-description"]', replace=[(' Lire la suite', '')])
+            obj_rooms = Env('rooms')
+            obj_bedrooms = Env('bedrooms')
+            obj_price_per_meter = PricePerMeterFilter()
+
+            obj_url = Format(
+                u'http://www.pap.fr%s',
+                Link('./div/a[has-class("item-title")]')
+            )
+
+            def obj_photos(self):
+                """Return the result thumbnails, skipping placeholder images."""
+                photos = []
+                for img in XPath('./a/img/@src')(self):
+                    if img.endswith(("visuel-nophoto.png", 'miniature-video.png')):
+                        continue
+                    photos.append(HousingPhoto(u'%s' % img))
+                return photos
+
+    @method
+    class get_housing(ItemElement):
+        klass = Housing
+
+        def parse(self, el):
+            """Read the tag list of the advert into the element env.
+
+            ``rooms``, ``bedrooms`` and ``area`` default to ``NotAvailable``.
+            Tags that are neither a bedroom count, a room count nor a surface
+            (for instance a "le m²" tag) are ignored.
+            """
+            rooms_bedrooms_area = el.xpath(
+                './/ul[has-class("item-tags")]/li'
+            )
+            self.env['rooms'] = NotAvailable
+            self.env['bedrooms'] = NotAvailable
+            self.env['area'] = NotAvailable
+
+            for item in rooms_bedrooms_area:
+                name = CleanText('.')(item)
+                if 'chambre' in name.lower():
+                    name = 'bedrooms'
+                    value = CleanDecimal('./strong')(item)
+                elif 'pièce' in name.lower():
+                    name = 'rooms'
+                    value = CleanDecimal('./strong')(item)
+                elif ' m²' in name and 'le m²' not in name:
+                    name = 'area'
+                    # Keep only the leading number of the tag text.
+                    value = CleanDecimal(
+                        Regexp(
+                            CleanText(
+                                '.'
+                            ),
+                            r'(\d*\.*\d*) .*'
+                        )
+                    )(item)
+                else:
+                    # Unknown tag: skip it instead of storing an unbound or
+                    # stale ``value`` under the raw tag text.
+                    continue
+                self.env[name] = value
+
+        obj_id = Env('_id')
+
+        def obj_type(self):
+            """Derive the post type from the first breadcrumb link.
+
+            A "location" link gives FURNISHED_RENT when the title contains
+            "meublé" and RENT otherwise; "vente" gives SALE and "viager" gives
+            VIAGER. Anything else yields ``NotAvailable``.
+            """
+            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
+            if 'location' in prev_link:
+                title = self.obj_title(self)
+                if 'meublé' in title.lower():
+                    return POSTS_TYPES.FURNISHED_RENT
+                else:
+                    return POSTS_TYPES.RENT
+            elif 'vente' in prev_link:
+                return POSTS_TYPES.SALE
+            elif 'viager' in prev_link:
+                return POSTS_TYPES.VIAGER
+            else:
+                return NotAvailable
+
+        obj_advert_type = ADVERT_TYPES.PERSONAL
+
+        def obj_house_type(self):
+            """Guess the house type from the last word of the breadcrumb link."""
+            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
+            house_type = prev_link.split('-')[-1]
+            if 'parking' in house_type:
+                return HOUSE_TYPES.PARKING
+            elif 'appartement' in house_type:
+                return HOUSE_TYPES.APART
+            elif 'terrain' in house_type:
+                return HOUSE_TYPES.LAND
+            elif 'maison' in house_type:
+                return HOUSE_TYPES.HOUSE
+            else:
+                return HOUSE_TYPES.OTHER
+
+        obj_title = CleanText(
+            '//h1[@class="item-title"]'
+        )
+        obj_cost = CleanDecimal(
+            '//h1[@class="item-title"]/span[@class="item-price"]',
+            replace_dots=True
+        )
+        obj_currency = Currency(
+            '//h1[@class="item-title"]/span[@class="item-price"]'
+        )
+        obj_utilities = UTILITIES.UNKNOWN
+        obj_area = Env('area')
+
+        def obj_date(self):
+            """Parse the text after the last "/" of the date paragraph."""
+            date = CleanText(
+                '//p[@class="item-date"]'
+            )(self).split("/")[-1].strip()
+            return parse_french_date(date)
+
+        obj_rooms = Env('rooms')
+        obj_bedrooms = Env('bedrooms')
+        obj_price_per_meter = PricePerMeterFilter()
+        obj_location = CleanText('//div[has-class("item-description")]/h2')
+        obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))
+
+        def obj_station(self):
+            """Return the transport labels joined with ", "."""
+            return ", ".join([
+                station.text
+                for station in XPath(
+                    '//ul[has-class("item-transports")]//span[has-class("label")]'
+                )(self)
+            ])
+
+        def obj_phone(self):
+            """Return the first contact phone text, spaces turned into ", "."""
+            phone = CleanText('(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]')(self)
+            phone = phone.replace(' ', ', ')
+            return phone
+
+        obj_url = BrowserURL('housing', _id=Env('_id'))
+
+        def obj_DPE(self):
+            """Return the energy class read from an "energy-rank-<X>" CSS class.
+
+            ``NotAvailable`` is returned when the energy box is missing, when
+            no such class is present, or when the extracted letter is not an
+            attribute of ``ENERGY_CLASS``.
+            """
+            css_classes = Attr(
+                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
+                'class',
+                default=""
+            )(self)
+            prefix = "energy-rank-"
+            # The bare "energy-rank" class carries no letter; only prefixed
+            # classes do, and there may be none, so never index blindly.
+            ranks = [css.replace(prefix, "").upper()
+                     for css in css_classes.split() if css.startswith(prefix)]
+            if not ranks:
+                return NotAvailable
+            return getattr(ENERGY_CLASS, ranks[0], NotAvailable)
+
+        def obj_photos(self):
+            """Return the carousel thumbnails, skipping the video placeholder."""
+            photos = []
+            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
+                if not img.endswith('miniature-video.png'):
+                    photos.append(HousingPhoto(u'%s' % img))
+            return photos
diff --git a/modules/pap/test.py b/modules/pap/test.py
new file mode 100644
index 0000000..3785666
--- /dev/null
+++ b/modules/pap/test.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Romain Bignon
+#
+# This file is part of a weboob module.
+#
+# This weboob module is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
+from weboob.tools.test import BackendTest
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+
+
+class PapTest(BackendTest, HousingTest):
+    # Backend tests for the 'pap' module: one test per kind of query, each
+    # restricted to the cities the backend returns for 'paris'.
+    MODULE = 'pap'
+
+    # Field lists read by the HousingTest checks (presumably "ALL" fields are
+    # required on every housing and "ANY" fields on at least one -- confirm
+    # against HousingTest).
+    FIELDS_ALL_HOUSINGS_LIST = [
+        "id",
+        "type",
+        "advert_type",
+        "house_type",
+        "url",
+        "title",
+        "area",
+        "cost",
+        "currency",
+        "utilities",
+        "location",
+        "text",
+    ]
+    FIELDS_ANY_HOUSINGS_LIST = ["photos", "station"]
+    FIELDS_ALL_SINGLE_HOUSING = [
+        "id",
+        "url",
+        "type",
+        "advert_type",
+        "house_type",
+        "title",
+        "area",
+        "cost",
+        "currency",
+        "utilities",
+        "date",
+        "location",
+        "text",
+        "phone",
+    ]
+    FIELDS_ANY_SINGLE_HOUSING = ["photos", "rooms", "bedrooms", "station"]
+
+    def _use_paris_cities(self, query):
+        # Fill query.cities with every city found for 'paris', each one
+        # tagged as coming from this backend.
+        query.cities = []
+        for city in self.backend.search_city('paris'):
+            city.backend = self.backend.name
+            query.cities.append(city)
+
+    def test_pap_rent(self):
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 1500
+        query.type = POSTS_TYPES.RENT
+        self._use_paris_cities(query)
+        self.check_against_query(query)
+
+    def test_pap_sale(self):
+        query = Query()
+        query.area_min = 20
+        query.type = POSTS_TYPES.SALE
+        self._use_paris_cities(query)
+        self.check_against_query(query)
+
+    def test_pap_furnished_rent(self):
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 1500
+        query.type = POSTS_TYPES.FURNISHED_RENT
+        self._use_paris_cities(query)
+        self.check_against_query(query)
+
+    def test_pap_viager(self):
+        query = Query()
+        query.type = POSTS_TYPES.VIAGER
+        self._use_paris_cities(query)
+        # Viager adverts never have rooms, so the optional-field lists are
+        # overridden on this instance only (the class-level lists are left
+        # untouched for the other tests).
+        self.FIELDS_ANY_HOUSINGS_LIST = ["photos", "station", "bedrooms"]
+        self.FIELDS_ANY_SINGLE_HOUSING = ["photos", "bedrooms", "station"]
+        self.check_against_query(query)
+
+    def test_pap_professional(self):
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 900
+        query.type = POSTS_TYPES.RENT
+        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
+        self._use_paris_cities(query)
+
+        # A professional-only query must produce no result from this backend.
+        results = list(self.backend.search_housings(query))
+        self.assertEqual(len(results), 0)