diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md index dcfd1c5..7bb5412 100644 --- a/doc/0.getting_started.md +++ b/doc/0.getting_started.md @@ -27,6 +27,13 @@ your disk, to point `modules_path` configuration option to `path_to_weboob_git/modules` (see the configuration section below) and to run a `git pull; python setup.py install` in the WebOOB git repo often. +A copy of the WebOOB modules is available in the `modules` directory at the +root of this repository. To use it, set `"modules_path": "/path/to/flatisfy/modules"` in your configuration. +This copy may or may not be more up to date than the official +WebOOB modules: some changes made here are not backported +upstream, and the official modules are not synced into the `modules` folder on a +regular basis, so try both and see which one matches your needs! :) + ## TL;DR diff --git a/modules/explorimmo/__init__.py b/modules/explorimmo/__init__.py new file mode 100644 index 0000000..e38012f --- /dev/null +++ b/modules/explorimmo/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see <http://www.gnu.org/licenses/>. 
+ + +from .module import ExplorimmoModule + + +__all__ = ['ExplorimmoModule'] diff --git a/modules/explorimmo/browser.py b/modules/explorimmo/browser.py new file mode 100644 index 0000000..d308aee --- /dev/null +++ b/modules/explorimmo/browser.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see <http://www.gnu.org/licenses/>. 
+ +from weboob.browser import PagesBrowser, URL +from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES, + HOUSE_TYPES) +from weboob.tools.compat import urlencode +from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage + + +class ExplorimmoBrowser(PagesBrowser): + BASEURL = 'https://immobilier.lefigaro.fr' + + cities = URL('/rest/locations\?q=(?P.*)', CitiesPage) + search = URL('/annonces/resultat/annonces.html\?(?P.*)', SearchPage) + housing_html = URL('/annonces/annonce-(?P<_id>.*).html', HousingPage) + phone = URL('/rest/classifieds/(?P<_id>.*)/phone', PhonePage) + housing = URL('/rest/classifieds/(?P<_id>.*)', + '/rest/classifieds/\?(?P.*)', HousingPage2) + + TYPES = {POSTS_TYPES.RENT: 'location', + POSTS_TYPES.SALE: 'vente', + POSTS_TYPES.FURNISHED_RENT: 'location', + POSTS_TYPES.VIAGER: 'vente'} + + RET = {HOUSE_TYPES.HOUSE: 'Maison', + HOUSE_TYPES.APART: 'Appartement', + HOUSE_TYPES.LAND: 'Terrain', + HOUSE_TYPES.PARKING: 'Parking', + HOUSE_TYPES.OTHER: 'Divers'} + + def get_cities(self, pattern): + return self.cities.open(city=pattern).get_cities() + + def search_housings(self, type, cities, nb_rooms, area_min, area_max, + cost_min, cost_max, house_types, advert_types): + + if type not in self.TYPES: + raise TypeNotSupported() + + ret = [] + if type == POSTS_TYPES.VIAGER: + ret = ['Viager'] + else: + for house_type in house_types: + if house_type in self.RET: + ret.append(self.RET.get(house_type)) + + data = {'location': ','.join(cities).encode('iso 8859-1'), + 'furnished': type == POSTS_TYPES.FURNISHED_RENT, + 'areaMin': area_min or '', + 'areaMax': area_max or '', + 'priceMin': cost_min or '', + 'priceMax': cost_max or '', + 'transaction': self.TYPES.get(type, 'location'), + 'recherche': '', + 'mode': '', + 'proximity': '0', + 'roomMin': nb_rooms or '', + 'page': '1'} + + query = u'%s%s%s' % (urlencode(data), '&type=', '&type='.join(ret)) + + return self.search.go(query=query).iter_housings( + query_type=type, + 
advert_types=advert_types + ) + + def get_housing(self, _id, housing=None): + return self.housing.go(_id=_id).get_housing(obj=housing) + + def get_phone(self, _id): + return self.phone.go(_id=_id).get_phone() + + def get_total_page(self, js_datas): + return self.housing.open(js_datas=js_datas).get_total_page() diff --git a/modules/explorimmo/module.py b/modules/explorimmo/module.py new file mode 100644 index 0000000..856d38f --- /dev/null +++ b/modules/explorimmo/module.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . 
+ + +from weboob.tools.backend import Module +from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto + +from .browser import ExplorimmoBrowser + + +__all__ = ['ExplorimmoModule'] + + +class ExplorimmoModule(Module, CapHousing): + NAME = 'explorimmo' + DESCRIPTION = u'explorimmo website' + MAINTAINER = u'Bezleputh' + EMAIL = 'carton_ben@yahoo.fr' + LICENSE = 'AGPLv3+' + VERSION = '2.1' + + BROWSER = ExplorimmoBrowser + + def get_housing(self, housing): + if isinstance(housing, Housing): + id = housing.id + else: + id = housing + housing = None + housing = self.browser.get_housing(id, housing) + return housing + + def search_city(self, pattern): + return self.browser.get_cities(pattern) + + def search_housings(self, query): + cities = ['%s' % c.id for c in query.cities if c.backend == self.name] + if len(cities) == 0: + return list() + + return self.browser.search_housings(query.type, cities, query.nb_rooms, + query.area_min, query.area_max, + query.cost_min, query.cost_max, + query.house_types, + query.advert_types) + + def fill_housing(self, housing, fields): + if 'phone' in fields: + housing.phone = self.browser.get_phone(housing.id) + fields.remove('phone') + + if len(fields) > 0: + self.browser.get_housing(housing.id, housing) + + return housing + + def fill_photo(self, photo, fields): + if 'data' in fields and photo.url and not photo.data: + photo.data = self.browser.open(photo.url).content + return photo + + OBJECTS = {Housing: fill_housing, + HousingPhoto: fill_photo, + } diff --git a/modules/explorimmo/pages.py b/modules/explorimmo/pages.py new file mode 100644 index 0000000..35631b8 --- /dev/null +++ b/modules/explorimmo/pages.py @@ -0,0 +1,455 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. 
+# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . +from __future__ import unicode_literals + +import json +import math +import re +from decimal import Decimal +from datetime import datetime +from weboob.browser.filters.json import Dict +from weboob.browser.elements import ItemElement, ListElement, DictElement, method +from weboob.browser.pages import JsonPage, HTMLPage, pagination +from weboob.browser.filters.standard import (CleanText, CleanDecimal, Currency, + Regexp, Env, BrowserURL, Filter, + Format) +from weboob.browser.filters.html import Attr, CleanHTML, XPath +from weboob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency +from weboob.capabilities.housing import (Housing, HousingPhoto, City, + UTILITIES, ENERGY_CLASS, POSTS_TYPES, + ADVERT_TYPES, HOUSE_TYPES) +from weboob.tools.capabilities.housing.housing import PricePerMeterFilter +from weboob.tools.compat import unquote + + +class CitiesPage(JsonPage): + + ENCODING = 'UTF-8' + + def build_doc(self, content): + content = super(CitiesPage, self).build_doc(content) + if content: + return content + else: + return [{"locations": []}] + + @method + class get_cities(DictElement): + item_xpath = '0/locations' + + class item(ItemElement): + klass = City + + obj_id = Dict('label') + obj_name = Dict('label') + + +class SearchPage(HTMLPage): + @pagination + @method + class 
iter_housings(ListElement): + item_xpath = '//div[starts-with(@id, "bloc-vue-")]' + + def next_page(self): + js_datas = CleanText( + '//div[@id="js-data"]/@data-rest-search-request' + )(self).split('?')[-1].split('&') + + try: + resultsPerPage = next( + x for x in js_datas if 'resultsPerPage' in x + ).split('=')[-1] + currentPageNumber = next( + x for x in js_datas if 'currentPageNumber' in x + ).split('=')[-1] + resultCount = CleanText( + '(//div[@id="js-data"]/@data-result-count)[1]' + )(self) + totalPageNumber = math.ceil( + int(resultCount) / int(resultsPerPage) + ) + + next_page = int(currentPageNumber) + 1 + if next_page <= totalPageNumber: + return self.page.url.replace( + 'page=%s' % currentPageNumber, + 'page=%d' % next_page + ) + except StopIteration: + pass + + class item(ItemElement): + klass = Housing + price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]' + + def is_agency(self): + agency = CleanText('.//span[has-class("item-agency-name")]')(self.el) + return 'annonce de particulier' not in agency.lower() + + def condition(self): + if len(self.env['advert_types']) == 1: + is_agency = self.is_agency() + if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL: + return not is_agency + elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL: + return is_agency + return Attr('.', 'data-classified-id', default=False)(self) + + obj_id = Attr('.', 'data-classified-id') + obj_type = Env('query_type') + obj_title = CleanText('./div/h2[@class="item-type"]') + + def obj_advert_type(self): + if self.is_agency(): + return ADVERT_TYPES.PROFESSIONAL + else: + return ADVERT_TYPES.PERSONAL + + def obj_house_type(self): + type = self.obj_title(self).split()[0].lower() + if type == "appartement" or type == "studio" or type == "chambre": + return HOUSE_TYPES.APART + elif type == "maison" or type == "villa": + return HOUSE_TYPES.HOUSE + elif type == "parking": + return HOUSE_TYPES.PARKING + elif type == "terrain": + return 
HOUSE_TYPES.LAND + else: + return HOUSE_TYPES.OTHER + + def obj_location(self): + script = CleanText('./script')(self) + try: + # Should be standard JSON+LD data + script = json.loads(script) + except ValueError: + try: + # But explorimmo can't write JSON correctly and there + # is a trailing "}" + script = json.loads(script.strip().rstrip('}')) + except ValueError: + script = None + if not script: + return NotLoaded + + try: + return '%s (%s)' % ( + script['address']['addressLocality'], + script['address']['postalCode'] + ) + except (KeyError): + return NotLoaded + + def obj_cost(self): + cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''), + r'de (.*) à .*', + default=0))(self) + if cost == 0: + return CleanDecimal(self.price_selector, default=NotAvailable)(self) + else: + return cost + + obj_currency = Currency(price_selector) + + def obj_utilities(self): + utilities = CleanText( + './div/div/span[@class="price-label"]|' + './div/div[@class="item-price-pdf"]|' + './div/div/span[@class="item-price"]' + )(self) + if "CC" in utilities: + return UTILITIES.INCLUDED + else: + return UTILITIES.UNKNOWN + + obj_text = CleanText('./div/p[@itemprop="description"]') + obj_area = CleanDecimal( + Regexp( + obj_title, + r'(.*?)([\d,\.]*) m2(.*?)', + '\\2', + default=None + ), + replace_dots=True, + default=NotLoaded + ) + + obj_url = Format( + "https://immobilier.lefigaro.fr/annonces/annonce-%s.html", + CleanText('./@data-classified-id') + ) + + obj_price_per_meter = PricePerMeterFilter() + + def obj_phone(self): + phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]', + replace=[('Téléphoner : ', '')], + default=NotLoaded)(self) + + if '...' 
in phone: + return NotLoaded + + return phone + + def obj_details(self): + charges = CleanText('.//span[@class="price-fees"]', + default=None)(self) + if charges: + return { + "fees": charges.split(":")[1].strip() + } + else: + return NotLoaded + + def obj_photos(self): + url = CleanText('./div[has-class("default-img")]/img/@data-src')(self) + if url: + url = unquote(url) + if "http://" in url[3:]: + rindex = url.rfind("?") + if rindex == -1: + rindex = None + url = url[url.find("http://", 3):rindex] + return [HousingPhoto(url)] + else: + return NotLoaded + + +class TypeDecimal(Filter): + def filter(self, el): + return Decimal(el) + + +class FromTimestamp(Filter): + def filter(self, el): + return datetime.fromtimestamp(el / 1000.0) + + +class PhonePage(JsonPage): + def get_phone(self): + return self.doc.get('phoneNumber') + + +class HousingPage2(JsonPage): + @method + class get_housing(ItemElement): + klass = Housing + + def is_agency(self): + return Dict('agency/isParticulier')(self) == 'false' + + obj_id = Env('_id') + + def obj_type(self): + transaction = Dict('characteristics/transaction')(self) + if transaction == 'location': + if Dict('characteristics/isFurnished')(self): + return POSTS_TYPES.FURNISHED_RENT + else: + return POSTS_TYPES.RENT + elif transaction == 'vente': + type = Dict('characteristics/estateType')(self).lower() + if 'viager' in type: + return POSTS_TYPES.VIAGER + else: + return POSTS_TYPES.SALE + else: + return NotAvailable + + def obj_advert_type(self): + if self.is_agency: + return ADVERT_TYPES.PROFESSIONAL + else: + return ADVERT_TYPES.PERSONAL + + def obj_house_type(self): + type = Dict('characteristics/estateType')(self).lower() + if 'appartement' in type: + return HOUSE_TYPES.APART + elif 'maison' in type: + return HOUSE_TYPES.HOUSE + elif 'parking' in type: + return HOUSE_TYPES.PARKING + elif 'terrain' in type: + return HOUSE_TYPES.LAND + else: + return HOUSE_TYPES.OTHER + + obj_title = Dict('characteristics/titleWithTransaction') + 
obj_location = Format('%s %s %s', Dict('location/address'), + Dict('location/cityLabel'), + Dict('location/postalCode')) + + def obj_cost(self): + cost = TypeDecimal(Dict('characteristics/price'))(self) + if cost == 0: + cost = TypeDecimal(Dict('characteristics/priceMin'))(self) + return cost + + obj_currency = BaseCurrency.get_currency('€') + + def obj_utilities(self): + are_fees_included = Dict('characteristics/areFeesIncluded', + default=None)(self) + if are_fees_included: + return UTILITIES.INCLUDED + else: + return UTILITIES.EXCLUDED + + obj_text = CleanHTML(Dict('characteristics/description')) + obj_url = BrowserURL('housing_html', _id=Env('_id')) + + def obj_area(self): + area = TypeDecimal(Dict('characteristics/area'))(self) + if area == 0: + area = TypeDecimal(Dict('characteristics/areaMin'))(self) + return area + + obj_date = FromTimestamp(Dict('characteristics/date')) + obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount')) + + def obj_rooms(self): + # TODO: Why is roomCount a list? 
+ rooms = Dict('characteristics/roomCount', default=[])(self) + if rooms: + return TypeDecimal(rooms[0])(self) + return NotAvailable + + obj_price_per_meter = PricePerMeterFilter() + + def obj_photos(self): + photos = [] + for img in Dict('characteristics/images')(self): + m = re.search('http://thbr\.figarocms\.net.*(http://.*)', img.get('xl')) + if m: + photos.append(HousingPhoto(m.group(1))) + else: + photos.append(HousingPhoto(img.get('xl'))) + return photos + + def obj_DPE(self): + DPE = Dict( + 'characteristics/energyConsumptionCategory', + default="" + )(self) + return getattr(ENERGY_CLASS, DPE, NotAvailable) + + def obj_GES(self): + GES = Dict( + 'characteristics/greenhouseGasEmissionCategory', + default="" + )(self) + return getattr(ENERGY_CLASS, GES, NotAvailable) + + def obj_details(self): + details = {} + details['fees'] = Dict( + 'characteristics/fees', default=NotAvailable + )(self) + details['agencyFees'] = Dict( + 'characteristics/agencyFees', default=NotAvailable + )(self) + details['guarantee'] = Dict( + 'characteristics/guarantee', default=NotAvailable + )(self) + details['bathrooms'] = Dict( + 'characteristics/bathroomCount', default=NotAvailable + )(self) + details['creationDate'] = FromTimestamp( + Dict( + 'characteristics/creationDate', default=NotAvailable + ), + default=NotAvailable + )(self) + details['availabilityDate'] = Dict( + 'characteristics/estateAvailabilityDate', default=NotAvailable + )(self) + details['exposure'] = Dict( + 'characteristics/exposure', default=NotAvailable + )(self) + details['heatingType'] = Dict( + 'characteristics/heatingType', default=NotAvailable + )(self) + details['floor'] = Dict( + 'characteristics/floor', default=NotAvailable + )(self) + details['bedrooms'] = Dict( + 'characteristics/bedroomCount', default=NotAvailable + )(self) + details['isFurnished'] = Dict( + 'characteristics/isFurnished', default=NotAvailable + )(self) + rooms = Dict('characteristics/roomCount', default=[])(self) + if len(rooms): + 
details['rooms'] = rooms[0] + details['available'] = Dict( + 'characteristics/isAvailable', default=NotAvailable + )(self) + agency = Dict('agency', default=NotAvailable)(self) + details['agency'] = ', '.join([ + x for x in [ + agency.get('corporateName', ''), + agency.get('corporateAddress', ''), + agency.get('corporatePostalCode', ''), + agency.get('corporateCity', '') + ] if x + ]) + return details + + def get_total_page(self): + return self.doc.get('pagination').get('total') if 'pagination' in self.doc else 0 + + +class HousingPage(HTMLPage): + @method + class get_housing(ItemElement): + klass = Housing + + obj_id = Env('_id') + obj_title = CleanText('//h1[@itemprop="name"]') + obj_location = CleanText('//span[@class="informations-localisation"]') + obj_cost = CleanDecimal('//span[@itemprop="price"]') + obj_currency = Currency('//span[@itemprop="price"]') + obj_text = CleanHTML('//div[@itemprop="description"]') + obj_url = BrowserURL('housing', _id=Env('_id')) + obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'), + r'(.*?)(\d*) m2(.*?)', '\\2'), default=NotAvailable) + obj_price_per_meter = PricePerMeterFilter() + + def obj_photos(self): + photos = [] + for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self): + url = Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')(img) + photos.append(HousingPhoto(url)) + return photos + + def obj_details(self): + details = dict() + for item in XPath('//div[@class="features clearfix"]/ul/li')(self): + key = CleanText('./span[@class="name"]')(item) + value = CleanText('./span[@class="value"]')(item) + if value and key: + details[key] = value + + key = CleanText('//div[@class="title-dpe clearfix"]')(self) + value = CleanText('//div[@class="energy-consumption"]')(self) + if value and key: + details[key] = value + return details diff --git a/modules/explorimmo/test.py b/modules/explorimmo/test.py new file mode 100644 index 0000000..dfa8cdf --- /dev/null +++ 
b/modules/explorimmo/test.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . + +from weboob.capabilities.housing import Query, ADVERT_TYPES, POSTS_TYPES +from weboob.tools.capabilities.housing.housing_test import HousingTest +from weboob.tools.test import BackendTest + + +class ExplorimmoTest(BackendTest, HousingTest): + MODULE = 'explorimmo' + + FIELDS_ALL_HOUSINGS_LIST = [ + "id", "type", "advert_type", "house_type", "title", "location", + "utilities", "text", "area", "url" + ] + FIELDS_ANY_HOUSINGS_LIST = [ + "photos", "cost", "currency" + ] + FIELDS_ALL_SINGLE_HOUSING = [ + "id", "url", "type", "advert_type", "house_type", "title", "area", + "cost", "currency", "utilities", "date", "location", "text", "rooms", + "details" + ] + FIELDS_ANY_SINGLE_HOUSING = [ + "bedrooms", + "photos", + "DPE", + "GES", + "phone" + ] + + def test_explorimmo_rent(self): + query = Query() + query.area_min = 20 + query.cost_max = 1500 + query.type = POSTS_TYPES.RENT + query.cities = [] + for city in self.backend.search_city('paris'): + city.backend = self.backend.name + query.cities.append(city) + self.check_against_query(query) + + def test_explorimmo_sale(self): + query = Query() + query.area_min = 20 + query.type = POSTS_TYPES.SALE + query.cities = [] + for city in 
self.backend.search_city('paris'): + city.backend = self.backend.name + query.cities.append(city) + self.check_against_query(query) + + def test_explorimmo_furnished_rent(self): + query = Query() + query.area_min = 20 + query.cost_max = 1500 + query.type = POSTS_TYPES.FURNISHED_RENT + query.cities = [] + for city in self.backend.search_city('paris'): + city.backend = self.backend.name + query.cities.append(city) + self.check_against_query(query) + + def test_explorimmo_viager(self): + query = Query() + query.type = POSTS_TYPES.VIAGER + query.cities = [] + for city in self.backend.search_city('85'): + city.backend = self.backend.name + query.cities.append(city) + self.check_against_query(query) + + def test_explorimmo_personal(self): + query = Query() + query.area_min = 20 + query.cost_max = 900 + query.type = POSTS_TYPES.RENT + query.advert_types = [ADVERT_TYPES.PERSONAL] + query.cities = [] + for city in self.backend.search_city('paris'): + city.backend = self.backend.name + query.cities.append(city) + + results = list(self.backend.search_housings(query)) + self.assertEqual(len(results), 0) diff --git a/modules/foncia/__init__.py b/modules/foncia/__init__.py new file mode 100644 index 0000000..4b71602 --- /dev/null +++ b/modules/foncia/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Phyks (Lucas Verney) +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . + +from __future__ import unicode_literals + + +from .module import FonciaModule + + +__all__ = ['FonciaModule'] diff --git a/modules/foncia/browser.py b/modules/foncia/browser.py new file mode 100644 index 0000000..ce12558 --- /dev/null +++ b/modules/foncia/browser.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Phyks (Lucas Verney) +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . + +from __future__ import unicode_literals + + +from weboob.browser import PagesBrowser, URL + +from .constants import QUERY_TYPES +from .pages import CitiesPage, HousingPage, SearchPage, SearchResultsPage + + +class FonciaBrowser(PagesBrowser): + BASEURL = 'https://fr.foncia.com' + + cities = URL(r'/recherche/autocomplete\?term=(?P.+)', CitiesPage) + housing = URL(r'/(?P[^/]+)/.*\d+.htm', HousingPage) + search_results = URL(r'/(?P[^/]+)/.*', SearchResultsPage) + search = URL(r'/(?P.+)', SearchPage) + + def get_cities(self, pattern): + """ + Get cities matching a given pattern. + """ + return self.cities.open(term=pattern).iter_cities() + + def search_housings(self, query, cities): + """ + Search for housings matching given query. 
+ """ + try: + query_type = QUERY_TYPES[query.type] + except KeyError: + return [] + + self.search.go(type=query_type).do_search(query, cities) + return self.page.iter_housings(query_type=query.type) + + def get_housing(self, housing): + """ + Get specific housing. + """ + query_type, housing = housing.split(':') + self.search.go(type=query_type).find_housing(query_type, housing) + return self.page.get_housing() diff --git a/modules/foncia/constants.py b/modules/foncia/constants.py new file mode 100644 index 0000000..404f2af --- /dev/null +++ b/modules/foncia/constants.py @@ -0,0 +1,24 @@ +from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES + +QUERY_TYPES = { + POSTS_TYPES.RENT: 'location', + POSTS_TYPES.SALE: 'achat', + POSTS_TYPES.FURNISHED_RENT: 'location' +} + +QUERY_HOUSE_TYPES = { + HOUSE_TYPES.APART: ['appartement', 'appartement-meuble'], + HOUSE_TYPES.HOUSE: ['maison'], + HOUSE_TYPES.PARKING: ['parking'], + HOUSE_TYPES.LAND: ['terrain'], + HOUSE_TYPES.OTHER: ['chambre', 'programme-neuf', + 'local-commercial', 'immeuble'] +} + +AVAILABLE_TYPES = { + POSTS_TYPES.RENT: ['appartement', 'maison', 'parking', 'chambre', + 'local-commercial'], + POSTS_TYPES.SALE: ['appartement', 'maison', 'parking', 'local-commercial', + 'terrain', 'immeuble', 'programme-neuf'], + POSTS_TYPES.FURNISHED_RENT: ['appartement-meuble'] +} diff --git a/modules/foncia/favicon.png b/modules/foncia/favicon.png new file mode 100644 index 0000000..bdda286 Binary files /dev/null and b/modules/foncia/favicon.png differ diff --git a/modules/foncia/module.py b/modules/foncia/module.py new file mode 100644 index 0000000..7364ab3 --- /dev/null +++ b/modules/foncia/module.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Phyks (Lucas Verney) +# +# This file is part of a weboob module. 
+# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . + +from __future__ import unicode_literals + + +from weboob.tools.backend import Module +from weboob.capabilities.housing import CapHousing, Housing, ADVERT_TYPES, HousingPhoto + +from .browser import FonciaBrowser + + +__all__ = ['FonciaModule'] + + +class FonciaModule(Module, CapHousing): + NAME = 'foncia' + DESCRIPTION = u'Foncia housing website.' 
+ MAINTAINER = u'Phyks (Lucas Verney)' + EMAIL = 'phyks@phyks.me' + LICENSE = 'AGPLv3+' + VERSION = '2.1' + + BROWSER = FonciaBrowser + + def get_housing(self, housing): + return self.browser.get_housing(housing) + + def search_city(self, pattern): + return self.browser.get_cities(pattern) + + def search_housings(self, query): + if ( + len(query.advert_types) == 1 and + query.advert_types[0] == ADVERT_TYPES.PERSONAL + ): + # Foncia is pro only + return list() + + cities = ','.join( + ['%s' % c.name for c in query.cities if c.backend == self.name] + ) + if len(cities) == 0: + return [] + + return self.browser.search_housings(query, cities) + + def fill_housing(self, housing, fields): + if len(fields) > 0: + self.browser.get_housing(housing) + return housing + + def fill_photo(self, photo, fields): + if 'data' in fields and photo.url and not photo.data: + photo.data = self.browser.open(photo.url).content + return photo + + OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo} diff --git a/modules/foncia/pages.py b/modules/foncia/pages.py new file mode 100644 index 0000000..9c881e8 --- /dev/null +++ b/modules/foncia/pages.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Phyks (Lucas Verney) +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . 
+ +from __future__ import unicode_literals + +import datetime + +from weboob.browser.pages import JsonPage, HTMLPage, pagination +from weboob.browser.filters.standard import ( + CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError +) +from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound +from weboob.browser.elements import ItemElement, ListElement, method +from weboob.capabilities.base import NotAvailable, NotLoaded +from weboob.capabilities.housing import ( + City, Housing, HousingPhoto, + UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES +) +from weboob.tools.capabilities.housing.housing import PricePerMeterFilter + +from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES + + +class CitiesPage(JsonPage): + def iter_cities(self): + cities_list = self.doc + if isinstance(self.doc, dict): + cities_list = self.doc.values() + + for city in cities_list: + city_obj = City() + city_obj.id = city + city_obj.name = city + yield city_obj + + +class HousingPage(HTMLPage): + @method + class get_housing(ItemElement): + klass = Housing + + obj_id = Format( + '%s:%s', + Env('type'), + Attr('//div[boolean(@data-property-reference)]', 'data-property-reference') + ) + obj_advert_type = ADVERT_TYPES.PROFESSIONAL + + def obj_type(self): + type = Env('type')(self) + if type == 'location': + if 'appartement-meuble' in self.page.url: + return POSTS_TYPES.FURNISHED_RENT + else: + return POSTS_TYPES.RENT + elif type == 'achat': + return POSTS_TYPES.SALE + else: + return NotAvailable + + def obj_url(self): + return self.page.url + + def obj_house_type(self): + url = self.obj_url() + for house_type, types in QUERY_HOUSE_TYPES.items(): + for type in types: + if ('/%s/' % type) in url: + return house_type + return NotAvailable + + obj_title = CleanText('//h1[has-class("OfferTop-title")]') + obj_area = CleanDecimal( + Regexp( + CleanText( + '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]' + ), + r'(\d*\.*\d*) .*', 
+ default=NotAvailable + ), + default=NotAvailable + ) + obj_cost = CleanDecimal( + '//span[has-class("OfferTop-price")]', + default=NotAvailable + ) + obj_price_per_meter = PricePerMeterFilter() + obj_currency = Currency( + '//span[has-class("OfferTop-price")]' + ) + obj_location = Format( + '%s - %s', + CleanText('//p[@data-behat="adresseBien"]'), + CleanText('//p[has-class("OfferTop-loc")]') + ) + obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]') + obj_phone = Regexp( + Link( + '//a[has-class("OfferContact-btn--tel")]' + ), + r'tel:(.*)' + ) + + def obj_photos(self): + photos = [] + for photo in self.xpath('//div[has-class("OfferSlider")]//img'): + photo_url = Attr('.', 'src')(photo) + photo_url = photo_url.replace('640/480', '800/600') + photos.append(HousingPhoto(photo_url)) + return photos + + obj_date = datetime.date.today() + + def obj_utilities(self): + price = CleanText( + '//p[has-class("OfferTop-price")]' + )(self) + if "charges comprises" in price.lower(): + return UTILITIES.INCLUDED + else: + return UTILITIES.EXCLUDED + + obj_rooms = CleanDecimal( + '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]', + default=NotAvailable + ) + obj_bedrooms = CleanDecimal( + '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]', + default=NotAvailable + ) + + def obj_DPE(self): + try: + electric_consumption = CleanDecimal(Regexp( + Attr('//div[has-class("OfferDetails-content")]//img', 'src'), + r'https://dpe.foncia.net\/(\d+)\/.*' + ))(self) + except (RegexpError, XPathNotFound): + electric_consumption = None + + DPE = "" + if electric_consumption is not None: + if electric_consumption <= 50: + DPE = "A" + elif 50 < electric_consumption <= 90: + DPE = "B" + elif 90 < electric_consumption <= 150: + DPE = "C" + elif 150 < electric_consumption <= 230: + DPE = "D" + elif 230 < electric_consumption <= 330: + DPE = "E" + elif 330 < electric_consumption <= 450: + DPE = "F" + else: + DPE = "G" + return getattr(ENERGY_CLASS, DPE, 
NotAvailable) + return NotAvailable + + def obj_details(self): + details = {} + + dispo = Date( + Regexp( + CleanText('//p[has-class("OfferTop-dispo")]'), + r'.* (\d\d\/\d\d\/\d\d\d\d)', + default=datetime.date.today().isoformat() + ) + )(self) + if dispo is not None: + details["dispo"] = dispo + + priceMentions = CleanText( + '//p[has-class("OfferTop-mentions")]', + default=None + )(self) + if priceMentions is not None: + details["priceMentions"] = priceMentions + + agency = CleanText( + '//p[has-class("OfferContact-address")]', + default=None + )(self) + if agency is not None: + details["agency"] = agency + + for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'): + category = CleanText( + './h3[has-class("OfferDetails-title--2")]', + default=None + )(item) + if not category: + continue + + details[category] = {} + + for detail_item in item.xpath('.//ul[has-class("List--data")]/li'): + detail_title = CleanText('.//span[has-class("List-data")]')(detail_item) + detail_value = CleanText('.//*[has-class("List-value")]')(detail_item) + details[category][detail_title] = detail_value + + for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'): + detail_title = CleanText('.')(detail_item) + details[category][detail_title] = True + + try: + electric_consumption = CleanDecimal(Regexp( + Attr('//div[has-class("OfferDetails-content")]//img', 'src'), + r'https://dpe.foncia.net\/(\d+)\/.*' + ))(self) + details["electric_consumption"] = ( + '{} kWhEP/m².an'.format(electric_consumption) + ) + except (RegexpError, XPathNotFound): + pass + + return details + + +class SearchPage(HTMLPage): + def do_search(self, query, cities): + form = self.get_form('//form[@name="searchForm"]') + + form['searchForm[type]'] = QUERY_TYPES.get(query.type, None) + form['searchForm[localisation]'] = cities + form['searchForm[type_bien][]'] = [] + for house_type in query.house_types: + try: + form['searchForm[type_bien][]'].extend( + QUERY_HOUSE_TYPES[house_type] + ) + 
except KeyError: + pass + form['searchForm[type_bien][]'] = [ + x for x in form['searchForm[type_bien][]'] + if x in AVAILABLE_TYPES.get(query.type, []) + ] + if query.area_min: + form['searchForm[surface_min]'] = query.area_min + if query.area_max: + form['searchForm[surface_max]'] = query.area_max + if query.cost_min: + form['searchForm[prix_min]'] = query.cost_min + if query.cost_max: + form['searchForm[prix_max]'] = query.cost_max + if query.nb_rooms: + form['searchForm[pieces]'] = [i for i in range(1, query.nb_rooms + 1)] + form.submit() + + def find_housing(self, query_type, housing): + form = self.get_form('//form[@name="searchForm"]') + form['searchForm[type]'] = query_type + form['searchForm[reference]'] = housing + form.submit() + + +class SearchResultsPage(HTMLPage): + @pagination + @method + class iter_housings(ListElement): + item_xpath = '//article[has-class("TeaserOffer")]' + + next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]') + + class item(ItemElement): + klass = Housing + + obj_id = Format( + '%s:%s', + Env('type'), + Attr('.//span[boolean(@data-reference)]', 'data-reference') + ) + obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a') + obj_type = Env('query_type') + obj_advert_type = ADVERT_TYPES.PROFESSIONAL + + def obj_house_type(self): + url = self.obj_url(self) + for house_type, types in QUERY_HOUSE_TYPES.items(): + for type in types: + if ('/%s/' % type) in url: + return house_type + return NotLoaded + + obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a') + obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]') + obj_area = CleanDecimal( + Regexp( + CleanText( + './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]' + ), + r'(\d*\.*\d*) .*', + default=NotAvailable + ), + default=NotAvailable + ) + obj_cost = CleanDecimal( + './/strong[has-class("TeaserOffer-price-num")]', + default=NotAvailable + ) + obj_price_per_meter = PricePerMeterFilter() + obj_currency 
= Currency( + './/strong[has-class("TeaserOffer-price-num")]' + ) + obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]') + obj_text = CleanText('.//p[has-class("TeaserOffer-description")]') + + def obj_photos(self): + url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self) + # If the used photo is a default no photo, the src is on the same domain. + if url[0] == '/': + return [] + else: + return [HousingPhoto(url)] + + obj_date = datetime.date.today() + + def obj_utilities(self): + price = CleanText( + './/strong[has-class("TeaserOffer-price-num")]' + )(self) + if "charges comprises" in price.lower(): + return UTILITIES.INCLUDED + else: + return UTILITIES.EXCLUDED + + obj_rooms = CleanDecimal( + './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]', + default=NotLoaded + ) + obj_bedrooms = CleanDecimal( + './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]', + default=NotLoaded + ) + + def obj_details(self): + return { + "dispo": Date( + Attr('.//span[boolean(@data-dispo)]', 'data-dispo', + default=datetime.date.today().isoformat()) + )(self), + "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self) + } diff --git a/modules/foncia/test.py b/modules/foncia/test.py new file mode 100644 index 0000000..1206523 --- /dev/null +++ b/modules/foncia/test.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Phyks (Lucas Verney) +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals

from weboob.capabilities.housing import (
    Query, POSTS_TYPES, ADVERT_TYPES
)
from weboob.tools.capabilities.housing.housing_test import HousingTest
from weboob.tools.test import BackendTest


class FonciaTest(BackendTest, HousingTest):
    """Live tests of the foncia module, all run against a 'paris' search."""

    MODULE = 'foncia'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "date", "location", "text", "details"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "rooms"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "phone",
        "DPE", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "rooms"
    ]

    def _paris_query(self, post_type, cost_max=None, advert_types=None):
        """Build a query for housings of at least 20 m² in 'paris'.

        :param post_type: ``POSTS_TYPES`` value stored in ``query.type``.
        :param cost_max: maximum cost, left unset when ``None``.
        :param advert_types: advert types, left unset when ``None``.
        :return: the ``Query``, with every city returned by the backend.
        """
        query = Query()
        query.area_min = 20
        if cost_max is not None:
            query.cost_max = cost_max
        query.type = post_type
        if advert_types is not None:
            query.advert_types = advert_types
        query.cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            query.cities.append(city)
        return query

    def test_foncia_rent(self):
        query = self._paris_query(POSTS_TYPES.RENT, cost_max=1500)
        self.check_against_query(query)

    def test_foncia_sale(self):
        query = self._paris_query(POSTS_TYPES.SALE)
        self.check_against_query(query)

    def test_foncia_furnished_rent(self):
        query = self._paris_query(POSTS_TYPES.FURNISHED_RENT, cost_max=1500)
        self.check_against_query(query)

    def test_foncia_personal(self):
        # The test expects no result at all when only adverts from
        # individuals are requested.
        query = self._paris_query(
            POSTS_TYPES.RENT,
            cost_max=900,
            advert_types=[ADVERT_TYPES.PERSONAL]
        )

        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)
# diff --git a/modules/leboncoin/__init__.py b/modules/leboncoin/__init__.py
# new file mode 100644
# index 0000000..2206442
# --- /dev/null
# +++ b/modules/leboncoin/__init__.py
# @@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.


from .module import LeboncoinModule


__all__ = ['LeboncoinModule']
# diff --git a/modules/leboncoin/browser.py b/modules/leboncoin/browser.py
# new file mode 100644
# index 0000000..fcfe6cb
# --- /dev/null
# +++ b/modules/leboncoin/browser.py
# @@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.json import json

from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
                                         HOUSE_TYPES, ADVERT_TYPES)
from .pages import CityListPage, HousingListPage, HousingPage, PhonePage, HomePage


class LeboncoinBrowser(PagesBrowser):
    """Browser for the leboncoin website and its search API."""

    BASEURL = 'https://www.leboncoin.fr/'
    # The group names must match the keyword arguments given to
    # ``self.city.go()`` in get_cities().
    city = URL(r'ajax/location_list.html\?city=(?P<city>.*)&zipcode=(?P<zip>.*)', CityListPage)
    housing = URL('ventes_immobilieres/(?P<_id>.*).htm', HousingPage)

    home = URL('annonces/offres', HomePage)
    api = URL('https://api.leboncoin.fr/finder/search', HousingListPage)
    phone = URL('https://api.leboncoin.fr/api/utils/phonenumber.json', PhonePage)

    # Category id sent to the API for each kind of post.
    TYPES = {POSTS_TYPES.RENT: '10',
             POSTS_TYPES.FURNISHED_RENT: '10',
             POSTS_TYPES.SALE: '9',
             POSTS_TYPES.SHARING: '11', }

    # "real_estate_type" value sent to the API for each house type.
    RET = {HOUSE_TYPES.HOUSE: '1',
           HOUSE_TYPES.APART: '2',
           HOUSE_TYPES.LAND: '3',
           HOUSE_TYPES.PARKING: '4',
           HOUSE_TYPES.OTHER: '5'}

    def get_cities(self, pattern):
        """Look cities up by zip code (all-digit pattern) or by name."""
        city = ''
        zip_code = ''
        if pattern.isdigit():
            zip_code = pattern
        else:
            city = pattern.replace(" ", "_")

        return self.city.go(city=city, zip=zip_code).get_cities()

    def search_housings(self, query, module_name):
        """Search the API for the housings matching ``query``.

        :param query: housing query (type, house types, costs, areas, rooms,
            cities and advert types are read).
        :param module_name: only the cities whose ``backend`` equals this
            name are sent to the API.
        :return: the housing list built from the API response.
        :raises TypeNotSupported: when ``query.type`` has no API category.
        """
        if query.type not in self.TYPES:
            # The exception used to be returned instead of raised, which
            # handed callers a non-iterable exception instance.
            raise TypeNotSupported()

        enums = {'ad_type': ['offer']}
        house_codes = (self.RET.get(t) for t in query.house_types)
        enums['real_estate_type'] = [code for code in house_codes if code]
        if query.type == POSTS_TYPES.FURNISHED_RENT:
            enums['furnished'] = ['1']
        elif query.type == POSTS_TYPES.RENT:
            enums['furnished'] = ['2']

        ranges = {}
        if query.cost_max or query.cost_min:
            price = ranges['price'] = {}
            if query.cost_max:
                price['max'] = query.cost_max
            if query.cost_min:
                price['min'] = query.cost_min

        if query.area_max or query.area_min:
            square = ranges['square'] = {}
            if query.area_max:
                square['max'] = query.area_max
            if query.area_min:
                square['min'] = query.area_min

        if query.nb_rooms:
            ranges['rooms'] = {'min': query.nb_rooms}

        city_zipcodes = []
        for c in query.cities:
            if c.backend != module_name:
                continue
            # City ids are built as "<name> <zipcode>"; split on the last
            # space only, as the name itself may contain spaces.
            city_name, zipcode = c.id.rsplit(' ', 1)
            city_zipcodes.append({
                'city': city_name,
                'zipcode': zipcode,
                'label': c.name,
            })

        data = {}
        data['filters'] = {}
        data['filters']['category'] = {'id': self.TYPES.get(query.type)}
        data['filters']['enums'] = enums
        data['filters']['keywords'] = {}
        data['filters']['ranges'] = ranges
        data['filters']['location'] = {'city_zipcodes': city_zipcodes}

        # Filter on the owner only when exactly one advert type is asked for.
        data['owner_type'] = 'all'
        if len(query.advert_types) == 1:
            if query.advert_types[0] == ADVERT_TYPES.PERSONAL:
                data['owner_type'] = 'private'
            elif query.advert_types[0] == ADVERT_TYPES.PROFESSIONAL:
                data['owner_type'] = 'pro'

        data['limit'] = 100
        data['limit_alu'] = 3
        data['offset'] = 0

        # The API key is read from the home page.
        self.session.headers.update({"api_key": self.home.go().get_api_key()})
        return self.api.go(data=json.dumps(data)).get_housing_list(query_type=query.type, data=data)

    def get_housing(self, _id, obj=None):
        """Fetch the advert ``_id`` and return it, filling ``obj`` if given."""
        return self.housing.go(_id=_id).get_housing(obj=obj)

    def get_phone(self, _id):
        """Return the phone number of the advert ``_id``."""
        api_key = self.housing.stay_or_go(_id=_id).get_api_key()
        data = {'list_id': _id,
                'app_id': 'leboncoin_web_utils',
                'key': api_key,
                'text': 1, }
        return self.phone.go(data=data).get_phone()
# diff --git a/modules/leboncoin/favicon.png b/modules/leboncoin/favicon.png
# new file mode 100644
# index 0000000..bb70200
# Binary files /dev/null and b/modules/leboncoin/favicon.png differ
# diff --git a/modules/leboncoin/module.py b/modules/leboncoin/module.py
# new file mode 100644
# index 0000000..6a9a311
# --- /dev/null
# +++ b/modules/leboncoin/module.py
# @@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.backend import Module
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto)
from .browser import LeboncoinBrowser


__all__ = ['LeboncoinModule']


class LeboncoinModule(Module, CapHousing):
    """Housing module backed by ``LeboncoinBrowser``."""

    NAME = 'leboncoin'
    DESCRIPTION = u'search house on leboncoin website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = LeboncoinBrowser

    def create_default_browser(self):
        return self.create_browser()

    def get_housing(self, _id):
        """Return the housing whose id is ``_id``."""
        return self.browser.get_housing(_id)

    def fill_housing(self, housing, fields):
        """Load the requested ``fields`` of ``housing`` and return it.

        The phone number needs its own request; any other field triggers a
        reload of the whole advert. ``fields`` is left untouched: the caller's
        list used to be modified in place with ``fields.remove('phone')``.
        """
        other_fields = [field for field in fields if field != 'phone']

        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)

        if other_fields:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when asked for and not loaded yet."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Return the housings matching ``query``."""
        return self.browser.search_housings(query, self.name)

    OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}
# diff --git a/modules/leboncoin/pages.py b/modules/leboncoin/pages.py
# new file mode 100644
# index 0000000..6079d23
# --- /dev/null
# +++ b/modules/leboncoin/pages.py
# @@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . +from __future__ import unicode_literals + +import requests + +from weboob.browser.pages import HTMLPage, JsonPage, pagination +from weboob.browser.elements import ItemElement, ListElement, method, DictElement +from weboob.capabilities.base import Currency as BaseCurrency +from weboob.browser.filters.standard import (CleanText, CleanDecimal, _Filter, + Env, DateTime, Format) +from weboob.browser.filters.json import Dict +from weboob.capabilities.housing import (City, Housing, HousingPhoto, + UTILITIES, ENERGY_CLASS, POSTS_TYPES, + ADVERT_TYPES, HOUSE_TYPES) +from weboob.capabilities.base import NotAvailable +from weboob.tools.capabilities.housing.housing import PricePerMeterFilter + +from decimal import Decimal +from lxml import etree +import json + + +class PopDetail(_Filter): + def __init__(self, name, default=NotAvailable): + super(PopDetail, self).__init__(default) + self.name = name + + def __call__(self, item): + return item.env['details'].pop(self.name, self.default) + + +class CityListPage(HTMLPage): + + def build_doc(self, content): + content = super(CityListPage, self).build_doc(content) + if content.getroot() is not None: + return content + return etree.Element("html") + + @method + class get_cities(ListElement): + item_xpath = '//li' + + class item(ItemElement): + klass = City + + obj_id = Format('%s %s', + CleanText('./span[has-class("city")]'), + CleanText('./span[@class="zipcode"]')) + + obj_name = Format('%s %s', + CleanText('./span[has-class("city")]'), + CleanText('./span[@class="zipcode"]')) + + +class HomePage(HTMLPage): + def __init__(self, *args, **kwargs): + HTMLPage.__init__(self, *args, **kwargs) + + add_content = CleanText('(//body/script)[4]', replace=[('window.FLUX_STATE = ', '')])(self.doc) or '{}' + api_content = 
CleanText('(//body/script[@id="__NEXT_DATA__"])')(self.doc) + + self.htmldoc = self.doc + self.api_content = json.loads(api_content) + self.doc = json.loads(add_content) + + def get_api_key(self): + return Dict('runtimeConfig/API/KEY')(self.api_content) + + +class HousingListPage(JsonPage): + + def __init__(self, *args, **kwargs): + JsonPage.__init__(self, *args, **kwargs) + if 'ads' not in self.doc: + self.doc['ads'] = [] + + @pagination + @method + class get_housing_list(DictElement): + item_xpath = 'ads' + + def next_page(self): + data = Env('data')(self) + if data['offset'] > self.page.doc['total_all']: + return + + data['offset'] = data['offset'] + data['limit'] + return requests.Request("POST", self.page.url, data=json.dumps(data)) + + class item(ItemElement): + klass = Housing + + def parse(self, el): + self.env['details'] = {obj['key']: obj['value_label'] for obj in self.el['attributes']} + + obj_id = Dict('list_id') + obj_url = Dict('url') + obj_type = Env('query_type') + + obj_area = CleanDecimal(PopDetail('square', + default=0), + default=NotAvailable) + obj_rooms = CleanDecimal(PopDetail('rooms', + default=0), + default=NotAvailable) + + def obj_GES(self): + ges = CleanText(PopDetail('ges', default='|'))(self) + return getattr(ENERGY_CLASS, ges[0], NotAvailable) + + def obj_DPE(self): + dpe = CleanText(PopDetail('energy_rate', default='|'))(self) + return getattr(ENERGY_CLASS, dpe[0], NotAvailable) + + def obj_house_type(self): + value = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower() + if value == 'parking': + return HOUSE_TYPES.PARKING + elif value == 'appartement': + return HOUSE_TYPES.APART + elif value == 'maison': + return HOUSE_TYPES.HOUSE + elif value == 'terrain': + return HOUSE_TYPES.LAND + else: + return HOUSE_TYPES.OTHER + + def obj_utilities(self): + value = CleanText(PopDetail('charges_included', + default='Non'), + default=NotAvailable)(self) + if value == "Oui": + return UTILITIES.INCLUDED + else: + return 
UTILITIES.EXCLUDED + + def obj_advert_type(self): + line_pro = Dict('owner/type')(self) + if line_pro == u'pro': + return ADVERT_TYPES.PROFESSIONAL + else: + return ADVERT_TYPES.PERSONAL + + obj_title = Dict('subject') + obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0)) + obj_currency = BaseCurrency.get_currency(u'€') + obj_text = Dict('body') + obj_location = Dict('location/city_label') + obj_date = DateTime(Dict('first_publication_date')) + + def obj_photos(self): + photos = [] + for img in Dict('images/urls_large', default=[])(self): + photos.append(HousingPhoto(img)) + return photos + + def obj_type(self): + try: + breadcrumb = int(Dict('category_id')(self)) + except ValueError: + breadcrumb = None + + if breadcrumb == 11: + return POSTS_TYPES.SHARING + elif breadcrumb == 10: + + isFurnished = CleanText(PopDetail('furnished', default=' '))(self) + + if isFurnished.lower() == u'meublé': + return POSTS_TYPES.FURNISHED_RENT + else: + return POSTS_TYPES.RENT + else: + return POSTS_TYPES.SALE + + obj_price_per_meter = PricePerMeterFilter() + obj_details = Env('details') + + +class HousingPage(HomePage): + def __init__(self, *args, **kwargs): + HomePage.__init__(self, *args, **kwargs) + self.doc = self.api_content["props"]["pageProps"]["ad"] + + def get_api_key(self): + return Dict('runtimeConfig/API/KEY_JSON')(self.api_content) + + @method + class get_housing(ItemElement): + klass = Housing + + def parse(self, el): + self.env['details'] = {obj['key']: obj['value_label'] for obj in el['attributes']} + + obj_id = Env('_id') + + obj_area = CleanDecimal(PopDetail('square', + default=0), + default=NotAvailable) + obj_rooms = CleanDecimal(PopDetail('rooms', + default=0), + default=NotAvailable) + + def obj_GES(self): + ges = CleanText(PopDetail('ges', default='|'))(self) + return getattr(ENERGY_CLASS, ges[0], NotAvailable) + + def obj_DPE(self): + dpe = CleanText(PopDetail('energy_rate', default='|'))(self) + return getattr(ENERGY_CLASS, 
dpe[0], NotAvailable) + + def obj_house_type(self): + value = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower() + if value == 'parking': + return HOUSE_TYPES.PARKING + elif value == 'appartement': + return HOUSE_TYPES.APART + elif value == 'maison': + return HOUSE_TYPES.HOUSE + elif value == 'terrain': + return HOUSE_TYPES.LAND + else: + return HOUSE_TYPES.OTHER + + def obj_utilities(self): + value = CleanText(PopDetail('charges_included', + default='Non'), + default=NotAvailable)(self) + if value == "Oui": + return UTILITIES.INCLUDED + else: + return UTILITIES.EXCLUDED + + obj_title = Dict('subject') + obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0)) + obj_currency = BaseCurrency.get_currency(u'€') + obj_text = Dict('body') + obj_location = Dict('location/city_label') + + def obj_advert_type(self): + line_pro = Dict('owner/type')(self) + if line_pro == u'pro': + return ADVERT_TYPES.PROFESSIONAL + else: + return ADVERT_TYPES.PERSONAL + + obj_date = DateTime(Dict('first_publication_date')) + + def obj_photos(self): + photos = [] + for img in Dict('images/urls_large', default=[])(self): + photos.append(HousingPhoto(img)) + return photos + + def obj_type(self): + try: + breadcrumb = int(Dict('category_id')(self)) + except ValueError: + breadcrumb = None + + if breadcrumb == 11: + return POSTS_TYPES.SHARING + elif breadcrumb == 10: + + isFurnished = CleanText(PopDetail('furnished', default=' '))(self) + + if isFurnished.lower() == u'meublé': + return POSTS_TYPES.FURNISHED_RENT + else: + return POSTS_TYPES.RENT + else: + return POSTS_TYPES.SALE + + obj_price_per_meter = PricePerMeterFilter() + obj_url = Dict('url') + obj_details = Env('details') + + +class PhonePage(JsonPage): + def get_phone(self): + if Dict('utils/status')(self.doc) == u'OK': + return Dict('utils/phonenumber')(self.doc) + return NotAvailable diff --git a/modules/leboncoin/test.py b/modules/leboncoin/test.py new file mode 100644 index 0000000..6367705 
# --- /dev/null
# +++ b/modules/leboncoin/test.py
# @@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.test import BackendTest
from weboob.tools.value import Value
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.capabilities.housing.housing_test import HousingTest


class LeboncoinTest(BackendTest, HousingTest):
    """Live tests of the leboncoin module, all run against a 'paris' search."""

    MODULE = 'leboncoin'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "url", "title",
        "currency", "utilities", "date", "location", "text"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "area",
        "cost",
        "price_per_meter",
        "photos"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title",
        "cost", "currency", "utilities", "date", "location", "text",
        "rooms", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "area",
        "GES",
        "DPE",
        "photos",
        # Don't test phone as leboncoin API is strongly rate-limited
    ]

    def setUp(self):
        if not self.is_backend_configured():
            self.backend.config['advert_type'] = Value(value='a')
            self.backend.config['region'] = Value(value='ile_de_france')

    def _paris_query(self, post_type, cost_max=None, advert_types=None,
                     max_cities=None):
        """Build a query for housings of at least 20 m² in 'paris'.

        :param post_type: ``POSTS_TYPES`` value stored in ``query.type``.
        :param cost_max: maximum cost, left unset when ``None``.
        :param advert_types: advert types, left unset when ``None``.
        :param max_cities: stop after this many cities; ``None`` keeps all
            the cities returned by the backend.
        :return: the ``Query``.
        """
        query = Query()
        query.area_min = 20
        if cost_max is not None:
            query.cost_max = cost_max
        query.type = post_type
        if advert_types is not None:
            query.advert_types = advert_types
        query.cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            query.cities.append(city)
            if max_cities is not None and len(query.cities) == max_cities:
                break
        return query

    def test_leboncoin_rent(self):
        query = self._paris_query(POSTS_TYPES.RENT, cost_max=1500,
                                  max_cities=3)
        self.check_against_query(query)

    def test_leboncoin_sale(self):
        query = self._paris_query(POSTS_TYPES.SALE, max_cities=3)
        self.check_against_query(query)

    def test_leboncoin_furnished_rent(self):
        query = self._paris_query(POSTS_TYPES.FURNISHED_RENT, cost_max=1500,
                                  max_cities=3)
        self.check_against_query(query)

    def test_leboncoin_professional(self):
        # NOTE(review): unlike the other tests, this one never limited the
        # number of cities; kept as is, confirm whether that is intended.
        query = self._paris_query(
            POSTS_TYPES.RENT,
            cost_max=900,
            advert_types=[ADVERT_TYPES.PROFESSIONAL]
        )
        self.check_against_query(query)
# diff --git a/modules/logicimmo/__init__.py b/modules/logicimmo/__init__.py
# new file mode 100644
# index 0000000..b052736
# --- /dev/null
# +++ b/modules/logicimmo/__init__.py
# @@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . + + +from .module import LogicimmoModule + + +__all__ = ['LogicimmoModule'] diff --git a/modules/logicimmo/browser.py b/modules/logicimmo/browser.py new file mode 100644 index 0000000..ab8d2fb --- /dev/null +++ b/modules/logicimmo/browser.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . 
from weboob.browser import PagesBrowser, URL
from weboob.browser.profiles import Firefox
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
                                         HOUSE_TYPES)
from .pages import CitiesPage, SearchPage, HousingPage, PhonePage


class LogicimmoBrowser(PagesBrowser):
    """Browser for the logic-immo.com housing website.

    Declares the URL patterns of the site and translates search criteria
    into the ``/options/...`` path segments understood by the search page.
    """

    BASEURL = 'https://www.logic-immo.com/'
    PROFILE = Firefox()

    # The named groups below must match the keyword arguments passed to
    # ``.go()`` further down (``pattern``, ``type``, ``cities``, ``options``,
    # ``urlcontact``); the search page also reads the ``type`` group through
    # ``Env('type')``.
    city = URL(r'asset/t9/getLocalityT9.php\?site=fr&lang=fr&json=%22(?P<pattern>.*)%22',
               CitiesPage)
    search = URL(r'(?P<type>location-immobilier|vente-immobilier|recherche-colocation)'
                 r'-(?P<cities>.*)/options/(?P<options>.*)',
                 SearchPage)
    housing = URL(r'detail-(?P<_id>.*).htm', HousingPage)
    phone = URL(r'(?P<urlcontact>.*)', PhonePage)

    # Post type -> first path segment of the search URL.
    TYPES = {POSTS_TYPES.RENT: 'location-immobilier',
             POSTS_TYPES.SALE: 'vente-immobilier',
             POSTS_TYPES.SHARING: 'recherche-colocation',
             POSTS_TYPES.FURNISHED_RENT: 'location-immobilier',
             POSTS_TYPES.VIAGER: 'vente-immobilier'}

    # House type -> value sent in the ``groupprptypesids`` search option.
    RET = {HOUSE_TYPES.HOUSE: '2',
           HOUSE_TYPES.APART: '1',
           HOUSE_TYPES.LAND: '3',
           HOUSE_TYPES.PARKING: '10',
           HOUSE_TYPES.OTHER: '14'}

    def __init__(self, *args, **kwargs):
        super(LogicimmoBrowser, self).__init__(*args, **kwargs)
        # Every request is sent with the XMLHttpRequest marker header.
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        An empty pattern performs no request and yields an empty list, so
        callers can always iterate over the result.
        """
        if not pattern:
            return []
        return self.city.go(pattern=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Run a search and return an iterator over the matching housings.

        :param type: a ``POSTS_TYPES`` value; must be a key of ``TYPES``
        :param cities: already formatted city segment of the search URL
        :param nb_rooms: minimum number of rooms (bedrooms for sharing)
        :param house_types: iterable of ``HOUSE_TYPES`` values
        :raises TypeNotSupported: when ``type`` is not a key of ``TYPES``
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        if type == POSTS_TYPES.VIAGER:
            # Viager searches always use the dedicated group id.
            property_ids = ['15']
        else:
            property_ids = [self.RET[house_type]
                            for house_type in house_types
                            if house_type in self.RET]

        options = []
        if property_ids:
            options.append('groupprptypesids=%s' % ','.join(property_ids))

        if type == POSTS_TYPES.FURNISHED_RENT:
            options.append('searchoptions=4')

        # Lower bounds are always sent (defaulting to 0); upper bounds only
        # when the caller provided one.
        options.append('pricemin=%s' % (cost_min if cost_min else '0'))
        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min if area_min else '0'))
        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            # The option lists every accepted count from nb_rooms up to 6.
            counts = ','.join(str(i) for i in range(nb_rooms, 7))
            key = 'nbbedrooms' if type == POSTS_TYPES.SHARING else 'nbrooms'
            options.append('%s=%s' % (key, counts))

        self.search.go(type=self.TYPES[type],
                       cities=cities,
                       options='/'.join(options))

        if type == POSTS_TYPES.SHARING:
            return self.page.iter_sharing()

        return self.page.iter_housings(query_type=type)

    def get_housing(self, _id, housing=None):
        """Load the detail page of ``_id`` and fill ``housing`` from it."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the contact phone for a rent/sale advert.

        Returns ``None`` for ids that start with neither ``location`` nor
        ``vente``.
        """
        if _id.startswith('location') or _id.startswith('vente'):
            urlcontact, params = self.housing.stay_or_go(_id=_id).get_phone_url_datas()
            return self.phone.go(urlcontact=urlcontact, params=params).get_phone()
        return None
from weboob.tools.backend import Module
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
                                         ADVERT_TYPES)
from weboob.capabilities.base import UserError
from .browser import LogicimmoBrowser


__all__ = ['LogicimmoModule']


class LogicImmoCitiesError(UserError):
    """
    Raised when more than 3 cities are selected
    """
    def __init__(self, msg='You cannot select more than three cities'):
        UserError.__init__(self, msg)


class LogicimmoModule(Module, CapHousing):
    """weboob housing module backed by :class:`LogicimmoBrowser`."""

    NAME = 'logicimmo'
    DESCRIPTION = u'logicimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = LogicimmoBrowser

    def get_housing(self, housing):
        """Fetch a housing given either a ``Housing`` object or its id."""
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id = housing
            housing = None
        return self.browser.get_housing(housing_id, housing)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for ``query``.

        :raises LogicImmoCitiesError: when more than three cities of this
            backend are selected
        """
        if (len(query.advert_types) == 1 and
                query.advert_types[0] == ADVERT_TYPES.PERSONAL):
            # Logic-immo is pro only
            return list()

        own_cities = [c for c in query.cities if c.backend == self.name]
        cities_names = ['%s' % c.name.replace(' ', '-') for c in own_cities]
        cities_ids = ['%s' % c.id for c in own_cities]

        if not cities_names:
            return list()

        if len(cities_names) > 3:
            raise LogicImmoCitiesError()

        # The URL segment is all names followed by all ids, lower-cased.
        cities = ','.join(cities_names + cities_ids)
        return self.browser.search_housings(query.type, cities.lower(), query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types)

    def fill_housing(self, housing, fields):
        """Fill the requested ``fields`` of ``housing``.

        The phone number needs a dedicated request; any other field is
        filled by reloading the detail page. ``fields`` is not modified.
        """
        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)

        # Work on a filtered copy: the caller's list must stay untouched.
        remaining = [field for field in fields if field != 'phone']
        if remaining:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when ``data`` is requested and missing."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
               }
from __future__ import unicode_literals

from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import (Currency, Format, CleanText,
                                             Regexp, CleanDecimal, Date, Env,
                                             BrowserURL)
from weboob.browser.filters.html import Attr, XPath, CleanHTML
from weboob.capabilities.housing import (Housing, HousingPhoto, City,
                                         UTILITIES, ENERGY_CLASS, POSTS_TYPES,
                                         ADVERT_TYPES, HOUSE_TYPES)
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from weboob.tools.compat import urljoin


def _house_type_from_label(label):
    """Map a lower-cased property label to a ``HOUSE_TYPES`` value."""
    return {
        "appartement": HOUSE_TYPES.APART,
        "maison": HOUSE_TYPES.HOUSE,
        "terrain": HOUSE_TYPES.LAND,
        "parking": HOUSE_TYPES.PARKING,
    }.get(label, HOUSE_TYPES.OTHER)


def _energy_class(raw, prefix):
    """Return the ``ENERGY_CLASS`` named by the first character left once
    ``prefix`` is removed from ``raw``, or ``NotAvailable``."""
    letter = raw.replace(prefix, "").strip()
    if not letter:
        # Nothing left after the prefix: indexing [0] would raise.
        return NotAvailable
    return getattr(ENERGY_CLASS, letter[0], NotAvailable)


def _format_agency_fees(raw):
    """Format the agency fees text, keeping what follows the first colon
    (or the whole text when it contains no colon)."""
    parts = raw.split(":")
    fee = parts[1] if len(parts) > 1 else parts[0]
    return "{} (TTC, en sus)".format(fee.strip())


class CitiesPage(JsonPage):
    """JSON answer of the city auto-completion endpoint."""

    @method
    class get_cities(DictElement):
        item_xpath = '*/children'

        class item(ItemElement):
            klass = City

            def condition(self):
                # Entries whose parent id is '0' are skipped.
                return Dict('lct_parent_id')(self) != '0'

            obj_id = Format('%s_%s', Dict('lct_id'), Dict('lct_level'))
            obj_name = Format('%s %s', Dict('lct_name'), Dict('lct_post_code'))


class PhonePage(HTMLPage):
    """Page holding the phone number of an advert."""

    def get_phone(self):
        """Return the text of the ``phone`` div, ignoring child elements."""
        return CleanText('//div[has-class("phone")]', children=False)(self.doc)


class HousingPage(HTMLPage):
    """Detail page of a single advert."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from the advert URL; rentals are
            furnished when the "meublé" criterion reads "oui"."""
            url = BrowserURL('housing', _id=Env('_id'))(self)
            # 'colocation' must be tested first: it contains 'location'.
            if 'colocation' in url:
                return POSTS_TYPES.SHARING
            elif 'location' in url:
                isFurnished = False
                for li in XPath('//ul[@itemprop="description"]/li')(self):
                    label = CleanText('./span[has-class("criteria-label")]')(li)
                    if label.lower() == "meublé":
                        isFurnished = (
                            CleanText('./span[has-class("criteria-value")]')(li).lower() == 'oui'
                        )
                if isFurnished:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in url:
                return POSTS_TYPES.SALE
            return NotAvailable
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_house_type(self):
            label = CleanText('.//h2[@class="offerMainFeatures"]/div')(self).lower()
            return _house_type_from_label(label)

        obj_title = Attr('//meta[@property="og:title"]', 'content')
        obj_area = CleanDecimal(
            CleanText(
                '//p[@class="offerArea"]/span',
            ),
            default=NotAvailable
        )
        obj_rooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d) p.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_bedrooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d) ch.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
        obj_currency = Currency(
            '//*[@itemprop="price"]'
        )

        def obj_utilities(self):
            notes = CleanText('//p[@class="offer-description-notes"]')(self)
            if "Loyer mensuel charges comprises" in notes:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_price_per_meter = PricePerMeterFilter()
        obj_date = Date(Regexp(CleanText('//div[@class="offer-description-notes"]'),
                               r'.* Mis à jour: (\d{2}/\d{2}/\d{4}).*'),
                        dayfirst=True)
        obj_text = CleanHTML('//p[@class="descrProperty"]')
        obj_location = CleanText('//em[@class="infoAdresse"]')
        obj_station = CleanText(
            '//div[has-class("offer-description-metro")]',
            default=NotAvailable
        )

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            photos = []
            for img in XPath('//ul[@class="thumbsContainer"]//img/@src')(self):
                if img.endswith('.svg'):
                    continue
                # Ask for the large rendition instead of the thumbnail.
                url = u'%s' % img.replace('182x136', '800x600')
                url = urljoin(self.page.url, url)  # Ensure URL is absolute
                photos.append(HousingPhoto(url))
            return photos

        def obj_DPE(self):
            energy_value = CleanText(
                '//ul[@class="energyInfosDPE"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            return _energy_class(energy_value, "DPE")

        def obj_GES(self):
            greenhouse_value = CleanText(
                '//ul[@class="energyInfosGES"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            return _energy_class(greenhouse_value, "GES")

        def obj_details(self):
            """Collect the creation date, the agency fees and every
            label/value criterion of the description list."""
            details = {}

            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//div[@class="offer-description-notes"]'
                    ),
                    r'.*Mis en ligne: (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                details["Honoraires"] = _format_agency_fees(honoraires)

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./span[has-class("criteria-label")]')(li)
                value = CleanText('./span[has-class("criteria-value")]')(li)
                details[label] = value

            return details

    def get_phone_url_datas(self):
        """Return the contact URL and the request parameters read from the
        ``data-*`` attributes of the "show phone" button."""
        a = XPath('//button[has-class("js-show-phone-offer-sale-bottom")]')(self.doc)[0]
        urlcontact = 'http://www.logic-immo.com/modalMail'
        params = {}
        params['universe'] = CleanText('./@data-univers')(a)
        params['source'] = CleanText('./@data-source')(a)
        params['pushcontact'] = CleanText('./@data-pushcontact')(a)
        params['mapper'] = CleanText('./@data-mapper')(a)
        params['offerid'] = CleanText('./@data-offerid')(a)
        params['offerflag'] = CleanText('./@data-offerflag')(a)
        params['campaign'] = CleanText('./@data-campaign')(a)
        params['xtpage'] = CleanText('./@data-xtpage')(a)
        params['offertransactiontype'] = CleanText('./@data-offertransactiontype')(a)
        params['aeisource'] = CleanText('./@data-aeisource')(a)
        params['shownumber'] = CleanText('./@data-shownumber')(a)
        params['corail'] = 1
        return urlcontact, params


class SearchPage(HTMLPage):
    """Search result page (regular adverts and flat-sharing adverts)."""

    @method
    class iter_sharing(ListElement):
        item_xpath = '//article[has-class("offer-block")]'

        class item(ItemElement):
            klass = Housing

            obj_id = Format('colocation-%s', CleanText('./div/header/@id', replace=[('header-offer-', '')]))
            obj_type = POSTS_TYPES.SHARING
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL
            obj_title = CleanText(CleanHTML('./div/header/section/p[@class="property-type"]/span/@title'))

            obj_area = CleanDecimal('./div/header/section/p[@class="offer-attributes"]/a/span[@class="offer-area-number"]',
                                    default=0)

            obj_cost = CleanDecimal('./div/header/section/p[@class="price"]', default=0)
            obj_currency = Currency(
                './div/header/section/p[@class="price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN

            obj_text = CleanText(
                './div/div[@class="content-offer"]/section[has-class("content-desc")]/p/span[has-class("offer-text")]/@title',
                default=NotLoaded
            )

            obj_date = Date(Regexp(CleanText('./div/header/section/p[has-class("update-date")]'),
                                   r".*(\d{2}/\d{2}/\d{4}).*"))

            obj_location = CleanText(
                '(./div/div[@class="content-offer"]/section[has-class("content-desc")]/p)[1]/span/@title',
                default=NotLoaded
            )

    @method
    class iter_housings(ListElement):
        item_xpath = '//div[has-class("offer-list")]//div[has-class("offer-block")]'

        class item(ItemElement):
            # Common prefix of most XPath expressions below.
            offer_details_wrapper = (
                './/div[has-class("offer-details-wrapper")]'
            )
            klass = Housing

            # The id is the search type without its last dash-separated
            # part, followed by the offer id.
            obj_id = Format(
                '%s-%s',
                Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')])
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                label = CleanText('.//div[has-class("offer-details-caracteristik")]/meta[@itemprop="name"]/@content')(self).lower()
                return _house_type_from_label(label)

            obj_title = CleanText('.//div[has-class("offer-details-type")]/a/@title')

            obj_url = Format(u'%s%s',
                             CleanText('.//div/a[@class="offer-link"]/@href'),
                             CleanText('.//div/a[@class="offer-link"]/@data-orpi', default=""))

            obj_area = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]/span' +
                    '/span[has-class("offer-area-number")]'
                ),
                default=NotLoaded
            )
            obj_rooms = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]' +
                    '/span[has-class("offer-rooms")]' +
                    '/span[has-class("offer-rooms-number")]'
                ),
                default=NotAvailable
            )
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText(
                        (
                            offer_details_wrapper +
                            '/div/p[@class="offer-price"]/span'
                        ),
                        default=NotLoaded
                    ),
                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
                    default=NotLoaded
                ),
                default=NotLoaded
            )
            obj_currency = Currency(
                offer_details_wrapper + '/div/p[has-class("offer-price")]/span'
            )
            obj_price_per_meter = PricePerMeterFilter()
            obj_utilities = UTILITIES.UNKNOWN
            obj_text = CleanText(
                offer_details_wrapper + '/div/div/div/p[has-class("offer-description")]/span'
            )
            obj_location = CleanText(
                offer_details_wrapper + '/div[@class="offer-details-location"]',
                replace=[('Voir sur la carte', '')]
            )

            def obj_photos(self):
                photos = []
                # A missing picture is expected; any other failure is no
                # longer silently swallowed.
                url = Attr(
                    './/div[has-class("offer-picture")]//img',
                    'src',
                    default=None
                )(self)

                if url:
                    url = url.replace('335x253', '800x600')
                    url = urljoin(self.page.url, url)  # Ensure URL is absolute
                    photos.append(HousingPhoto(url))
                return photos

            def obj_details(self):
                details = {}
                honoraires = CleanText(
                    (
                        self.offer_details_wrapper +
                        '/div/div/p[@class="offer-agency-fees"]'
                    ),
                    default=None
                )(self)
                if honoraires:
                    details["Honoraires"] = _format_agency_fees(honoraires)
                return details
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.test import BackendTest
from weboob.tools.capabilities.housing.housing_test import HousingTest


class LogicimmoTest(BackendTest, HousingTest):
    """Live tests of the logicimmo module, run against Paris searches."""

    MODULE = 'logicimmo'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "details", "rooms"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "phone", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "photos",
        "station",
        "rooms",
        "phone",
        "DPE",
        "GES"
    ]
    DO_NOT_DISTINGUISH_FURNISHED_RENT = True

    def _paris_cities(self, limit=None):
        """Return the cities found for 'paris', tagged with this backend.

        :param limit: stop after this many cities; ``None`` keeps them all
        """
        cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            cities.append(city)
            if limit is not None and len(cities) == limit:
                break
        return cities

    def test_logicimmo_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        # The module rejects queries with more than three cities.
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_sale(self):
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_furnished_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_viager(self):
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_personal(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PERSONAL]
        # No limit here: a personal-only query returns before the cities
        # are looked at.
        query.cities = self._paris_cities()

        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from weboob.tools.compat import urlencode

from .pages import HousingPage, CitiesPage
from .constants import TYPES, RET


__all__ = ['PapBrowser']


class PapBrowser(PagesBrowser):
    """Browser for the pap.fr housing website."""

    BASEURL = 'https://www.pap.fr'
    housing = URL(r'/annonces/(?P<_id>.*)', HousingPage)
    search_page = URL(r'/recherche')
    search_result_page = URL(r'/annonce/.*', HousingPage)
    # The group name must match the ``pattern`` keyword used in search_geo().
    cities = URL(r'/json/ac-geo\?q=(?P<pattern>.*)', CitiesPage)

    def search_geo(self, pattern):
        """Return an iterator over the cities matching ``pattern``."""
        return self.cities.open(pattern=pattern).iter_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Submit the search form and iterate over the resulting housings.

        :param type: a ``POSTS_TYPES`` value; must be a key of ``TYPES``
        :param cities: list of city ids, joined into ``geo_objets_ids``
        :param nb_rooms: exact number of rooms (sent as both min and max)
        :param house_types: iterable of ``HOUSE_TYPES`` values
        :raises TypeNotSupported: when ``type`` is not a key of ``TYPES``
        """
        if type not in TYPES:
            raise TypeNotSupported()

        self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})

        data = {'geo_objets_ids': ','.join(cities),
                'surface[min]': area_min or '',
                'surface[max]': area_max or '',
                'prix[min]': cost_min or '',
                'prix[max]': cost_max or '',
                'produit': TYPES.get(type, 'location'),
                'nb_resultats_par_page': 40,
                'action': 'submit'
                }

        if nb_rooms:
            data['nb_pieces[min]'] = nb_rooms
            data['nb_pieces[max]'] = nb_rooms

        if type == POSTS_TYPES.FURNISHED_RENT:
            data['tags[]'] = 'meuble'

        if type == POSTS_TYPES.VIAGER:
            property_types = ['viager']
        else:
            property_types = [RET[house_type]
                              for house_type in house_types
                              if house_type in RET]

        # ``typesbien[]`` is repeated once per property type, so it is
        # appended by hand to the encoded form. With no property type a
        # single empty ``typesbien[]=`` entry is still sent.
        _data = '%s%s%s' % (urlencode(data),
                            '&typesbien%5B%5D=',
                            '&typesbien%5B%5D='.join(property_types))
        return self.search_page.go(data=_data).iter_housings(
            query_type=type
        )

    def get_housing(self, _id, housing=None):
        """Load the advert page of ``_id`` and fill ``housing`` from it."""
        return self.housing.go(_id=_id).get_housing(obj=housing)
HOUSE_TYPES + +TYPES = {POSTS_TYPES.RENT: 'location', + POSTS_TYPES.FURNISHED_RENT: 'location', + POSTS_TYPES.SALE: 'vente', + POSTS_TYPES.VIAGER: 'vente'} + +RET = {HOUSE_TYPES.HOUSE: 'maison', + HOUSE_TYPES.APART: 'appartement', + HOUSE_TYPES.LAND: 'terrain', + HOUSE_TYPES.PARKING: 'garage-parking', + HOUSE_TYPES.OTHER: 'divers'} diff --git a/modules/pap/favicon.png b/modules/pap/favicon.png new file mode 100644 index 0000000..a4c7a50 Binary files /dev/null and b/modules/pap/favicon.png differ diff --git a/modules/pap/module.py b/modules/pap/module.py new file mode 100644 index 0000000..89f0887 --- /dev/null +++ b/modules/pap/module.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Romain Bignon +# +# This file is part of a weboob module. +# +# This weboob module is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This weboob module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this weboob module. If not, see . 
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
                                         ADVERT_TYPES)
from weboob.tools.backend import Module

from .browser import PapBrowser


__all__ = ['PapModule']


class PapModule(Module, CapHousing):
    """weboob housing module backed by :class:`PapBrowser`."""

    NAME = 'pap'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '2.1'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    BROWSER = PapBrowser

    def search_housings(self, query):
        """Search housings for ``query``; empty when the query asks only
        for professional adverts or selects no city of this backend."""
        advert_types = query.advert_types
        professional_only = (len(advert_types) == 1 and
                             advert_types[0] == ADVERT_TYPES.PROFESSIONAL)
        if professional_only:
            # Pap is personal only
            return list()

        city_ids = []
        for city in query.cities:
            if city.backend == self.name:
                city_ids.append('%s' % city.id)
        if not city_ids:
            return list()

        return self.browser.search_housings(query.type, city_ids, query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types)

    def get_housing(self, housing):
        """Fetch a housing given either a ``Housing`` object or its id."""
        if isinstance(housing, Housing):
            return self.browser.get_housing(housing.id, housing)
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when ``data`` is requested and missing."""
        wants_data = 'data' in fields
        if wants_data and photo.url and not photo.data:
            response = self.browser.open(photo.url)
            photo.data = response.content
        return photo

    def fill_housing(self, housing, fields):
        """Reload the advert page into ``housing``, whatever ``fields`` is."""
        return self.browser.get_housing(housing.id, housing)

    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
from __future__ import unicode_literals
from decimal import Decimal

from weboob.tools.date import parse_french_date
from weboob.browser.pages import HTMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
                                             Env, BrowserURL, Format, Currency)
from weboob.browser.filters.html import Attr, Link, XPath, CleanHTML
from weboob.browser.filters.json import Dict
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (Housing, City, HousingPhoto,
                                         UTILITIES, ENERGY_CLASS, POSTS_TYPES,
                                         ADVERT_TYPES, HOUSE_TYPES)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter


def _house_type_from_slug(slug):
    """Return the ``HOUSE_TYPES`` value whose keyword appears in ``slug``.

    Keywords are tested in a fixed order; the first match wins.
    """
    for keyword, house_type in (('parking', HOUSE_TYPES.PARKING),
                                ('appartement', HOUSE_TYPES.APART),
                                ('terrain', HOUSE_TYPES.LAND),
                                ('maison', HOUSE_TYPES.HOUSE)):
        if keyword in slug:
            return house_type
    return HOUSE_TYPES.OTHER


class CitiesPage(JsonPage):
    """JSON answer of the city auto-completion endpoint."""

    @method
    class iter_cities(DictElement):

        class item(ItemElement):
            klass = City

            obj_id = Dict('id')
            obj_name = Dict('name')


class HousingPage(HTMLPage):
    """Page used both for search results and for a single advert."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[has-class("search-list-item-alt")]'

        def next_page(self):
            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)

        class item(ItemElement):
            klass = Housing

            def condition(self):
                """Keep items that link to an advert; for plain RENT
                queries, also drop those whose title mentions "meublé"."""
                title = self.obj_title(self)
                isNotFurnishedOk = True
                if self.env['query_type'] == POSTS_TYPES.RENT:
                    isNotFurnishedOk = 'meublé' not in title.lower()
                return (
                    Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', default=None)(self) and
                    isNotFurnishedOk
                )

            def parse(self, el):
                """Read rooms, bedrooms and area from the item tags into
                the environment (``NotLoaded`` when a tag is absent)."""
                rooms_bedrooms_area = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                self.env['rooms'] = NotLoaded
                self.env['bedrooms'] = NotLoaded
                self.env['area'] = NotLoaded

                for item in rooms_bedrooms_area:
                    name = CleanText('.')(item)
                    if 'chambre' in name.lower():
                        name = 'bedrooms'
                        value = CleanDecimal('.')(item)
                    elif 'pièce' in name.lower():
                        name = 'rooms'
                        value = CleanDecimal('.')(item)
                    else:
                        # Any other tag is taken to be the area.
                        name = 'area'
                        value = CleanDecimal(
                            Regexp(
                                CleanText(
                                    '.'
                                ),
                                r'(\d*\.*\d*) .*'
                            )
                        )(item)
                    self.env[name] = value

            obj_id = Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                item_link = Link('./div/a[@class="item-title"]')(self)
                # The type keyword is the first dash-separated word of the
                # last path segment of the advert link.
                return _house_type_from_slug(item_link.split('/')[-1].split('-')[0])

            obj_title = CleanText('./div/a[has-class("item-title")]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(CleanText('./div/a[has-class("item-title")]/span[@class="item-price"]'),
                                    replace_dots=True, default=Decimal(0))
            obj_currency = Currency(
                './div/a[@class="item-title"]/span[@class="item-price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN

            obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)

            def obj_location(self):
                # The location is the first sentence of the description.
                return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]

            obj_text = CleanText('./div/p[@class="item-description"]', replace=[(' Lire la suite', '')])
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()

            obj_url = Format(
                u'http://www.pap.fr%s',
                Link('./div/a[@class="item-title"]')
            )

            def obj_photos(self):
                photos = []
                for img in XPath('./a/img/@src')(self):
                    # Skip the placeholder and video thumbnails.
                    if img.endswith(("visuel-nophoto.png", 'miniature-video.png')):
                        continue
                    photos.append(HousingPhoto(u'%s' % img))
                return photos

    @method
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            """Read rooms, bedrooms and area from the advert tags into the
            environment (``NotAvailable`` when a tag is absent)."""
            rooms_bedrooms_area = el.xpath(
                './/ul[has-class("item-tags")]/li'
            )
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                name = CleanText('.')(item)
                if 'chambre' in name.lower():
                    name = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in name.lower():
                    name = 'rooms'
                    value = CleanDecimal('./strong')(item)
                elif ' m²' in name and 'le m²' not in name:
                    name = 'area'
                    value = CleanDecimal(
                        Regexp(
                            CleanText(
                                '.'
                            ),
                            r'(\d*\.*\d*) .*'
                        )
                    )(item)
                else:
                    # Unrecognised tag: without this guard the previous
                    # (or an undefined) value was stored under the raw
                    # tag text.
                    continue
                self.env[name] = value

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from the first breadcrumb link; a
            rental is furnished when its title mentions "meublé"."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in prev_link:
                return POSTS_TYPES.SALE
            elif 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            else:
                return NotAvailable
        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            # The type keyword is the last dash-separated word of the link.
            return _house_type_from_slug(prev_link.split('-')[-1])

        obj_title = CleanText(
            '//h1[@class="item-title"]'
        )
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True
        )
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]'
        )
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            # Only the text after the last '/' of the date paragraph is
            # parsed.
            date = CleanText(
                '//p[@class="item-date"]'
            )(self).split("/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            return ", ".join([
                station.text
                for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        def obj_phone(self):
            phone = CleanText('(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]')(self)
            phone = phone.replace(' ', ', ')
            return phone

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the energy class encoded in an ``energy-rank-X`` CSS
            class, or ``NotAvailable``."""
            DPE = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default=""
            )(self)
            if DPE:
                # Take the first matching class token; fall back to "" so
                # that a missing token no longer raises IndexError.
                DPE = next(
                    (x.replace("energy-rank-", "").upper()
                     for x in DPE.split() if x.startswith("energy-rank-")),
                    ""
                )
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_photos(self):
            photos = []
            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
                if not img.endswith('miniature-video.png'):
                    photos.append(HousingPhoto(u'%s' % img))
            return photos
+#
+# This weboob module is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
+from weboob.tools.test import BackendTest
+from weboob.tools.capabilities.housing.housing_test import HousingTest
+
+
+class PapTest(BackendTest, HousingTest):
+    """Run ``check_against_query`` on the ``pap`` backend for each post type."""
+    MODULE = 'pap'
+
+    FIELDS_ALL_HOUSINGS_LIST = [
+        "id", "type", "advert_type", "house_type", "url", "title", "area",
+        "cost", "currency", "utilities", "location", "text"
+    ]
+    FIELDS_ANY_HOUSINGS_LIST = [
+        "photos",
+        "station",
+    ]
+    FIELDS_ALL_SINGLE_HOUSING = [
+        "id", "url", "type", "advert_type", "house_type", "title", "area",
+        "cost", "currency", "utilities", "date", "location", "text",
+        "phone"
+    ]
+    FIELDS_ANY_SINGLE_HOUSING = [
+        "photos",
+        "rooms",
+        "bedrooms",
+        "station"
+    ]
+
+    def _paris_cities(self):
+        """Return the cities found for 'paris', tagged with this backend's name."""
+        found = []
+        for city in self.backend.search_city('paris'):
+            city.backend = self.backend.name
+            found.append(city)
+        return found
+
+    def test_pap_rent(self):
+        """Check a RENT query with area_min 20 and cost_max 1500."""
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 1500
+        query.type = POSTS_TYPES.RENT
+        query.cities = self._paris_cities()
+        self.check_against_query(query)
+
+    def test_pap_sale(self):
+        """Check a SALE query with area_min 20."""
+        query = Query()
+        query.area_min = 20
+        query.type = POSTS_TYPES.SALE
+        query.cities = self._paris_cities()
+        self.check_against_query(query)
+
+    def test_pap_furnished_rent(self):
+        """Check a FURNISHED_RENT query with area_min 20 and cost_max 1500."""
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 1500
+        query.type = POSTS_TYPES.FURNISHED_RENT
+        query.cities = self._paris_cities()
+        self.check_against_query(query)
+
+    def test_pap_viager(self):
+        """Check a VIAGER query, with "rooms" left out of the optional fields."""
+        query = Query()
+        query.type = POSTS_TYPES.VIAGER
+        query.cities = self._paris_cities()
+        # Override the optional-field lists on this instance only: viager
+        # adverts never have "rooms", so it must not be expected here.
+        self.FIELDS_ANY_HOUSINGS_LIST = [
+            "photos",
+            "station",
+            "bedrooms"
+        ]
+        self.FIELDS_ANY_SINGLE_HOUSING = [
+            "photos",
+            "bedrooms",
+            "station"
+        ]
+        self.check_against_query(query)
+
+    def test_pap_professional(self):
+        """A query restricted to PROFESSIONAL adverts must return no housing."""
+        query = Query()
+        query.area_min = 20
+        query.cost_max = 900
+        query.type = POSTS_TYPES.RENT
+        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
+        query.cities = self._paris_cities()
+
+        results = list(self.backend.search_housings(query))
+        self.assertEqual(len(results), 0)