Add a copy of WebOOB modules

This commit is contained in:
Lucas Verney 2021-03-28 18:59:07 +02:00
parent 7dca7c535d
commit 9a532c0da1
32 changed files with 3312 additions and 0 deletions

View File

@ -27,6 +27,13 @@ your disk, to point `modules_path` configuration option to
`path_to_weboob_git/modules` (see the configuration section below) and to run
a `git pull; python setup.py install` in the WebOOB git repo often.
A copy of the WebOOB modules is available in the `modules` directory at the
root of this repository. To use it, set `"modules_path": "/path/to/flatisfy/modules"` in your configuration.
This copy may or may not be more up to date than the official WebOOB modules:
some changes are made here that are not backported upstream, and the official
WebOOB modules are not synced into the `modules` folder on a regular basis.
Try both and see which one matches your needs! :)
## TL;DR

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from .module import ExplorimmoModule
__all__ = ['ExplorimmoModule']

View File

@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
HOUSE_TYPES)
from weboob.tools.compat import urlencode
from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage
class ExplorimmoBrowser(PagesBrowser):
    """Browser for the Explorimmo housing adverts served by immobilier.lefigaro.fr."""

    BASEURL = 'https://immobilier.lefigaro.fr'

    # URL patterns are regular expressions, so they are written as raw
    # strings: ``\?`` is not a valid string escape sequence and triggers a
    # warning on recent Python versions when used in a plain literal.
    cities = URL(r'/rest/locations\?q=(?P<city>.*)', CitiesPage)
    search = URL(r'/annonces/resultat/annonces.html\?(?P<query>.*)', SearchPage)
    housing_html = URL(r'/annonces/annonce-(?P<_id>.*).html', HousingPage)
    phone = URL(r'/rest/classifieds/(?P<_id>.*)/phone', PhonePage)
    housing = URL(r'/rest/classifieds/(?P<_id>.*)',
                  r'/rest/classifieds/\?(?P<js_datas>.*)', HousingPage2)

    # Value sent as the "transaction" query parameter for each post type.
    TYPES = {POSTS_TYPES.RENT: 'location',
             POSTS_TYPES.SALE: 'vente',
             POSTS_TYPES.FURNISHED_RENT: 'location',
             POSTS_TYPES.VIAGER: 'vente'}

    # Value sent as a "type" query parameter for each house type.
    RET = {HOUSE_TYPES.HOUSE: 'Maison',
           HOUSE_TYPES.APART: 'Appartement',
           HOUSE_TYPES.LAND: 'Terrain',
           HOUSE_TYPES.PARKING: 'Parking',
           HOUSE_TYPES.OTHER: 'Divers'}

    def get_cities(self, pattern):
        """Return the cities found by the location endpoint for ``pattern``."""
        return self.cities.open(city=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """
        Run a search and return the housings listed on the result page.

        :param type: post type; must be a key of ``TYPES``
        :param cities: iterable of city identifiers, joined with commas
        :param house_types: house types; those absent from ``RET`` are ignored
        :param advert_types: forwarded to the result page to filter adverts
        :raises TypeNotSupported: when ``type`` is not a key of ``TYPES``

        The remaining parameters are sent as query arguments, with an empty
        string replacing any falsy value.
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        if type == POSTS_TYPES.VIAGER:
            # Viager searches use a dedicated estate type and ignore
            # the requested house types.
            ret = ['Viager']
        else:
            ret = [self.RET[house_type]
                   for house_type in house_types
                   if house_type in self.RET]

        data = {'location': ','.join(cities).encode('iso 8859-1'),
                'furnished': type == POSTS_TYPES.FURNISHED_RENT,
                'areaMin': area_min or '',
                'areaMax': area_max or '',
                'priceMin': cost_min or '',
                'priceMax': cost_max or '',
                'transaction': self.TYPES.get(type, 'location'),
                'recherche': '',
                'mode': '',
                'proximity': '0',
                'roomMin': nb_rooms or '',
                'page': '1'}

        # Each selected estate type is appended as its own "type" argument.
        query = u'%s%s%s' % (urlencode(data), '&type=', '&type='.join(ret))

        return self.search.go(query=query).iter_housings(
            query_type=type,
            advert_types=advert_types
        )

    def get_housing(self, _id, housing=None):
        """Fetch the advert ``_id``; ``housing`` is passed on as the object to fill."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number exposed by the phone endpoint for advert ``_id``."""
        return self.phone.go(_id=_id).get_phone()

    def get_total_page(self, js_datas):
        """Return the total reported by the classifieds endpoint for ``js_datas``."""
        return self.housing.open(js_datas=js_datas).get_total_page()

View File

@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
from .browser import ExplorimmoBrowser
__all__ = ['ExplorimmoModule']
class ExplorimmoModule(Module, CapHousing):
    """Housing module backed by :class:`ExplorimmoBrowser`."""

    NAME = 'explorimmo'
    DESCRIPTION = u'explorimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = ExplorimmoBrowser

    def get_housing(self, housing):
        """
        Fetch a housing.

        :param housing: either a ``Housing`` object (filled in by the browser)
            or a housing identifier
        :return: whatever the browser returns for that advert
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id, housing = housing, None

        return self.browser.get_housing(housing_id, housing)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """
        Search housings for ``query``.

        Only the cities attached to this backend are searched; when there
        are none, an empty list is returned without any request.
        """
        cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
        if not cities:
            return []

        return self.browser.search_housings(query.type, cities, query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types,
                                            query.advert_types)

    def fill_housing(self, housing, fields):
        """
        Load the requested ``fields`` of ``housing`` and return it.

        The phone number comes from a dedicated request; any other field
        triggers a fetch of the full advert.
        """
        # Work on a filtered copy so the caller's ``fields`` is left untouched.
        other_fields = [field for field in fields if field != 'phone']

        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)

        if other_fields:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and not loaded yet."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
               }

455
modules/explorimmo/pages.py Normal file
View File

@ -0,0 +1,455 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import json
import math
import re
from decimal import Decimal
from datetime import datetime
from weboob.browser.filters.json import Dict
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
Regexp, Env, BrowserURL, Filter,
Format)
from weboob.browser.filters.html import Attr, CleanHTML, XPath
from weboob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency
from weboob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from weboob.tools.compat import unquote
class CitiesPage(JsonPage):
    """JSON page listing the locations matching a city search."""

    ENCODING = 'UTF-8'

    def build_doc(self, content):
        """Build the document, falling back to an empty locations list."""
        parsed = super(CitiesPage, self).build_doc(content)
        # A falsy document is replaced by a structure that the
        # '0/locations' path below can still walk through.
        return parsed or [{"locations": []}]

    @method
    class get_cities(DictElement):
        item_xpath = '0/locations'

        class item(ItemElement):
            klass = City

            # The label is used both as identifier and as display name.
            obj_id = Dict('label')
            obj_name = Dict('label')
class SearchPage(HTMLPage):
    """HTML page listing the adverts returned by a search."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[starts-with(@id, "bloc-vue-")]'

        def next_page(self):
            """
            Return the URL of the next result page, or None on the last one.

            The current page number and page size are read from the query
            string stored in the ``data-rest-search-request`` attribute, and
            the result count from ``data-result-count``.
            """
            js_datas = CleanText(
                '//div[@id="js-data"]/@data-rest-search-request'
            )(self).split('?')[-1].split('&')

            try:
                resultsPerPage = next(
                    x for x in js_datas if 'resultsPerPage' in x
                ).split('=')[-1]
                currentPageNumber = next(
                    x for x in js_datas if 'currentPageNumber' in x
                ).split('=')[-1]
                resultCount = CleanText(
                    '(//div[@id="js-data"]/@data-result-count)[1]'
                )(self)
                # Force a true division: under Python 2, ``int / int`` floors
                # the result, which would hide a last, partially filled page.
                totalPageNumber = math.ceil(
                    float(resultCount) / int(resultsPerPage)
                )

                next_page = int(currentPageNumber) + 1
                if next_page <= totalPageNumber:
                    return self.page.url.replace(
                        'page=%s' % currentPageNumber,
                        'page=%d' % next_page
                    )
            except (StopIteration, ValueError, ZeroDivisionError):
                # Missing or unparsable pagination data: treated as
                # "no next page" instead of aborting the iteration.
                pass

        class item(ItemElement):
            klass = Housing

            price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

            def is_agency(self):
                """Tell whether the advert is not flagged as posted by an individual."""
                agency = CleanText('.//span[has-class("item-agency-name")]')(self.el)
                return 'annonce de particulier' not in agency.lower()

            def condition(self):
                """
                Decide whether the element is kept.

                When exactly one advert type is requested, the advert must
                match it; otherwise the element is kept as long as it
                carries a ``data-classified-id`` attribute.
                """
                if len(self.env['advert_types']) == 1:
                    is_agency = self.is_agency()
                    if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
                        return not is_agency
                    elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
                        return is_agency
                return Attr('.', 'data-classified-id', default=False)(self)

            obj_id = Attr('.', 'data-classified-id')
            obj_type = Env('query_type')
            obj_title = CleanText('./div/h2[@class="item-type"]')

            def obj_advert_type(self):
                if self.is_agency():
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Map the first word of the title to a house type."""
                first_word = self.obj_title(self).split()[0].lower()
                if first_word in ("appartement", "studio", "chambre"):
                    return HOUSE_TYPES.APART
                elif first_word in ("maison", "villa"):
                    return HOUSE_TYPES.HOUSE
                elif first_word == "parking":
                    return HOUSE_TYPES.PARKING
                elif first_word == "terrain":
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_location(self):
                """Return "<locality> (<postal code>)" read from the embedded JSON."""
                script = CleanText('./script')(self)
                try:
                    # Should be standard JSON+LD data
                    script = json.loads(script)
                except ValueError:
                    try:
                        # But explorimmo can't write JSON correctly and there
                        # is a trailing "}". Only that single stray brace is
                        # dropped, so the legitimate closing braces survive
                        # even when they directly precede it.
                        stripped = script.strip()
                        if stripped.endswith('}'):
                            stripped = stripped[:-1]
                        script = json.loads(stripped)
                    except ValueError:
                        script = None
                if not script:
                    return NotLoaded

                try:
                    return '%s (%s)' % (
                        script['address']['addressLocality'],
                        script['address']['postalCode']
                    )
                except KeyError:
                    return NotLoaded

            def obj_cost(self):
                """Return the price, using the lower bound of a "de X à Y" range."""
                cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
                                           r'de (.*) à .*',
                                           default=0))(self)
                if cost == 0:
                    return CleanDecimal(self.price_selector, default=NotAvailable)(self)
                else:
                    return cost

            obj_currency = Currency(price_selector)

            def obj_utilities(self):
                """Report utilities as included when the price text contains "CC"."""
                utilities = CleanText(
                    './div/div/span[@class="price-label"]|'
                    './div/div[@class="item-price-pdf"]|'
                    './div/div/span[@class="item-price"]'
                )(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/p[@itemprop="description"]')
            # The area is the number preceding " m2" in the title.
            obj_area = CleanDecimal(
                Regexp(
                    obj_title,
                    r'(.*?)([\d,\.]*) m2(.*?)',
                    '\\2',
                    default=None
                ),
                replace_dots=True,
                default=NotLoaded
            )
            obj_url = Format(
                "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
                CleanText('./@data-classified-id')
            )

            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                """Return the phone number unless it is truncated with "..."."""
                phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
                                  replace=[('Téléphoner : ', '')],
                                  default=NotLoaded)(self)

                if '...' in phone:
                    return NotLoaded

                return phone

            def obj_details(self):
                """Return the fees found after the colon in the price-fees text."""
                charges = CleanText('.//span[@class="price-fees"]',
                                    default=None)(self)
                if charges:
                    return {
                        "fees": charges.split(":")[1].strip()
                    }
                else:
                    return NotLoaded

            def obj_photos(self):
                """Return the default picture of the advert as a one-item list."""
                url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
                if url:
                    url = unquote(url)
                    # When another "http://" URL is embedded past the start
                    # of this one, keep that embedded URL, minus anything
                    # from the last "?" onwards.
                    if "http://" in url[3:]:
                        rindex = url.rfind("?")
                        if rindex == -1:
                            rindex = None
                        url = url[url.find("http://", 3):rindex]
                    return [HousingPhoto(url)]
                else:
                    return NotLoaded
class TypeDecimal(Filter):
    """Filter converting the selected value into a ``Decimal``."""

    def filter(self, el):
        # NOTE(review): if ``el`` can be a float, ``Decimal(el)`` keeps its
        # exact binary expansion rather than its short decimal
        # representation -- confirm what the JSON payload provides.
        return Decimal(el)
class FromTimestamp(Filter):
    """Filter converting a numeric timestamp into a ``datetime``."""

    def filter(self, el):
        # The value is divided by 1000 before conversion, so it is presumably
        # expressed in milliseconds -- TODO confirm against the API payload.
        return datetime.fromtimestamp(el / 1000.0)
class PhonePage(JsonPage):
    """JSON page returned by the phone endpoint of an advert."""

    def get_phone(self):
        """Return the 'phoneNumber' entry of the JSON document."""
        # Assumes the document is a dict; ``.get`` then yields None when the
        # key is absent -- verify against the endpoint response.
        return self.doc.get('phoneNumber')
class HousingPage2(JsonPage):
    """JSON page describing a single advert (REST classifieds endpoint)."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        def is_agency(self):
            """
            Tell whether the advert was posted by an agency.

            The flag is accepted both as the string 'false' and as a JSON
            boolean. When it is missing, the advert is treated as an agency
            one, which is what this module effectively always reported.
            """
            is_individual = Dict('agency/isParticulier', default='false')(self)
            return is_individual in (False, 'false')

        obj_id = Env('_id')

        def obj_type(self):
            """Derive the post type from the transaction and estate type."""
            transaction = Dict('characteristics/transaction')(self)
            if transaction == 'location':
                if Dict('characteristics/isFurnished')(self):
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif transaction == 'vente':
                estate_type = Dict('characteristics/estateType')(self).lower()
                if 'viager' in estate_type:
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_advert_type(self):
            # ``is_agency`` must be *called*: the bound method itself is
            # always truthy, which made every advert look professional.
            if self.is_agency():
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Map the estate type text to a house type."""
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'appartement' in estate_type:
                return HOUSE_TYPES.APART
            elif 'maison' in estate_type:
                return HOUSE_TYPES.HOUSE
            elif 'parking' in estate_type:
                return HOUSE_TYPES.PARKING
            elif 'terrain' in estate_type:
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/cityLabel'),
                              Dict('location/postalCode'))

        def obj_cost(self):
            """Return the price, falling back to the minimum price when it is 0."""
            cost = TypeDecimal(Dict('characteristics/price'))(self)
            if cost == 0:
                cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
            return cost

        obj_currency = BaseCurrency.get_currency('')

        def obj_utilities(self):
            are_fees_included = Dict('characteristics/areFeesIncluded',
                                     default=None)(self)
            if are_fees_included:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))

        def obj_area(self):
            """Return the area, falling back to the minimum area when it is 0."""
            area = TypeDecimal(Dict('characteristics/area'))(self)
            if area == 0:
                area = TypeDecimal(Dict('characteristics/areaMin'))(self)
            return area

        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

        def obj_rooms(self):
            # TODO: Why is roomCount a list?
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                return TypeDecimal(rooms[0])(self)
            return NotAvailable

        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """
            Return the 'xl' picture of every image entry.

            When the URL goes through thbr.figarocms.net and embeds another
            ``http://`` URL, that embedded URL is used instead.
            """
            photos = []
            for img in Dict('characteristics/images')(self):
                url = img.get('xl')
                if not url:
                    # Nothing to match or download for this entry.
                    continue
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
                if m:
                    photos.append(HousingPhoto(m.group(1)))
                else:
                    photos.append(HousingPhoto(url))
            return photos

        def obj_DPE(self):
            DPE = Dict(
                'characteristics/energyConsumptionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_GES(self):
            GES = Dict(
                'characteristics/greenhouseGasEmissionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, GES, NotAvailable)

        def obj_details(self):
            """Collect the secondary characteristics of the advert in a dict."""
            def characteristic(name):
                # Missing characteristics are reported as NotAvailable.
                return Dict('characteristics/%s' % name,
                            default=NotAvailable)(self)

            details = {}
            details['fees'] = characteristic('fees')
            details['agencyFees'] = characteristic('agencyFees')
            details['guarantee'] = characteristic('guarantee')
            details['bathrooms'] = characteristic('bathroomCount')
            details['creationDate'] = FromTimestamp(
                Dict(
                    'characteristics/creationDate', default=NotAvailable
                ),
                default=NotAvailable
            )(self)
            details['availabilityDate'] = characteristic('estateAvailabilityDate')
            details['exposure'] = characteristic('exposure')
            details['heatingType'] = characteristic('heatingType')
            details['floor'] = characteristic('floor')
            details['bedrooms'] = characteristic('bedroomCount')
            details['isFurnished'] = characteristic('isFurnished')

            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                details['rooms'] = rooms[0]

            details['available'] = characteristic('isAvailable')

            # Fall back to an empty mapping so that ``.get`` stays usable
            # when the advert carries no agency information.
            agency = Dict('agency', default=None)(self) or {}
            details['agency'] = ', '.join([
                x for x in [
                    agency.get('corporateName', ''),
                    agency.get('corporateAddress', ''),
                    agency.get('corporatePostalCode', ''),
                    agency.get('corporateCity', '')
                ] if x
            ])
            return details

    def get_total_page(self):
        """Return the 'total' of the 'pagination' entry, or 0 without pagination."""
        return self.doc.get('pagination').get('total') if 'pagination' in self.doc else 0
class HousingPage(HTMLPage):
    """HTML page describing a single advert."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Currency('//span[@itemprop="price"]')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # The area is the number preceding " m2" in the title.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),
                                       r'(.*?)(\d*) m2(.*?)', '\\2'), default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return the pictures embedded in the thumbnail URLs of the page."""
            thumbnails = XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self)
            embedded_url = Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')
            return [HousingPhoto(embedded_url(thumbnail)) for thumbnail in thumbnails]

        def obj_details(self):
            """Return the name/value pairs of the features list, plus the energy data."""
            found = {}

            for feature in XPath('//div[@class="features clearfix"]/ul/li')(self):
                label = CleanText('./span[@class="name"]')(feature)
                content = CleanText('./span[@class="value"]')(feature)
                if label and content:
                    found[label] = content

            # The energy block is stored under its own title.
            energy_label = CleanText('//div[@class="title-dpe clearfix"]')(self)
            energy_value = CleanText('//div[@class="energy-consumption"]')(self)
            if energy_label and energy_value:
                found[energy_label] = energy_value

            return found

101
modules/explorimmo/test.py Normal file
View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query, ADVERT_TYPES, POSTS_TYPES
from weboob.tools.capabilities.housing.housing_test import HousingTest
from weboob.tools.test import BackendTest
class ExplorimmoTest(BackendTest, HousingTest):
    """Live tests of the explorimmo module."""

    MODULE = 'explorimmo'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "title", "location",
        "utilities", "text", "area", "url"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos", "cost", "currency"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "rooms",
        "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "DPE",
        "GES",
        "phone"
    ]

    def _make_query(self, post_type, city_pattern, **criteria):
        """Build a query of ``post_type`` over the cities matching ``city_pattern``.

        Extra keyword arguments are set as attributes of the query.
        """
        query = Query()
        for field, value in criteria.items():
            setattr(query, field, value)
        query.type = post_type
        query.cities = []
        for city in self.backend.search_city(city_pattern):
            city.backend = self.backend.name
            query.cities.append(city)
        return query

    def test_explorimmo_rent(self):
        self.check_against_query(
            self._make_query(POSTS_TYPES.RENT, 'paris',
                             area_min=20, cost_max=1500)
        )

    def test_explorimmo_sale(self):
        self.check_against_query(
            self._make_query(POSTS_TYPES.SALE, 'paris', area_min=20)
        )

    def test_explorimmo_furnished_rent(self):
        self.check_against_query(
            self._make_query(POSTS_TYPES.FURNISHED_RENT, 'paris',
                             area_min=20, cost_max=1500)
        )

    def test_explorimmo_viager(self):
        self.check_against_query(
            self._make_query(POSTS_TYPES.VIAGER, '85')
        )

    def test_explorimmo_personal(self):
        query = self._make_query(POSTS_TYPES.RENT, 'paris',
                                 area_min=20, cost_max=900,
                                 advert_types=[ADVERT_TYPES.PERSONAL])

        # This search is expected to yield no advert at all.
        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from .module import FonciaModule
__all__ = ['FonciaModule']

61
modules/foncia/browser.py Normal file
View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.browser import PagesBrowser, URL
from .constants import QUERY_TYPES
from .pages import CitiesPage, HousingPage, SearchPage, SearchResultsPage
class FonciaBrowser(PagesBrowser):
    """Browser for the Foncia housing website."""

    BASEURL = 'https://fr.foncia.com'

    cities = URL(r'/recherche/autocomplete\?term=(?P<term>.+)', CitiesPage)
    housing = URL(r'/(?P<type>[^/]+)/.*\d+.htm', HousingPage)
    search_results = URL(r'/(?P<type>[^/]+)/.*', SearchResultsPage)
    search = URL(r'/(?P<type>.+)', SearchPage)

    def get_cities(self, pattern):
        """
        Return the cities suggested by the autocomplete endpoint for a pattern.
        """
        cities_page = self.cities.open(term=pattern)
        return cities_page.iter_cities()

    def search_housings(self, query, cities):
        """
        Return the housings matching the query, or an empty list when the
        post type of the query has no Foncia equivalent.
        """
        if query.type not in QUERY_TYPES:
            return []
        foncia_type = QUERY_TYPES[query.type]

        search_page = self.search.go(type=foncia_type)
        search_page.do_search(query, cities)
        # ``self.page`` is read again after the search has been submitted.
        return self.page.iter_housings(query_type=query.type)

    def get_housing(self, housing):
        """
        Return the housing designated by a "<type>:<reference>" identifier.
        """
        query_type, reference = housing.split(':')
        search_page = self.search.go(type=query_type)
        search_page.find_housing(query_type, reference)
        return self.page.get_housing()

View File

@ -0,0 +1,24 @@
from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Post type -> value used as the ``type`` part of Foncia URLs.
# Post types absent from this mapping are not searchable.
QUERY_TYPES = {
    POSTS_TYPES.RENT: 'location',
    POSTS_TYPES.SALE: 'achat',
    POSTS_TYPES.FURNISHED_RENT: 'location'
}

# House type -> Foncia estate type names belonging to it.
QUERY_HOUSE_TYPES = {
    HOUSE_TYPES.APART: ['appartement', 'appartement-meuble'],
    HOUSE_TYPES.HOUSE: ['maison'],
    HOUSE_TYPES.PARKING: ['parking'],
    HOUSE_TYPES.LAND: ['terrain'],
    HOUSE_TYPES.OTHER: ['chambre', 'programme-neuf',
                        'local-commercial', 'immeuble']
}

# Post type -> Foncia estate type names listed for it.
# NOTE(review): presumably the estate types the website offers for each
# kind of search -- confirm against the search form.
AVAILABLE_TYPES = {
    POSTS_TYPES.RENT: ['appartement', 'maison', 'parking', 'chambre',
                       'local-commercial'],
    POSTS_TYPES.SALE: ['appartement', 'maison', 'parking', 'local-commercial',
                       'terrain', 'immeuble', 'programme-neuf'],
    POSTS_TYPES.FURNISHED_RENT: ['appartement-meuble']
}

BIN
modules/foncia/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

74
modules/foncia/module.py Normal file
View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, ADVERT_TYPES, HousingPhoto
from .browser import FonciaBrowser
__all__ = ['FonciaModule']
class FonciaModule(Module, CapHousing):
    """Housing module backed by :class:`FonciaBrowser`."""

    NAME = 'foncia'
    DESCRIPTION = u'Foncia housing website.'
    MAINTAINER = u'Phyks (Lucas Verney)'
    EMAIL = 'phyks@phyks.me'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = FonciaBrowser

    def get_housing(self, housing):
        """
        Fetch a housing.

        :param housing: a housing identifier, or a ``Housing`` object whose
            ``id`` is used
        """
        # The browser splits the identifier on ':' and therefore needs the
        # identifier string, not the object.
        if isinstance(housing, Housing):
            housing = housing.id
        return self.browser.get_housing(housing)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """
        Search housings for ``query``.

        Nothing is returned for searches restricted to personal adverts, nor
        when no city of the query belongs to this backend.
        """
        if (
            len(query.advert_types) == 1 and
            query.advert_types[0] == ADVERT_TYPES.PERSONAL
        ):
            # Foncia is pro only
            return []

        cities = ','.join(
            ['%s' % c.name for c in query.cities if c.backend == self.name]
        )
        if not cities:
            return []

        return self.browser.search_housings(query, cities)

    def fill_housing(self, housing, fields):
        """
        Return ``housing`` with the requested ``fields`` loaded.

        When fields are requested, the advert is fetched again through its
        identifier and the freshly built object is returned. Handing the
        ``Housing`` object itself to the browser failed, since the browser
        calls ``split`` on its argument.
        """
        if fields:
            return self.browser.get_housing(housing.id)
        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and not loaded yet."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}

359
modules/foncia/pages.py Normal file
View File

@ -0,0 +1,359 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import datetime
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import (
CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
)
from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (
City, Housing, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES
class CitiesPage(JsonPage):
    """JSON page returned by the city autocomplete endpoint."""

    def iter_cities(self):
        """Yield a ``City`` per entry, using the entry as both id and name."""
        # The document is iterated directly, or through its values when
        # it is a dict.
        entries = self.doc.values() if isinstance(self.doc, dict) else self.doc

        for label in entries:
            found = City()
            found.id = label
            found.name = label
            yield found
class HousingPage(HTMLPage):
@method
class get_housing(ItemElement):
klass = Housing
obj_id = Format(
'%s:%s',
Env('type'),
Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
)
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
def obj_type(self):
    """Derive the post type from the URL type and the page URL."""
    url_type = Env('type')(self)

    if url_type == 'achat':
        return POSTS_TYPES.SALE
    if url_type != 'location':
        return NotAvailable

    # Rentals are told apart by the estate type present in the URL.
    if 'appartement-meuble' in self.page.url:
        return POSTS_TYPES.FURNISHED_RENT
    return POSTS_TYPES.RENT
def obj_url(self):
    """Return the URL of the page being parsed."""
    return self.page.url
def obj_house_type(self):
    """Return the first house type with an estate type name in the URL path."""
    page_url = self.obj_url()
    for house_type, names in QUERY_HOUSE_TYPES.items():
        # Names are matched as full path segments ("/<name>/").
        if any(('/%s/' % name) in page_url for name in names):
            return house_type
    return NotAvailable
obj_title = CleanText('//h1[has-class("OfferTop-title")]')
obj_area = CleanDecimal(
Regexp(
CleanText(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
),
r'(\d*\.*\d*) .*',
default=NotAvailable
),
default=NotAvailable
)
obj_cost = CleanDecimal(
'//span[has-class("OfferTop-price")]',
default=NotAvailable
)
obj_price_per_meter = PricePerMeterFilter()
obj_currency = Currency(
'//span[has-class("OfferTop-price")]'
)
obj_location = Format(
'%s - %s',
CleanText('//p[@data-behat="adresseBien"]'),
CleanText('//p[has-class("OfferTop-loc")]')
)
obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
obj_phone = Regexp(
Link(
'//a[has-class("OfferContact-btn--tel")]'
),
r'tel:(.*)'
)
def obj_photos(self):
    """Return the slider pictures, with '640/480' swapped for '800/600' in the URL."""
    slider_images = self.xpath('//div[has-class("OfferSlider")]//img')
    return [
        HousingPhoto(Attr('.', 'src')(image).replace('640/480', '800/600'))
        for image in slider_images
    ]
def obj_date(self):
    """Return today's date, computed when the housing is parsed.

    A class-level ``datetime.date.today()`` would be evaluated only once,
    when the module is imported, and go stale in a long-running process.
    """
    return datetime.date.today()
def obj_utilities(self):
    """Report utilities as included when the price text says "charges comprises"."""
    price_text = CleanText('//p[has-class("OfferTop-price")]')(self)
    if "charges comprises" not in price_text.lower():
        return UTILITIES.EXCLUDED
    return UTILITIES.INCLUDED
obj_rooms = CleanDecimal(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
default=NotAvailable
)
obj_bedrooms = CleanDecimal(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
default=NotAvailable
)
def obj_DPE(self):
    """Derive the energy class letter from the DPE image URL.

    The number embedded in the ``dpe.foncia.net`` image ``src`` is compared
    with fixed upper bounds to pick a letter from A to G, which is then
    looked up on ENERGY_CLASS. Returns NotAvailable when the image or the
    number cannot be found.
    """
    try:
        consumption = CleanDecimal(Regexp(
            Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
            r'https://dpe.foncia.net\/(\d+)\/.*'
        ))(self)
    except (RegexpError, XPathNotFound):
        return NotAvailable

    if consumption is None:
        return NotAvailable

    # (inclusive upper bound, letter); anything above the last bound is "G".
    thresholds = (
        (50, "A"),
        (90, "B"),
        (150, "C"),
        (230, "D"),
        (330, "E"),
        (450, "F"),
    )
    letter = "G"
    for upper_bound, candidate in thresholds:
        if consumption <= upper_bound:
            letter = candidate
            break
    return getattr(ENERGY_CLASS, letter, NotAvailable)
def obj_details(self):
    """Collect the free-form details of the advert into a dict.

    Keys set when available: ``dispo`` (date, today's date when the page
    shows none), ``priceMentions``, ``agency``, one nested dict per titled
    details section, and ``electric_consumption``.
    """
    info = {}

    available_from = Date(
        Regexp(
            CleanText('//p[has-class("OfferTop-dispo")]'),
            r'.* (\d\d\/\d\d\/\d\d\d\d)',
            default=datetime.date.today().isoformat()
        )
    )(self)
    if available_from is not None:
        info["dispo"] = available_from

    # Optional one-line texts, each stored under its own key.
    optional_texts = (
        ("priceMentions", '//p[has-class("OfferTop-mentions")]'),
        ("agency", '//p[has-class("OfferContact-address")]'),
    )
    for key, xpath in optional_texts:
        text = CleanText(xpath, default=None)(self)
        if text is not None:
            info[key] = text

    for block in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
        heading = CleanText(
            './h3[has-class("OfferDetails-title--2")]',
            default=None
        )(block)
        if not heading:
            # Untitled sections are skipped.
            continue

        section = {}
        info[heading] = section
        # "label: value" rows.
        for row in block.xpath('.//ul[has-class("List--data")]/li'):
            label = CleanText('.//span[has-class("List-data")]')(row)
            section[label] = CleanText('.//*[has-class("List-value")]')(row)
        # Bullet rows are stored as flags.
        for bullet in block.xpath('.//ul[has-class("List--bullet")]/li'):
            section[CleanText('.')(bullet)] = True

    try:
        consumption = CleanDecimal(Regexp(
            Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
            r'https://dpe.foncia.net\/(\d+)\/.*'
        ))(self)
    except (RegexpError, XPathNotFound):
        # No DPE image on the page: leave the key out.
        pass
    else:
        info["electric_consumption"] = (
            '{} kWhEP/m².an'.format(consumption)
        )

    return info
class SearchPage(HTMLPage):
    """Page holding the ``searchForm`` form used for searches and lookups."""

    def do_search(self, query, cities):
        """Fill the search form from ``query`` and ``cities`` and submit it.

        House types are translated through QUERY_HOUSE_TYPES (unknown ones
        are ignored) and then restricted to AVAILABLE_TYPES for the query
        type. Area, cost and room bounds are only set when truthy.
        """
        form = self.get_form('//form[@name="searchForm"]')

        form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
        form['searchForm[localisation]'] = cities

        requested = []
        for house_type in query.house_types:
            try:
                requested.extend(QUERY_HOUSE_TYPES[house_type])
            except KeyError:
                # House type without an equivalent: skip it.
                pass
        allowed = AVAILABLE_TYPES.get(query.type, [])
        form['searchForm[type_bien][]'] = [
            value for value in requested if value in allowed
        ]

        optional_fields = (
            ('searchForm[surface_min]', query.area_min),
            ('searchForm[surface_max]', query.area_max),
            ('searchForm[prix_min]', query.cost_min),
            ('searchForm[prix_max]', query.cost_max),
        )
        for field, value in optional_fields:
            if value:
                form[field] = value

        if query.nb_rooms:
            form['searchForm[pieces]'] = list(range(1, query.nb_rooms + 1))

        form.submit()

    def find_housing(self, query_type, housing):
        """Submit the search form for a single reference of the given type."""
        form = self.get_form('//form[@name="searchForm"]')
        form['searchForm[type]'] = query_type
        form['searchForm[reference]'] = housing
        form.submit()
class SearchResultsPage(HTMLPage):
    """Search results page; builds one Housing per ``TeaserOffer`` article."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//article[has-class("TeaserOffer")]'

        # Link whose text contains "Suivant", used to reach the next page.
        next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')

        class item(ItemElement):
            klass = Housing

            obj_id = Format(
                '%s:%s',
                Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference')
            )
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Guess the house type from the advert URL path segments.

                Returns NotLoaded when no value of QUERY_HOUSE_TYPES appears
                in the URL as ``/<value>/``.
                """
                url = self.obj_url(self)
                for house_type, slugs in QUERY_HOUSE_TYPES.items():
                    if any(('/%s/' % slug) in url for slug in slugs):
                        return house_type
                return NotLoaded

            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            obj_area = CleanDecimal(
                Regexp(
                    CleanText(
                        './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
                    ),
                    r'(\d*\.*\d*) .*',
                    default=NotAvailable
                ),
                default=NotAvailable
            )
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]',
                default=NotAvailable
            )
            # Declared after obj_area and obj_cost, as in the original order.
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]'
            )
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return the teaser photo, or an empty list for a placeholder."""
                url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
                # If the used photo is a default no photo, the src is on the
                # same domain (relative path). An empty src is treated the
                # same way instead of raising IndexError.
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            def obj_date(self):
                """Return today's date, computed when the object is built.

                A method rather than a class-level constant, so the value is
                not frozen at the moment the module is imported.
                """
                return datetime.date.today()

            def obj_utilities(self):
                """Return INCLUDED when the price text says "charges comprises"."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]'
                )(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded
            )
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded
            )

            def obj_details(self):
                """Return availability date and price mentions of the teaser.

                ``dispo`` falls back to today's date when the article has no
                ``data-dispo`` attribute.
                """
                return {
                    "dispo": Date(
                        Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
                             default=datetime.date.today().isoformat())
                    )(self),
                    "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
                }

95
modules/foncia/test.py Normal file
View File

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.capabilities.housing import (
Query, POSTS_TYPES, ADVERT_TYPES
)
from weboob.tools.capabilities.housing.housing_test import HousingTest
from weboob.tools.test import BackendTest
class FonciaTest(BackendTest, HousingTest):
    """Test case for the ``foncia`` backend, searching on 'paris'."""

    MODULE = 'foncia'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "date", "location", "text", "details"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "rooms"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "phone",
        "DPE", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "rooms"
    ]

    def _paris_query(self, post_type, cost_max=None, advert_types=None):
        """Build a Query with ``area_min`` 20 over the cities matching 'paris'.

        ``cost_max`` and ``advert_types`` are only set on the query when
        given.
        """
        query = Query()
        query.area_min = 20
        if cost_max is not None:
            query.cost_max = cost_max
        query.type = post_type
        if advert_types is not None:
            query.advert_types = advert_types
        query.cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            query.cities.append(city)
        return query

    def test_foncia_rent(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.RENT, cost_max=1500)
        )

    def test_foncia_sale(self):
        self.check_against_query(self._paris_query(POSTS_TYPES.SALE))

    def test_foncia_furnished_rent(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.FURNISHED_RENT, cost_max=1500)
        )

    def test_foncia_personal(self):
        # A search restricted to personal adverts is expected to be empty.
        query = self._paris_query(
            POSTS_TYPES.RENT,
            cost_max=900,
            advert_types=[ADVERT_TYPES.PERSONAL],
        )
        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from .module import LeboncoinModule
__all__ = ['LeboncoinModule']

View File

@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.json import json
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
HOUSE_TYPES, ADVERT_TYPES)
from .pages import CityListPage, HousingListPage, HousingPage, PhonePage, HomePage
class LeboncoinBrowser(PagesBrowser):
    """Browser for leboncoin housing adverts (HTML pages plus JSON API)."""

    BASEURL = 'https://www.leboncoin.fr/'
    # Raw string: ``\?`` is a regex escape, not a valid string escape.
    city = URL(r'ajax/location_list.html\?city=(?P<city>.*)&zipcode=(?P<zip>.*)', CityListPage)
    housing = URL('ventes_immobilieres/(?P<_id>.*).htm', HousingPage)

    home = URL('annonces/offres', HomePage)
    api = URL('https://api.leboncoin.fr/finder/search', HousingListPage)
    phone = URL('https://api.leboncoin.fr/api/utils/phonenumber.json', PhonePage)

    # Category id sent to the API for each supported post type.
    TYPES = {POSTS_TYPES.RENT: '10',
             POSTS_TYPES.FURNISHED_RENT: '10',
             POSTS_TYPES.SALE: '9',
             POSTS_TYPES.SHARING: '11', }

    # ``real_estate_type`` value sent to the API for each house type.
    RET = {HOUSE_TYPES.HOUSE: '1',
           HOUSE_TYPES.APART: '2',
           HOUSE_TYPES.LAND: '3',
           HOUSE_TYPES.PARKING: '4',
           HOUSE_TYPES.OTHER: '5'}

    def get_cities(self, pattern):
        """Look up cities by zip code (all-digit pattern) or by name."""
        city = ''
        zip_code = ''
        if pattern.isdigit():
            zip_code = pattern
        else:
            city = pattern.replace(" ", "_")

        return self.city.go(city=city, zip=zip_code).get_cities()

    def search_housings(self, query, module_name):
        """Run ``query`` against the search API and return the housing list.

        Only cities whose ``backend`` equals ``module_name`` are sent.

        :raises TypeNotSupported: when ``query.type`` is not a key of TYPES.
        """
        if query.type not in self.TYPES:
            raise TypeNotSupported()

        enums = {
            'ad_type': ['offer'],
            'real_estate_type': [
                code
                for code in (self.RET.get(t) for t in query.house_types)
                if code
            ],
        }
        if query.type == POSTS_TYPES.FURNISHED_RENT:
            enums['furnished'] = ['1']
        elif query.type == POSTS_TYPES.RENT:
            enums['furnished'] = ['2']

        ranges = {}
        bounded_fields = (
            ('price', query.cost_min, query.cost_max),
            ('square', query.area_min, query.area_max),
        )
        for field, lower, upper in bounded_fields:
            if not (upper or lower):
                continue
            bounds = {}
            if upper:
                bounds['max'] = upper
            if lower:
                bounds['min'] = lower
            ranges[field] = bounds
        if query.nb_rooms:
            ranges['rooms'] = {'min': query.nb_rooms}

        city_zipcodes = []
        for c in query.cities:
            if c.backend != module_name:
                continue
            # City ids are built as "<city> <zipcode>"; split on the last
            # space so that multi-word city names stay in one piece.
            parts = c.id.rsplit(' ', 1)
            city_zipcodes.append({
                'city': parts[0],
                'zipcode': parts[1],
                'label': c.name,
            })

        data = {
            'filters': {
                'category': {'id': self.TYPES.get(query.type)},
                'enums': enums,
                'keywords': {},
                'ranges': ranges,
                'location': {'city_zipcodes': city_zipcodes},
            },
        }

        if len(query.advert_types) == 1:
            if query.advert_types[0] == ADVERT_TYPES.PERSONAL:
                data['owner_type'] = 'private'
            elif query.advert_types[0] == ADVERT_TYPES.PROFESSIONAL:
                data['owner_type'] = 'pro'
        else:
            data['owner_type'] = 'all'

        data['limit'] = 100
        data['limit_alu'] = 3
        data['offset'] = 0

        # The API key is read from the home page before calling the API.
        self.session.headers.update({"api_key": self.home.go().get_api_key()})
        return self.api.go(data=json.dumps(data)).get_housing_list(query_type=query.type, data=data)

    def get_housing(self, _id, obj=None):
        """Open the advert page for ``_id`` and parse it (into ``obj`` if given)."""
        return self.housing.go(_id=_id).get_housing(obj=obj)

    def get_phone(self, _id):
        """Request the phone number of advert ``_id`` from the phone endpoint."""
        api_key = self.housing.stay_or_go(_id=_id).get_api_key()
        data = {'list_id': _id,
                'app_id': 'leboncoin_web_utils',
                'key': api_key,
                'text': 1, }
        return self.phone.go(data=data).get_phone()

Binary file not shown.

After

Width:  |  Height:  |  Size: 766 B

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto)
from .browser import LeboncoinBrowser
__all__ = ['LeboncoinModule']
class LeboncoinModule(Module, CapHousing):
    """Housing capability backed by LeboncoinBrowser."""

    NAME = 'leboncoin'
    DESCRIPTION = u'search house on leboncoin website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = LeboncoinBrowser

    def create_default_browser(self):
        return self.create_browser()

    def get_housing(self, _id):
        """Fetch a single housing by id through the browser."""
        browser = self.browser
        return browser.get_housing(_id)

    def fill_housing(self, housing, fields):
        """Complete ``housing`` with the requested ``fields``.

        The phone is fetched separately and 'phone' is removed from
        ``fields`` (in place); any remaining field triggers a full reload
        of the advert into ``housing``.
        """
        wants_phone = 'phone' in fields
        if wants_phone:
            housing.phone = self.browser.get_phone(housing.id)
            fields.remove('phone')

        if fields:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and missing."""
        needs_download = 'data' in fields and photo.url and not photo.data
        if needs_download:
            photo.data = self.browser.open(photo.url).content
        return photo

    def search_city(self, pattern):
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        return self.browser.search_housings(query, self.name)

    OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}

301
modules/leboncoin/pages.py Normal file
View File

@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import requests
from weboob.browser.pages import HTMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, method, DictElement
from weboob.capabilities.base import Currency as BaseCurrency
from weboob.browser.filters.standard import (CleanText, CleanDecimal, _Filter,
Env, DateTime, Format)
from weboob.browser.filters.json import Dict
from weboob.capabilities.housing import (City, Housing, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.capabilities.base import NotAvailable
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from decimal import Decimal
from lxml import etree
import json
class PopDetail(_Filter):
    """Filter that pops ``name`` out of the item's ``details`` environment.

    The entry is removed from ``item.env['details']``; ``default`` is
    returned when the key is absent.
    """

    def __init__(self, name, default=NotAvailable):
        super(PopDetail, self).__init__(default)
        self.name = name

    def __call__(self, item):
        remaining = item.env['details']
        return remaining.pop(self.name, self.default)
class CityListPage(HTMLPage):
    """City suggestions page; each ``<li>`` becomes a City."""

    def build_doc(self, content):
        """Parse ``content``, substituting an empty ``<html>`` element when
        the parsed document has no root."""
        doc = super(CityListPage, self).build_doc(content)
        if doc.getroot() is None:
            return etree.Element("html")
        return doc

    @method
    class get_cities(ListElement):
        item_xpath = '//li'

        class item(ItemElement):
            klass = City

            # Both id and name are "<city> <zipcode>".
            obj_id = Format(
                '%s %s',
                CleanText('./span[has-class("city")]'),
                CleanText('./span[@class="zipcode"]')
            )
            obj_name = Format(
                '%s %s',
                CleanText('./span[has-class("city")]'),
                CleanText('./span[@class="zipcode"]')
            )
class HomePage(HTMLPage):
    """HTML page whose embedded JSON scripts are decoded at construction.

    After ``__init__``: ``htmldoc`` holds the parsed HTML, ``api_content``
    the decoded ``__NEXT_DATA__`` script and ``doc`` the decoded content of
    the fourth body script (``{}`` when that script is empty).
    """

    def __init__(self, *args, **kwargs):
        HTMLPage.__init__(self, *args, **kwargs)

        html = self.doc
        flux_state = CleanText(
            '(//body/script)[4]',
            replace=[('window.FLUX_STATE = ', '')]
        )(html) or '{}'
        next_data = CleanText('(//body/script[@id="__NEXT_DATA__"])')(html)

        self.htmldoc = html
        self.api_content = json.loads(next_data)
        self.doc = json.loads(flux_state)

    def get_api_key(self):
        """Return the ``runtimeConfig/API/KEY`` entry of ``api_content``."""
        key_filter = Dict('runtimeConfig/API/KEY')
        return key_filter(self.api_content)
class HousingListPage(JsonPage):
    """JSON search response; every entry of ``ads`` becomes a Housing."""

    def __init__(self, *args, **kwargs):
        JsonPage.__init__(self, *args, **kwargs)
        # Responses without an 'ads' key are treated as empty result sets.
        if 'ads' not in self.doc:
            self.doc['ads'] = []

    @pagination
    @method
    class get_housing_list(DictElement):
        item_xpath = 'ads'

        def next_page(self):
            """Return the POST request for the next page, or None at the end.

            The ``data`` payload from the environment is updated in place
            with the new offset. Pagination stops as soon as the next offset
            reaches ``total_all``, or when that key is missing, so no
            request is sent past the last result.
            """
            data = Env('data')(self)
            next_offset = data['offset'] + data['limit']
            if next_offset >= self.page.doc.get('total_all', 0):
                return

            data['offset'] = next_offset
            return requests.Request("POST", self.page.url, data=json.dumps(data))

        class item(ItemElement):
            klass = Housing

            def parse(self, el):
                # key -> label of each attribute; fields below pop from it
                # and whatever is left ends up in obj_details.
                self.env['details'] = {obj['key']: obj['value_label'] for obj in self.el['attributes']}

            obj_id = Dict('list_id')
            obj_url = Dict('url')

            obj_area = CleanDecimal(PopDetail('square',
                                              default=0),
                                    default=NotAvailable)
            obj_rooms = CleanDecimal(PopDetail('rooms',
                                               default=0),
                                     default=NotAvailable)

            def obj_GES(self):
                """Map the first character of the 'ges' detail to ENERGY_CLASS."""
                ges = CleanText(PopDetail('ges', default='|'))(self)
                return getattr(ENERGY_CLASS, ges[0], NotAvailable)

            def obj_DPE(self):
                """Map the first character of 'energy_rate' to ENERGY_CLASS."""
                dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
                return getattr(ENERGY_CLASS, dpe[0], NotAvailable)

            def obj_house_type(self):
                """Translate the 'real_estate_type' label to HOUSE_TYPES."""
                value = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower()
                if value == 'parking':
                    return HOUSE_TYPES.PARKING
                elif value == 'appartement':
                    return HOUSE_TYPES.APART
                elif value == 'maison':
                    return HOUSE_TYPES.HOUSE
                elif value == 'terrain':
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_utilities(self):
                """Return INCLUDED only when 'charges_included' is "Oui"."""
                value = CleanText(PopDetail('charges_included',
                                            default='Non'),
                                  default=NotAvailable)(self)
                if value == "Oui":
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.EXCLUDED

            def obj_advert_type(self):
                """Return PROFESSIONAL when the owner type is 'pro'."""
                line_pro = Dict('owner/type')(self)
                if line_pro == u'pro':
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            obj_title = Dict('subject')
            obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0))
            obj_currency = BaseCurrency.get_currency(u'€')
            obj_text = Dict('body')
            obj_location = Dict('location/city_label')
            obj_date = DateTime(Dict('first_publication_date'))

            def obj_photos(self):
                """Return one HousingPhoto per large image URL."""
                photos = []
                for img in Dict('images/urls_large', default=[])(self):
                    photos.append(HousingPhoto(img))
                return photos

            def obj_type(self):
                """Derive the post type from the category id.

                11 is SHARING; 10 is FURNISHED_RENT when the 'furnished'
                detail reads "meublé" and RENT otherwise; any other value
                (or a non-numeric one) is SALE.
                """
                try:
                    breadcrumb = int(Dict('category_id')(self))
                except ValueError:
                    breadcrumb = None

                if breadcrumb == 11:
                    return POSTS_TYPES.SHARING
                elif breadcrumb == 10:
                    isFurnished = CleanText(PopDetail('furnished', default=' '))(self)

                    if isFurnished.lower() == u'meublé':
                        return POSTS_TYPES.FURNISHED_RENT
                    else:
                        return POSTS_TYPES.RENT
                else:
                    return POSTS_TYPES.SALE

            # Declared after obj_area and obj_cost, as in the original order.
            obj_price_per_meter = PricePerMeterFilter()
            obj_details = Env('details')
class HousingPage(HomePage):
    """Single advert page; ``doc`` is the advert object of ``__NEXT_DATA__``."""

    def __init__(self, *args, **kwargs):
        HomePage.__init__(self, *args, **kwargs)
        page_props = self.api_content["props"]["pageProps"]
        self.doc = page_props["ad"]

    def get_api_key(self):
        """Return the ``runtimeConfig/API/KEY_JSON`` entry of ``api_content``."""
        key_filter = Dict('runtimeConfig/API/KEY_JSON')
        return key_filter(self.api_content)

    @method
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            # key -> label of each attribute; fields below pop from it and
            # whatever is left ends up in obj_details.
            self.env['details'] = {
                attribute['key']: attribute['value_label']
                for attribute in el['attributes']
            }

        obj_id = Env('_id')

        obj_area = CleanDecimal(PopDetail('square', default=0),
                                default=NotAvailable)
        obj_rooms = CleanDecimal(PopDetail('rooms', default=0),
                                 default=NotAvailable)

        def obj_GES(self):
            """Map the first character of the 'ges' detail to ENERGY_CLASS."""
            label = CleanText(PopDetail('ges', default='|'))(self)
            return getattr(ENERGY_CLASS, label[0], NotAvailable)

        def obj_DPE(self):
            """Map the first character of 'energy_rate' to ENERGY_CLASS."""
            label = CleanText(PopDetail('energy_rate', default='|'))(self)
            return getattr(ENERGY_CLASS, label[0], NotAvailable)

        def obj_house_type(self):
            """Translate the 'real_estate_type' label to HOUSE_TYPES."""
            label = CleanText(PopDetail('real_estate_type'), default=' ')(self).lower()
            known = {
                'parking': HOUSE_TYPES.PARKING,
                'appartement': HOUSE_TYPES.APART,
                'maison': HOUSE_TYPES.HOUSE,
                'terrain': HOUSE_TYPES.LAND,
            }
            return known.get(label, HOUSE_TYPES.OTHER)

        def obj_utilities(self):
            """Return INCLUDED only when 'charges_included' is "Oui"."""
            answer = CleanText(PopDetail('charges_included', default='Non'),
                               default=NotAvailable)(self)
            return UTILITIES.INCLUDED if answer == "Oui" else UTILITIES.EXCLUDED

        obj_title = Dict('subject')
        obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable), default=Decimal(0))
        obj_currency = BaseCurrency.get_currency(u'€')
        obj_text = Dict('body')
        obj_location = Dict('location/city_label')

        def obj_advert_type(self):
            """Return PROFESSIONAL when the owner type is 'pro'."""
            owner_type = Dict('owner/type')(self)
            if owner_type == u'pro':
                return ADVERT_TYPES.PROFESSIONAL
            return ADVERT_TYPES.PERSONAL

        obj_date = DateTime(Dict('first_publication_date'))

        def obj_photos(self):
            """Return one HousingPhoto per large image URL."""
            return [
                HousingPhoto(image_url)
                for image_url in Dict('images/urls_large', default=[])(self)
            ]

        def obj_type(self):
            """Derive the post type from the category id.

            11 is SHARING; 10 is FURNISHED_RENT when the 'furnished' detail
            reads "meublé" and RENT otherwise; any other value (or a
            non-numeric one) is SALE.
            """
            try:
                category = int(Dict('category_id')(self))
            except ValueError:
                category = None

            if category == 11:
                return POSTS_TYPES.SHARING
            if category != 10:
                return POSTS_TYPES.SALE

            furnished = CleanText(PopDetail('furnished', default=' '))(self)
            if furnished.lower() == u'meublé':
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT

        # Declared after obj_area and obj_cost, as in the original order.
        obj_price_per_meter = PricePerMeterFilter()
        obj_url = Dict('url')
        obj_details = Env('details')
class PhonePage(JsonPage):
    """JSON answer of the phone number endpoint."""

    def get_phone(self):
        """Return ``utils/phonenumber`` when ``utils/status`` is "OK",
        NotAvailable otherwise."""
        status = Dict('utils/status')(self.doc)
        if status != u'OK':
            return NotAvailable
        return Dict('utils/phonenumber')(self.doc)

105
modules/leboncoin/test.py Normal file
View File

@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.tools.value import Value
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.capabilities.housing.housing_test import HousingTest
class LeboncoinTest(BackendTest, HousingTest):
    """Test case for the ``leboncoin`` backend, searching on 'paris'."""

    MODULE = 'leboncoin'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "url", "title",
        "currency", "utilities", "date", "location", "text"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "area",
        "cost",
        "price_per_meter",
        "photos"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title",
        "cost", "currency", "utilities", "date", "location", "text",
        "rooms", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "area",
        "GES",
        "DPE",
        "photos",
        # Don't test phone as leboncoin API is strongly rate-limited
    ]

    def setUp(self):
        # Provide default config values when the backend is not configured.
        if self.is_backend_configured():
            return
        self.backend.config['advert_type'] = Value(value='a')
        self.backend.config['region'] = Value(value='ile_de_france')

    def _paris_query(self, post_type, cost_max=None, advert_types=None,
                     max_cities=None):
        """Build a Query with ``area_min`` 20 over the cities matching 'paris'.

        ``cost_max`` and ``advert_types`` are only set when given;
        ``max_cities`` stops collecting cities once that many were added.
        """
        query = Query()
        query.area_min = 20
        if cost_max is not None:
            query.cost_max = cost_max
        query.type = post_type
        if advert_types is not None:
            query.advert_types = advert_types
        query.cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            query.cities.append(city)
            if max_cities is not None and len(query.cities) == max_cities:
                break
        return query

    def test_leboncoin_rent(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.RENT, cost_max=1500, max_cities=3)
        )

    def test_leboncoin_sale(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.SALE, max_cities=3)
        )

    def test_leboncoin_furnished_rent(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.FURNISHED_RENT, cost_max=1500,
                              max_cities=3)
        )

    def test_leboncoin_professional(self):
        self.check_against_query(
            self._paris_query(POSTS_TYPES.RENT, cost_max=900,
                              advert_types=[ADVERT_TYPES.PROFESSIONAL])
        )

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from .module import LogicimmoModule
__all__ = ['LogicimmoModule']

View File

@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.browser.profiles import Firefox
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
HOUSE_TYPES)
from .pages import CitiesPage, SearchPage, HousingPage, PhonePage
class LogicimmoBrowser(PagesBrowser):
    """Browser for logic-immo.com housing adverts."""

    BASEURL = 'https://www.logic-immo.com/'
    PROFILE = Firefox()
    # Raw string: ``\?`` is a regex escape, not a valid string escape.
    city = URL(r'asset/t9/getLocalityT9.php\?site=fr&lang=fr&json=%22(?P<pattern>.*)%22',
               CitiesPage)
    search = URL('(?P<type>location-immobilier|vente-immobilier|recherche-colocation)-(?P<cities>.*)/options/(?P<options>.*)', SearchPage)
    housing = URL('detail-(?P<_id>.*).htm', HousingPage)
    phone = URL('(?P<urlcontact>.*)', PhonePage)

    # URL prefix used for each supported post type.
    TYPES = {POSTS_TYPES.RENT: 'location-immobilier',
             POSTS_TYPES.SALE: 'vente-immobilier',
             POSTS_TYPES.SHARING: 'recherche-colocation',
             POSTS_TYPES.FURNISHED_RENT: 'location-immobilier',
             POSTS_TYPES.VIAGER: 'vente-immobilier'}

    # Value sent in ``groupprptypesids`` for each house type.
    RET = {HOUSE_TYPES.HOUSE: '2',
           HOUSE_TYPES.APART: '1',
           HOUSE_TYPES.LAND: '3',
           HOUSE_TYPES.PARKING: '10',
           HOUSE_TYPES.OTHER: '14'}

    def __init__(self, *args, **kwargs):
        super(LogicimmoBrowser, self).__init__(*args, **kwargs)
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        An empty pattern yields an empty list without any request, so
        callers always receive an iterable.
        """
        if not pattern:
            return []
        return self.city.go(pattern=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Open the search URL built from the criteria and iterate results.

        :raises TypeNotSupported: when ``type`` is not a key of TYPES.
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        options = []

        if type == POSTS_TYPES.VIAGER:
            ret = ['15']
        else:
            # House types without an entry in RET are ignored.
            ret = [self.RET[house_type]
                   for house_type in house_types
                   if house_type in self.RET]

        if ret:
            options.append('groupprptypesids=%s' % ','.join(ret))

        if type == POSTS_TYPES.FURNISHED_RENT:
            options.append('searchoptions=4')

        # Minimum price and area are always sent, defaulting to '0'.
        options.append('pricemin=%s' % (cost_min if cost_min else '0'))

        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min if area_min else '0'))

        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            # Sends every count from nb_rooms up to 6; shared housings
            # filter on bedrooms instead of rooms.
            counts = ','.join([str(i) for i in range(nb_rooms, 7)])
            if type == POSTS_TYPES.SHARING:
                options.append('nbbedrooms=%s' % counts)
            else:
                options.append('nbrooms=%s' % counts)

        self.search.go(type=self.TYPES.get(type, 'location-immobilier'),
                       cities=cities,
                       options='/'.join(options))

        if type == POSTS_TYPES.SHARING:
            return self.page.iter_sharing()

        return self.page.iter_housings(query_type=type)

    def get_housing(self, _id, housing=None):
        """Open the detail page for ``_id`` and parse it (into ``housing`` if given)."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number for ids starting with 'location' or 'vente'.

        Other ids return None without any request.
        """
        if _id.startswith(('location', 'vente')):
            urlcontact, params = self.housing.stay_or_go(_id=_id).get_phone_url_datas()
            return self.phone.go(urlcontact=urlcontact, params=params).get_phone()

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
ADVERT_TYPES)
from weboob.capabilities.base import UserError
from .browser import LogicimmoBrowser
__all__ = ['LogicimmoModule']
class LogicImmoCitiesError(UserError):
    """Error raised when a search selects more than three cities."""

    def __init__(self, msg='You cannot select more than three cities'):
        super(LogicImmoCitiesError, self).__init__(msg)
class LogicimmoModule(Module, CapHousing):
    """Housing backend that delegates every request to ``LogicimmoBrowser``."""

    NAME = 'logicimmo'
    DESCRIPTION = u'logicimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = LogicimmoBrowser

    def get_housing(self, housing):
        """Return the housing for an id or for an existing ``Housing``.

        :param housing: either a ``Housing`` instance (its ``id`` is used
            and the instance is passed to the browser) or a bare id.
        :return: the object returned by the browser.
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id = housing
            housing = None
        return self.browser.get_housing(housing_id, housing)

    def search_city(self, pattern):
        """Return the cities the browser finds for ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Run a housing search for ``query``.

        :return: an empty list when only personal adverts are requested or
            when no city of the query belongs to this backend, otherwise
            the browser's search result.
        :raises LogicImmoCitiesError: when more than three cities of this
            backend are selected.
        """
        if (len(query.advert_types) == 1 and
                query.advert_types[0] == ADVERT_TYPES.PERSONAL):
            # Logic-immo is pro only
            return list()

        # Only the cities attached to this backend are considered.
        own_cities = [c for c in query.cities if c.backend == self.name]
        cities_names = ['%s' % c.name.replace(' ', '-') for c in own_cities]
        cities_ids = ['%s' % c.id for c in own_cities]

        if not cities_names:
            return list()

        if len(cities_names) > 3:
            raise LogicImmoCitiesError()

        # All names first, then all ids, comma separated.
        cities = ','.join(cities_names + cities_ids)
        return self.browser.search_housings(query.type, cities.lower(), query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types)

    def fill_housing(self, housing, fields):
        """Complete ``housing`` with the requested ``fields``.

        The phone number is fetched through a dedicated browser call; if
        any other field is requested the advert page is parsed into
        ``housing``.

        :param housing: object to complete (modified in place).
        :param fields: names of the fields to fill; left untouched.
        :return: ``housing``.
        """
        # Work on a copy so the caller's sequence is never modified.
        remaining = list(fields)
        if 'phone' in remaining:
            housing.phone = self.browser.get_phone(housing.id)
            remaining.remove('phone')

        if remaining:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when ``data`` is requested and missing."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Fill function to use for each object type.
    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
               }

377
modules/logicimmo/pages.py Normal file
View File

@ -0,0 +1,377 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import (Currency, Format, CleanText,
Regexp, CleanDecimal, Date, Env,
BrowserURL)
from weboob.browser.filters.html import Attr, XPath, CleanHTML
from weboob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from weboob.tools.compat import urljoin
class CitiesPage(JsonPage):
    """JSON page from which ``City`` objects are built."""

    @method
    class get_cities(DictElement):
        item_xpath = '*/children'

        class item(ItemElement):
            klass = City

            obj_id = Format('%s_%s', Dict('lct_id'), Dict('lct_level'))
            obj_name = Format('%s %s', Dict('lct_name'), Dict('lct_post_code'))

            def condition(self):
                # Entries whose parent id is '0' are skipped.
                parent_id = Dict('lct_parent_id')(self)
                return parent_id != '0'
class PhonePage(HTMLPage):
    """HTML page from which the phone number is read."""

    def get_phone(self):
        """Return the cleaned text of the ``phone`` div, children excluded."""
        phone_filter = CleanText('//div[has-class("phone")]', children=False)
        return phone_filter(self.doc)
class HousingPage(HTMLPage):
    """Detail page of a single advert."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from the advert URL and the criteria list."""
            url = BrowserURL('housing', _id=Env('_id'))(self)
            # 'colocation' must be tested first: it contains 'location'.
            if 'colocation' in url:
                return POSTS_TYPES.SHARING
            elif 'location' in url:
                isFurnished = False
                for li in XPath('//ul[@itemprop="description"]/li')(self):
                    label = CleanText('./span[has-class("criteria-label")]')(li)
                    if label.lower() == "meublé":
                        isFurnished = (
                            CleanText('./span[has-class("criteria-value")]')(li).lower() == 'oui'
                        )
                if isFurnished:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in url:
                return POSTS_TYPES.SALE
            return NotAvailable

        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_house_type(self):
            """Map the displayed property kind to a ``HOUSE_TYPES`` value."""
            house_type = CleanText('.//h2[@class="offerMainFeatures"]/div')(self).lower()
            known_types = {
                "appartement": HOUSE_TYPES.APART,
                "maison": HOUSE_TYPES.HOUSE,
                "terrain": HOUSE_TYPES.LAND,
                "parking": HOUSE_TYPES.PARKING,
            }
            return known_types.get(house_type, HOUSE_TYPES.OTHER)

        obj_title = Attr('//meta[@property="og:title"]', 'content')
        obj_area = CleanDecimal(
            CleanText(
                '//p[@class="offerArea"]/span',
            ),
            default=NotAvailable
        )
        # '\d+' (not a single '\d') so counts of 10 or more are captured
        # whole instead of only their last digit.
        obj_rooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d+) p.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_bedrooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d+) ch.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
        obj_currency = Currency(
            '//*[@itemprop="price"]'
        )

        def obj_utilities(self):
            """Report utilities as included only when the notes say so."""
            notes = CleanText('//p[@class="offer-description-notes"]')(self)
            if "Loyer mensuel charges comprises" in notes:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_price_per_meter = PricePerMeterFilter()
        # Raw string: '\d' is not a valid escape in a normal literal.
        obj_date = Date(Regexp(CleanText('//div[@class="offer-description-notes"]'),
                               r'.* Mis à jour: (\d{2}/\d{2}/\d{4}).*'),
                        dayfirst=True)
        obj_text = CleanHTML('//p[@class="descrProperty"]')
        obj_location = CleanText('//em[@class="infoAdresse"]')
        obj_station = CleanText(
            '//div[has-class("offer-description-metro")]',
            default=NotAvailable
        )

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            """Collect the thumbnails as 800x600 ``HousingPhoto`` objects."""
            photos = []
            for img in XPath('//ul[@class="thumbsContainer"]//img/@src')(self):
                if img.endswith('.svg'):
                    continue
                url = u'%s' % img.replace('182x136', '800x600')
                url = urljoin(self.page.url, url)  # Ensure URL is absolute
                photos.append(HousingPhoto(url))
            return photos

        def obj_DPE(self):
            """Return the energy class read from the DPE ``data-class``."""
            energy_value = CleanText(
                '//ul[@class="energyInfosDPE"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            if energy_value:
                # Slice instead of index: nothing left after removing the
                # prefix gives '' (hence NotAvailable), not an IndexError.
                energy_value = energy_value.replace("DPE", "").strip()[:1]
            return getattr(ENERGY_CLASS, energy_value, NotAvailable)

        def obj_GES(self):
            """Return the energy class read from the GES ``data-class``."""
            greenhouse_value = CleanText(
                '//ul[@class="energyInfosGES"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            if greenhouse_value:
                # Same guard as obj_DPE against an empty remainder.
                greenhouse_value = greenhouse_value.replace("GES", "").strip()[:1]
            return getattr(ENERGY_CLASS, greenhouse_value, NotAvailable)

        def obj_details(self):
            """Build the details dict: creation date, agency fees, criteria."""
            details = {}

            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//div[@class="offer-description-notes"]'
                    ),
                    r'.*Mis en ligne: (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                # Only keep the fees when the "label: value" shape is there;
                # a label without colon no longer raises IndexError.
                fee_parts = honoraires.split(":")
                if len(fee_parts) > 1:
                    details["Honoraires"] = (
                        "{} (TTC, en sus)".format(
                            fee_parts[1].strip()
                        )
                    )

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./span[has-class("criteria-label")]')(li)
                value = CleanText('./span[has-class("criteria-value")]')(li)
                details[label] = value

            return details

    def get_phone_url_datas(self):
        """Return the contact URL and the parameters read from the phone button.

        :return: ``(urlcontact, params)`` where ``params`` maps each request
            parameter to the matching ``data-*`` attribute of the button.
        """
        a = XPath('//button[has-class("js-show-phone-offer-sale-bottom")]')(self.doc)[0]
        urlcontact = 'http://www.logic-immo.com/modalMail'

        # (request parameter, data attribute) pairs, in sending order.
        data_attributes = (
            ('universe', 'data-univers'),
            ('source', 'data-source'),
            ('pushcontact', 'data-pushcontact'),
            ('mapper', 'data-mapper'),
            ('offerid', 'data-offerid'),
            ('offerflag', 'data-offerflag'),
            ('campaign', 'data-campaign'),
            ('xtpage', 'data-xtpage'),
            ('offertransactiontype', 'data-offertransactiontype'),
            ('aeisource', 'data-aeisource'),
            ('shownumber', 'data-shownumber'),
        )
        params = {}
        for key, attribute in data_attributes:
            params[key] = CleanText('./@%s' % attribute)(a)
        params['corail'] = 1
        return urlcontact, params
class SearchPage(HTMLPage):
    """Search result page; lists shared flats and regular adverts."""

    @method
    class iter_sharing(ListElement):
        item_xpath = '//article[has-class("offer-block")]'

        class item(ItemElement):
            klass = Housing

            obj_id = Format('colocation-%s', CleanText('./div/header/@id', replace=[('header-offer-', '')]))
            obj_type = POSTS_TYPES.SHARING
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL
            obj_title = CleanText(CleanHTML('./div/header/section/p[@class="property-type"]/span/@title'))

            obj_area = CleanDecimal('./div/header/section/p[@class="offer-attributes"]/a/span[@class="offer-area-number"]',
                                    default=0)
            obj_cost = CleanDecimal('./div/header/section/p[@class="price"]', default=0)
            obj_currency = Currency(
                './div/header/section/p[@class="price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN

            obj_text = CleanText(
                './div/div[@class="content-offer"]/section[has-class("content-desc")]/p/span[has-class("offer-text")]/@title',
                default=NotLoaded
            )

            # dd/mm/yyyy dates: dayfirst=True as for the other Date filters
            # of this file, otherwise 03/04/2021 is read as March 4th.
            obj_date = Date(Regexp(CleanText('./div/header/section/p[has-class("update-date")]'),
                                   r".*(\d{2}/\d{2}/\d{4}).*"),
                            dayfirst=True)

            obj_location = CleanText(
                '(./div/div[@class="content-offer"]/section[has-class("content-desc")]/p)[1]/span/@title',
                default=NotLoaded
            )

    @method
    class iter_housings(ListElement):
        item_xpath = '//div[has-class("offer-list")]//div[has-class("offer-block")]'

        class item(ItemElement):
            # Common prefix of most XPaths below.
            offer_details_wrapper = (
                './/div[has-class("offer-details-wrapper")]'
            )
            klass = Housing

            # Id is "<type prefix>-<offer id>"; the prefix is the part of
            # the 'type' env value before its last dash.
            obj_id = Format(
                '%s-%s',
                Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')])
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Map the displayed property kind to a ``HOUSE_TYPES`` value."""
                house_type = CleanText('.//div[has-class("offer-details-caracteristik")]/meta[@itemprop="name"]/@content')(self).lower()
                known_types = {
                    "appartement": HOUSE_TYPES.APART,
                    "maison": HOUSE_TYPES.HOUSE,
                    "terrain": HOUSE_TYPES.LAND,
                    "parking": HOUSE_TYPES.PARKING,
                }
                return known_types.get(house_type, HOUSE_TYPES.OTHER)

            obj_title = CleanText('.//div[has-class("offer-details-type")]/a/@title')

            obj_url = Format(u'%s%s',
                             CleanText('.//div/a[@class="offer-link"]/@href'),
                             CleanText('.//div/a[@class="offer-link"]/@data-orpi', default=""))

            obj_area = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]/span' +
                    '/span[has-class("offer-area-number")]'
                ),
                default=NotLoaded
            )
            obj_rooms = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]' +
                    '/span[has-class("offer-rooms")]' +
                    '/span[has-class("offer-rooms-number")]'
                ),
                default=NotAvailable
            )
            # The amount is everything before " <currency symbol>". The euro
            # sign is written as an escape; the literal was found empty,
            # which made euro prices impossible to match.
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText(
                        (
                            offer_details_wrapper +
                            '/div/p[@class="offer-price"]/span'
                        ),
                        default=NotLoaded
                    ),
                    '(.*) [%s%s%s]' % (u'\u20ac', u'$', u'£'),
                    default=NotLoaded
                ),
                default=NotLoaded
            )
            obj_currency = Currency(
                offer_details_wrapper + '/div/p[has-class("offer-price")]/span'
            )
            obj_price_per_meter = PricePerMeterFilter()
            obj_utilities = UTILITIES.UNKNOWN
            obj_text = CleanText(
                offer_details_wrapper + '/div/div/div/p[has-class("offer-description")]/span'
            )
            obj_location = CleanText(
                offer_details_wrapper + '/div[@class="offer-details-location"]',
                replace=[('Voir sur la carte','')]
            )

            def obj_photos(self):
                """Return the advert picture, if any, as a one-item list."""
                photos = []
                # default=None replaces the former bare ``except: pass``:
                # a missing picture gives None, other errors are not hidden.
                url = Attr(
                    './/div[has-class("offer-picture")]//img',
                    'src',
                    default=None
                )(self)

                if url:
                    url = url.replace('335x253', '800x600')
                    url = urljoin(self.page.url, url)  # Ensure URL is absolute
                    photos.append(HousingPhoto(url))
                return photos

            def obj_details(self):
                """Return the agency fees, when present, in a details dict."""
                details = {}
                honoraires = CleanText(
                    (
                        self.offer_details_wrapper +
                        '/div/div/p[@class="offer-agency-fees"]'
                    ),
                    default=None
                )(self)
                if honoraires:
                    # Only keep the fees when the "label: value" shape is
                    # there; a text without colon no longer raises.
                    fee_parts = honoraires.split(":")
                    if len(fee_parts) > 1:
                        details["Honoraires"] = (
                            "{} (TTC, en sus)".format(
                                fee_parts[1].strip()
                            )
                        )
                return details

112
modules/logicimmo/test.py Normal file
View File

@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.test import BackendTest
from weboob.tools.capabilities.housing.housing_test import HousingTest
class LogicimmoTest(BackendTest, HousingTest):
    """Live tests of the logicimmo backend against the housing checks."""

    MODULE = 'logicimmo'

    # Fields every search result must provide.
    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "details", "rooms"
    ]
    # Fields at least one search result must provide.
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
    ]
    # Fields a single fetched housing must provide.
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "phone", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "photos",
        "station",
        "rooms",
        "phone",
        "DPE",
        "GES"
    ]
    DO_NOT_DISTINGUISH_FURNISHED_RENT = True

    def _paris_cities(self, limit=None):
        """Return the cities found for 'paris', attached to this backend.

        :param limit: stop once this many cities are collected; ``None``
            keeps them all.
        """
        found = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            found.append(city)
            if limit is not None and len(found) == limit:
                break
        return found

    def test_logicimmo_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_sale(self):
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_furnished_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_viager(self):
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._paris_cities(limit=3)
        self.check_against_query(query)

    def test_logicimmo_personal(self):
        # A personal-only query is expected to return nothing.
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PERSONAL]
        query.cities = self._paris_cities()

        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)

3
modules/pap/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from .module import PapModule
__all__ = ['PapModule']

81
modules/pap/browser.py Normal file
View File

@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from weboob.tools.compat import urlencode
from .pages import HousingPage, CitiesPage
from .constants import TYPES, RET
__all__ = ['PapBrowser']
class PapBrowser(PagesBrowser):
    """Browser for www.pap.fr: city lookup, housing search, advert detail."""

    BASEURL = 'https://www.pap.fr'
    housing = URL('/annonces/(?P<_id>.*)', HousingPage)
    search_page = URL('/recherche')
    search_result_page = URL('/annonce/.*', HousingPage)
    # Raw string: '\?' is not a valid escape in a normal literal.
    cities = URL(r'/json/ac-geo\?q=(?P<pattern>.*)', CitiesPage)

    def search_geo(self, pattern):
        """Return the cities listed by the city lookup page for ``pattern``."""
        return self.cities.open(pattern=pattern).iter_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Submit the search form and iterate over the resulting housings.

        :param type: post type; must be a key of ``TYPES``.
        :param cities: iterable of city ids (strings), joined with commas.
        :param nb_rooms: when truthy, sent as both minimum and maximum
            room count.
        :param area_min, area_max, cost_min, cost_max: bounds; a falsy
            value is sent as an empty string.
        :param house_types: house types; those absent from ``RET`` are
            ignored.
        :raises TypeNotSupported: when ``type`` is not in ``TYPES``.
        """
        if type not in TYPES:
            raise TypeNotSupported()

        self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})

        data = {'geo_objets_ids': ','.join(cities),
                'surface[min]': area_min or '',
                'surface[max]': area_max or '',
                'prix[min]': cost_min or '',
                'prix[max]': cost_max or '',
                # Membership was checked above, so direct indexing is safe.
                'produit': TYPES[type],
                'nb_resultats_par_page': 40,
                'action': 'submit'
                }

        if nb_rooms:
            data['nb_pieces[min]'] = nb_rooms
            data['nb_pieces[max]'] = nb_rooms

        if type == POSTS_TYPES.FURNISHED_RENT:
            data['tags[]'] = 'meuble'

        if type == POSTS_TYPES.VIAGER:
            ret = ['viager']
        else:
            ret = [RET[house_type] for house_type in house_types
                   if house_type in RET]

        # The property types are appended by hand as repeated
        # 'typesbien[]' parameters ('%5B%5D' is the encoded '[]'); with no
        # type selected a single empty 'typesbien[]=' is still sent.
        _data = '%s%s%s' % (urlencode(data), '&typesbien%5B%5D=', '&typesbien%5B%5D='.join(ret))
        return self.search_page.go(data=_data).iter_housings(
            query_type=type
        )

    def get_housing(self, _id, housing=None):
        """Open the advert ``_id`` and parse it, reusing ``housing`` if given."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

12
modules/pap/constants.py Normal file
View File

@ -0,0 +1,12 @@
from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Keyword associated with each supported post type. Furnished rentals
# share the rental keyword and viager shares the sale keyword.
TYPES = {
    POSTS_TYPES.RENT: 'location',
    POSTS_TYPES.FURNISHED_RENT: 'location',
    POSTS_TYPES.SALE: 'vente',
    POSTS_TYPES.VIAGER: 'vente',
}

# Keyword associated with each house type.
RET = {
    HOUSE_TYPES.HOUSE: 'maison',
    HOUSE_TYPES.APART: 'appartement',
    HOUSE_TYPES.LAND: 'terrain',
    HOUSE_TYPES.PARKING: 'garage-parking',
    HOUSE_TYPES.OTHER: 'divers',
}

BIN
modules/pap/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 841 B

75
modules/pap/module.py Normal file
View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
ADVERT_TYPES)
from weboob.tools.backend import Module
from .browser import PapBrowser
__all__ = ['PapModule']
class PapModule(Module, CapHousing):
    """Housing backend that delegates every request to ``PapBrowser``."""

    NAME = 'pap'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '2.1'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    BROWSER = PapBrowser

    def search_housings(self, query):
        """Search housings for ``query``.

        Returns an empty list when only professional adverts are requested
        or when no city of the query belongs to this backend.
        """
        only_professional = (
            len(query.advert_types) == 1 and
            query.advert_types[0] == ADVERT_TYPES.PROFESSIONAL
        )
        if only_professional:
            # Pap is personal only
            return list()

        cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
        if not cities:
            return list()

        return self.browser.search_housings(
            query.type, cities, query.nb_rooms,
            query.area_min, query.area_max,
            query.cost_min, query.cost_max,
            query.house_types,
        )

    def get_housing(self, housing):
        """Fetch a housing from an id or from an existing ``Housing``."""
        if isinstance(housing, Housing):
            housing_id, existing = housing.id, housing
        else:
            housing_id, existing = housing, None
        return self.browser.get_housing(housing_id, existing)

    def search_city(self, pattern):
        """Return the cities the browser finds for ``pattern``."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when ``data`` is requested and missing."""
        wants_data = 'data' in fields and photo.url and not photo.data
        if wants_data:
            photo.data = self.browser.open(photo.url).content
        return photo

    def fill_housing(self, housing, fields):
        """Re-parse the advert page into ``housing``; ``fields`` is not used."""
        return self.browser.get_housing(housing.id, housing)

    # Fill function to use for each object type.
    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}

270
modules/pap/pages.py Normal file
View File

@ -0,0 +1,270 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from decimal import Decimal
from weboob.tools.date import parse_french_date
from weboob.browser.pages import HTMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
Env, BrowserURL, Format, Currency)
from weboob.browser.filters.html import Attr, Link, XPath, CleanHTML
from weboob.browser.filters.json import Dict
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (Housing, City, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
class CitiesPage(JsonPage):
    """JSON page from which ``City`` objects are built."""

    @method
    class iter_cities(DictElement):
        class item(ItemElement):
            klass = City

            # Both fields are read straight from the JSON entry.
            obj_id = Dict('id')
            obj_name = Dict('name')
class HousingPage(HTMLPage):
    """Page used both for search results and for a single advert."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[has-class("search-list-item-alt")]'

        def next_page(self):
            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)

        class item(ItemElement):
            klass = Housing

            def condition(self):
                """Keep items that link to an advert and match the query type.

                For a plain rent query, items whose title mentions
                'meublé' are dropped.
                """
                title = self.obj_title(self)
                isNotFurnishedOk = True
                if self.env['query_type'] == POSTS_TYPES.RENT:
                    isNotFurnishedOk = 'meublé' not in title.lower()
                return (
                    Regexp(Link('./div/a[has-class("item-title")]'), r'/annonces/(.*)', default=None)(self) and
                    isNotFurnishedOk
                )

            def parse(self, el):
                """Read rooms, bedrooms and area from the item tags into env."""
                rooms_bedrooms_area = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                self.env['rooms'] = NotLoaded
                self.env['bedrooms'] = NotLoaded
                self.env['area'] = NotLoaded

                for item in rooms_bedrooms_area:
                    name = CleanText('.')(item)
                    if 'chambre' in name.lower():
                        name = 'bedrooms'
                        value = CleanDecimal('.')(item)
                    elif 'pièce' in name.lower():
                        name = 'rooms'
                        value = CleanDecimal('.')(item)
                    else:
                        # Any other tag is taken as the area.
                        name = 'area'
                        value = CleanDecimal(
                            Regexp(
                                CleanText(
                                    '.'
                                ),
                                r'(\d*\.*\d*) .*'
                            )
                        )(item)
                    self.env[name] = value

            obj_id = Regexp(Link('./div/a[has-class("item-title")]'), r'/annonces/(.*)')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Deduce the house type from the first word of the link slug."""
                item_link = Link('./div/a[@class="item-title"]')(self)
                house_type = item_link.split('/')[-1].split('-')[0]
                if 'parking' in house_type:
                    return HOUSE_TYPES.PARKING
                elif 'appartement' in house_type:
                    return HOUSE_TYPES.APART
                elif 'terrain' in house_type:
                    return HOUSE_TYPES.LAND
                elif 'maison' in house_type:
                    return HOUSE_TYPES.HOUSE
                else:
                    return HOUSE_TYPES.OTHER

            obj_title = CleanText('./div/a[has-class("item-title")]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(CleanText('./div/a[has-class("item-title")]/span[@class="item-price"]'),
                                    replace_dots=True, default=Decimal(0))
            obj_currency = Currency(
                './div/a[@class="item-title"]/span[@class="item-price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN

            obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)

            def obj_location(self):
                # The location is the text before the first '.' of the description.
                return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]

            obj_text = CleanText('./div/p[@class="item-description"]', replace=[(' Lire la suite', '')])
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()

            obj_url = Format(
                u'http://www.pap.fr%s',
                Link('./div/a[@class="item-title"]')
            )

            def obj_photos(self):
                """Collect the item pictures, skipping placeholder images."""
                photos = []
                for img in XPath('./a/img/@src')(self):
                    if img.endswith(("visuel-nophoto.png", 'miniature-video.png')):
                        continue
                    photos.append(HousingPhoto(u'%s' % img))
                return photos

    @method
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            """Read rooms, bedrooms and area from the item tags into env."""
            rooms_bedrooms_area = el.xpath(
                './/ul[has-class("item-tags")]/li'
            )
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                name = CleanText('.')(item)
                if 'chambre' in name.lower():
                    name = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in name.lower():
                    name = 'rooms'
                    value = CleanDecimal('./strong')(item)
                # NOTE(review): `'' in name` is always true, so only the
                # 'le m²' exclusion is effective; it looks like a character
                # was lost from this literal - confirm the intended text.
                elif '' in name and 'le m²' not in name:
                    name = 'area'
                    value = CleanDecimal(
                        Regexp(
                            CleanText(
                                '.'
                            ),
                            r'(\d*\.*\d*) .*'
                        )
                    )(item)
                else:
                    # Unrecognised tag: skip it. Without this, `value` was
                    # unbound (or stale from the previous tag) and got
                    # stored under the raw tag text.
                    continue
                self.env[name] = value

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from the first breadcrumb link and title."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in prev_link:
                return POSTS_TYPES.SALE
            elif 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            else:
                return NotAvailable

        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Deduce the house type from the last word of the breadcrumb link."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            house_type = prev_link.split('-')[-1]
            if 'parking' in house_type:
                return HOUSE_TYPES.PARKING
            elif 'appartement' in house_type:
                return HOUSE_TYPES.APART
            elif 'terrain' in house_type:
                return HOUSE_TYPES.LAND
            elif 'maison' in house_type:
                return HOUSE_TYPES.HOUSE
            else:
                return HOUSE_TYPES.OTHER

        obj_title = CleanText(
            '//h1[@class="item-title"]'
        )
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True
        )
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]'
        )
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            # The date is the text after the last '/' of the date paragraph.
            date = CleanText(
                '//p[@class="item-date"]'
            )(self).split("/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            """Return the transport labels joined with ', '."""
            return ", ".join([
                station.text
                for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        def obj_phone(self):
            phone = CleanText('(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]')(self)
            phone = phone.replace(' ', ', ')
            return phone

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the energy class found in the 'energy-rank-X' CSS class."""
            DPE = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default=""
            )(self)

            if DPE:
                ranks = [x.replace("energy-rank-", "").upper()
                         for x in DPE.split() if x.startswith("energy-rank-")]
                # No 'energy-rank-X' token: fall back to '' (NotAvailable
                # below) instead of raising IndexError.
                DPE = ranks[0] if ranks else ""
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_photos(self):
            """Collect the thumbnails, skipping the video placeholder."""
            photos = []
            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
                if not img.endswith('miniature-video.png'):
                    photos.append(HousingPhoto(u'%s' % img))
            return photos

112
modules/pap/test.py Normal file
View File

@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.test import BackendTest
from weboob.tools.capabilities.housing.housing_test import HousingTest
class PapTest(BackendTest, HousingTest):
    """Live tests of the pap backend against the housing checks."""

    MODULE = 'pap'

    # Fields every search result must provide.
    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "utilities", "location", "text"
    ]
    # Fields at least one search result must provide.
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "station",
    ]
    # Fields a single fetched housing must provide.
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "phone"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "photos",
        "rooms",
        "bedrooms",
        "station"
    ]

    def _paris_cities(self):
        """Return every city found for 'paris', attached to this backend."""
        found = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_pap_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_pap_sale(self):
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_pap_furnished_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_pap_viager(self):
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._paris_cities()
        # Remove rooms from the tested fields as viager never have them
        self.FIELDS_ANY_HOUSINGS_LIST = [
            "photos",
            "station",
            "bedrooms"
        ]
        self.FIELDS_ANY_SINGLE_HOUSING = [
            "photos",
            "bedrooms",
            "station"
        ]
        self.check_against_query(query)

    def test_pap_professional(self):
        # A professional-only query is expected to return nothing.
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
        query.cities = self._paris_cities()

        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)