Browse Source

Add a copy of WebOOB modules

master
Lucas Verney 1 year ago
parent
commit
9a532c0da1
  1. 7
      doc/0.getting_started.md
  2. 24
      modules/explorimmo/__init__.py
  3. 92
      modules/explorimmo/browser.py
  4. 80
      modules/explorimmo/module.py
  5. 455
      modules/explorimmo/pages.py
  6. 101
      modules/explorimmo/test.py
  7. 26
      modules/foncia/__init__.py
  8. 61
      modules/foncia/browser.py
  9. 24
      modules/foncia/constants.py
  10. BIN
      modules/foncia/favicon.png
  11. 74
      modules/foncia/module.py
  12. 359
      modules/foncia/pages.py
  13. 95
      modules/foncia/test.py
  14. 24
      modules/leboncoin/__init__.py
  15. 145
      modules/leboncoin/browser.py
  16. BIN
      modules/leboncoin/favicon.png
  17. 66
      modules/leboncoin/module.py
  18. 301
      modules/leboncoin/pages.py
  19. 105
      modules/leboncoin/test.py
  20. 24
      modules/logicimmo/__init__.py
  21. 108
      modules/logicimmo/browser.py
  22. BIN
      modules/logicimmo/favicon.png
  23. 99
      modules/logicimmo/module.py
  24. 377
      modules/logicimmo/pages.py
  25. 112
      modules/logicimmo/test.py
  26. 3
      modules/pap/__init__.py
  27. 81
      modules/pap/browser.py
  28. 12
      modules/pap/constants.py
  29. BIN
      modules/pap/favicon.png
  30. 75
      modules/pap/module.py
  31. 270
      modules/pap/pages.py
  32. 112
      modules/pap/test.py

7
doc/0.getting_started.md

@ -27,6 +27,13 @@ your disk, to point `modules_path` configuration option to @@ -27,6 +27,13 @@ your disk, to point `modules_path` configuration option to
`path_to_weboob_git/modules` (see the configuration section below) and to run
a `git pull; python setup.py install` in the WebOOB git repo often.
A copy of the WebOOB modules is available in the `modules` directory at the
root of this repository. To use it, set
`"modules_path": "/path/to/flatisfy/modules"` in your configuration.
This copy may or may not be more up to date than the official WebOOB modules.
Some changes made there have not been backported upstream, and the official
WebOOB modules are not synced into the `modules` folder on a regular basis, so
try both and see which ones match your needs! :)
## TL;DR

24
modules/explorimmo/__init__.py

@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from .module import ExplorimmoModule
__all__ = ['ExplorimmoModule']

92
modules/explorimmo/browser.py

@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
HOUSE_TYPES)
from weboob.tools.compat import urlencode
from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage
class ExplorimmoBrowser(PagesBrowser):
    """
    Browser for the explorimmo listings served from immobilier.lefigaro.fr.

    Declares the URL patterns of the site and exposes helpers to look up
    cities, run a housing search, and fetch a single housing or its phone
    number.
    """
    BASEURL = 'https://immobilier.lefigaro.fr'

    # The URL patterns are regular expressions; they are written as raw
    # strings so that ``\?`` is not parsed as an (invalid) string escape.
    cities = URL(r'/rest/locations\?q=(?P<city>.*)', CitiesPage)
    search = URL(r'/annonces/resultat/annonces.html\?(?P<query>.*)', SearchPage)
    housing_html = URL(r'/annonces/annonce-(?P<_id>.*).html', HousingPage)
    phone = URL(r'/rest/classifieds/(?P<_id>.*)/phone', PhonePage)
    housing = URL(r'/rest/classifieds/(?P<_id>.*)',
                  r'/rest/classifieds/\?(?P<js_datas>.*)', HousingPage2)

    # Value sent as the ``transaction`` query parameter for each post type.
    TYPES = {POSTS_TYPES.RENT: 'location',
             POSTS_TYPES.SALE: 'vente',
             POSTS_TYPES.FURNISHED_RENT: 'location',
             POSTS_TYPES.VIAGER: 'vente'}

    # Value sent as a ``type`` query parameter for each house type.
    RET = {HOUSE_TYPES.HOUSE: 'Maison',
           HOUSE_TYPES.APART: 'Appartement',
           HOUSE_TYPES.LAND: 'Terrain',
           HOUSE_TYPES.PARKING: 'Parking',
           HOUSE_TYPES.OTHER: 'Divers'}

    def get_cities(self, pattern):
        """Return the cities found by the site for ``pattern``."""
        return self.cities.open(city=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """
        Run a search and iterate over the matching housings.

        :param type: post type; must be a key of ``TYPES``
        :param cities: city identifiers, joined with commas in the query
        :param nb_rooms: minimum number of rooms (falsy to leave unset)
        :param area_min: minimum area (falsy to leave unset)
        :param area_max: maximum area (falsy to leave unset)
        :param cost_min: minimum price (falsy to leave unset)
        :param cost_max: maximum price (falsy to leave unset)
        :param house_types: house types; those missing from ``RET`` are ignored
        :param advert_types: forwarded to the search page's ``iter_housings``
        :raises TypeNotSupported: if ``type`` is not a key of ``TYPES``
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        if type == POSTS_TYPES.VIAGER:
            # Viager searches use a single dedicated type value.
            ret = ['Viager']
        else:
            ret = [self.RET[house_type]
                   for house_type in house_types
                   if house_type in self.RET]

        data = {'location': ','.join(cities).encode('iso 8859-1'),
                'furnished': type == POSTS_TYPES.FURNISHED_RENT,
                'areaMin': area_min or '',
                'areaMax': area_max or '',
                'priceMin': cost_min or '',
                'priceMax': cost_max or '',
                'transaction': self.TYPES[type],
                'recherche': '',
                'mode': '',
                'proximity': '0',
                'roomMin': nb_rooms or '',
                'page': '1'}

        # ``type`` may appear several times, so it is appended by hand after
        # the url-encoded single-valued parameters.
        query = u'%s%s%s' % (urlencode(data), '&type=', '&type='.join(ret))

        return self.search.go(query=query).iter_housings(
            query_type=type,
            advert_types=advert_types
        )

    def get_housing(self, _id, housing=None):
        """Fetch housing ``_id``, filling ``housing`` when one is given."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number attached to housing ``_id``."""
        return self.phone.go(_id=_id).get_phone()

    def get_total_page(self, js_datas):
        """Return the total reported by the REST endpoint for ``js_datas``."""
        return self.housing.open(js_datas=js_datas).get_total_page()

80
modules/explorimmo/module.py

@ -0,0 +1,80 @@ @@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
from .browser import ExplorimmoBrowser
__all__ = ['ExplorimmoModule']
class ExplorimmoModule(Module, CapHousing):
    """Housing backend delegating its work to :class:`ExplorimmoBrowser`."""
    NAME = 'explorimmo'
    DESCRIPTION = u'explorimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = ExplorimmoBrowser

    def get_housing(self, housing):
        """Fetch a housing from either a ``Housing`` object or a bare id."""
        if isinstance(housing, Housing):
            # Reuse the given object so that the browser fills it in place.
            return self.browser.get_housing(housing.id, housing)
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for ``query``, using only this backend's cities."""
        own_cities = [
            '%s' % city.id
            for city in query.cities
            if city.backend == self.name
        ]
        if not own_cities:
            return []

        return self.browser.search_housings(
            query.type, own_cities, query.nb_rooms,
            query.area_min, query.area_max,
            query.cost_min, query.cost_max,
            query.house_types,
            query.advert_types
        )

    def fill_housing(self, housing, fields):
        """Load the requested ``fields`` of ``housing`` and return it."""
        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)
            # The phone is handled here; drop it from the remaining fields.
            fields.remove('phone')

        if fields:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when it is requested and not yet loaded."""
        wants_data = 'data' in fields
        if wants_data and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Filler function to use for each object type.
    OBJECTS = {
        Housing: fill_housing,
        HousingPhoto: fill_photo,
    }

455
modules/explorimmo/pages.py

@ -0,0 +1,455 @@ @@ -0,0 +1,455 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import json
import math
import re
from decimal import Decimal
from datetime import datetime
from weboob.browser.filters.json import Dict
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
Regexp, Env, BrowserURL, Filter,
Format)
from weboob.browser.filters.html import Attr, CleanHTML, XPath
from weboob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency
from weboob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from weboob.tools.compat import unquote
class CitiesPage(JsonPage):
    """JSON answer of the locations endpoint."""
    ENCODING = 'UTF-8'

    def build_doc(self, content):
        """Parse ``content``, substituting an empty locations list for an empty document."""
        parsed = super(CitiesPage, self).build_doc(content)
        return parsed or [{"locations": []}]

    @method
    class get_cities(DictElement):
        item_xpath = '0/locations'

        class item(ItemElement):
            klass = City

            # The label is used both as identifier and as display name.
            obj_id = Dict('label')
            obj_name = Dict('label')
class SearchPage(HTMLPage):
    """HTML page listing the results of a housing search."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[starts-with(@id, "bloc-vue-")]'

        def next_page(self):
            """
            Return the URL of the next result page, or ``None`` on the last one.

            The page size and current page number are read from the query
            string stored in the ``js-data`` element, the result count from
            its ``data-result-count`` attribute.
            """
            js_datas = CleanText(
                '//div[@id="js-data"]/@data-rest-search-request'
            )(self).split('?')[-1].split('&')

            try:
                resultsPerPage = next(
                    x for x in js_datas if 'resultsPerPage' in x
                ).split('=')[-1]
                currentPageNumber = next(
                    x for x in js_datas if 'currentPageNumber' in x
                ).split('=')[-1]
                resultCount = CleanText(
                    '(//div[@id="js-data"]/@data-result-count)[1]'
                )(self)
                # Integer ceiling division: ``math.ceil(a / b)`` floors first
                # on Python 2 (no ``division`` future import in this file),
                # which dropped the last, partially filled page.
                totalPageNumber = -(-int(resultCount) // int(resultsPerPage))

                next_page = int(currentPageNumber) + 1
                if next_page <= totalPageNumber:
                    return self.page.url.replace(
                        'page=%s' % currentPageNumber,
                        'page=%d' % next_page
                    )
            except StopIteration:
                # Pagination parameters are absent: there is no next page.
                pass

        class item(ItemElement):
            klass = Housing

            price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

            def is_agency(self):
                """Tell whether the listing is not flagged as posted by an individual."""
                agency = CleanText('.//span[has-class("item-agency-name")]')(self.el)
                return 'annonce de particulier' not in agency.lower()

            def condition(self):
                """
                Decide whether the listing is kept.

                When exactly one advert type is requested, the listing is kept
                only if it matches that type. Otherwise it is kept when it has
                a ``data-classified-id`` attribute.
                """
                if len(self.env['advert_types']) == 1:
                    is_agency = self.is_agency()
                    if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
                        return not is_agency
                    elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
                        return is_agency
                return Attr('.', 'data-classified-id', default=False)(self)

            obj_id = Attr('.', 'data-classified-id')
            obj_type = Env('query_type')
            obj_title = CleanText('./div/h2[@class="item-type"]')

            def obj_advert_type(self):
                """Map the agency flag to an advert type."""
                if self.is_agency():
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Derive the house type from the first word of the title."""
                type = self.obj_title(self).split()[0].lower()
                if type == "appartement" or type == "studio" or type == "chambre":
                    return HOUSE_TYPES.APART
                elif type == "maison" or type == "villa":
                    return HOUSE_TYPES.HOUSE
                elif type == "parking":
                    return HOUSE_TYPES.PARKING
                elif type == "terrain":
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_location(self):
                """Build "locality (postal code)" from the embedded JSON data."""
                script = CleanText('./script')(self)
                try:
                    # Should be standard JSON+LD data
                    script = json.loads(script)
                except ValueError:
                    try:
                        # But explorimmo can't write JSON correctly and there
                        # is a trailing "}"
                        script = json.loads(script.strip().rstrip('}'))
                    except ValueError:
                        script = None
                if not script:
                    return NotLoaded

                try:
                    return '%s (%s)' % (
                        script['address']['addressLocality'],
                        script['address']['postalCode']
                    )
                except (KeyError):
                    return NotLoaded

            def obj_cost(self):
                """
                Return the price of the listing.

                A "de X à Y" price range yields its lower bound X; otherwise
                the price text is parsed as a whole.
                """
                cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
                                           r'de (.*) à .*',
                                           default=0))(self)
                if cost == 0:
                    return CleanDecimal(self.price_selector, default=NotAvailable)(self)
                else:
                    return cost

            obj_currency = Currency(price_selector)

            def obj_utilities(self):
                """Report utilities as included when the price text mentions "CC"."""
                utilities = CleanText(
                    './div/div/span[@class="price-label"]|'
                    './div/div[@class="item-price-pdf"]|'
                    './div/div/span[@class="item-price"]'
                )(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/p[@itemprop="description"]')
            # The area is the number preceding " m2" in the title.
            obj_area = CleanDecimal(
                Regexp(
                    obj_title,
                    r'(.*?)([\d,\.]*) m2(.*?)',
                    '\\2',
                    default=None
                ),
                replace_dots=True,
                default=NotLoaded
            )

            obj_url = Format(
                "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
                CleanText('./@data-classified-id')
            )

            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                """Return the phone number unless it is truncated with "..."."""
                phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
                                  replace=[('Téléphoner : ', '')],
                                  default=NotLoaded)(self)

                if '...' in phone:
                    return NotLoaded

                return phone

            def obj_details(self):
                """Return the fees found after the colon of the price-fees text, if any."""
                charges = CleanText('.//span[@class="price-fees"]',
                                    default=None)(self)
                if charges:
                    return {
                        "fees": charges.split(":")[1].strip()
                    }
                else:
                    return NotLoaded

            def obj_photos(self):
                """Return the default photo of the listing, if it has one."""
                url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
                if url:
                    url = unquote(url)
                    # When another "http://" URL is embedded in the address,
                    # keep that inner URL, without its query string.
                    if "http://" in url[3:]:
                        rindex = url.rfind("?")
                        if rindex == -1:
                            rindex = None
                        url = url[url.find("http://", 3):rindex]
                    return [HousingPhoto(url)]
                else:
                    return NotLoaded
class TypeDecimal(Filter):
    """Filter converting the selected value to a ``Decimal``."""

    def filter(self, el):
        as_decimal = Decimal(el)
        return as_decimal
class FromTimestamp(Filter):
    """Filter converting the selected value, divided by 1000, to a ``datetime``."""

    def filter(self, el):
        # NOTE(review): the division suggests ``el`` is in milliseconds - confirm.
        seconds = el / 1000.0
        return datetime.fromtimestamp(seconds)
class PhonePage(JsonPage):
    """JSON answer of the phone endpoint."""

    def get_phone(self):
        """Return the ``phoneNumber`` entry of the document, or ``None`` if absent."""
        document = self.doc
        return document.get('phoneNumber')
class HousingPage2(JsonPage):
    """JSON description of a single listing, as served by the REST endpoint."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        def is_agency(self):
            """Tell whether the listing is flagged as not coming from an individual."""
            is_individual = Dict('agency/isParticulier')(self)
            # NOTE(review): the original only compared against the string
            # 'false'; a JSON boolean is accepted as well - confirm which
            # type the API actually returns.
            return is_individual == 'false' or is_individual is False

        obj_id = Env('_id')

        def obj_type(self):
            """Map the transaction (and furnished / viager hints) to a post type."""
            transaction = Dict('characteristics/transaction')(self)
            if transaction == 'location':
                if Dict('characteristics/isFurnished')(self):
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif transaction == 'vente':
                type = Dict('characteristics/estateType')(self).lower()
                if 'viager' in type:
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_advert_type(self):
            """Map the agency flag to an advert type."""
            # ``is_agency`` must be called: the bound method itself is always
            # truthy, which made every listing PROFESSIONAL.
            if self.is_agency():
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Derive the house type from keywords of the estate type."""
            type = Dict('characteristics/estateType')(self).lower()
            if 'appartement' in type:
                return HOUSE_TYPES.APART
            elif 'maison' in type:
                return HOUSE_TYPES.HOUSE
            elif 'parking' in type:
                return HOUSE_TYPES.PARKING
            elif 'terrain' in type:
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/cityLabel'),
                              Dict('location/postalCode'))

        def obj_cost(self):
            """Return the price, falling back to the minimum price when it is 0."""
            cost = TypeDecimal(Dict('characteristics/price'))(self)
            if cost == 0:
                cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
            return cost

        # NOTE(review): the empty string looks like a currency symbol lost in
        # transcription - confirm against the upstream module.
        obj_currency = BaseCurrency.get_currency('')

        def obj_utilities(self):
            """Report utilities as included only when the fees-included flag is truthy."""
            are_fees_included = Dict('characteristics/areFeesIncluded',
                                     default=None)(self)
            if are_fees_included:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))

        def obj_area(self):
            """Return the area, falling back to the minimum area when it is 0."""
            area = TypeDecimal(Dict('characteristics/area'))(self)
            if area == 0:
                area = TypeDecimal(Dict('characteristics/areaMin'))(self)
            return area

        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

        def obj_rooms(self):
            """Return the first entry of the room count list, if any."""
            # TODO: Why is roomCount a list?
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                return TypeDecimal(rooms[0])(self)
            return NotAvailable

        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return the ``xl`` rendition of every image of the listing."""
            photos = []
            for img in Dict('characteristics/images')(self):
                # When the address embeds another "http://" URL after the
                # thbr.figarocms.net prefix, keep that inner URL.
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', img.get('xl'))
                if m:
                    photos.append(HousingPhoto(m.group(1)))
                else:
                    photos.append(HousingPhoto(img.get('xl')))
            return photos

        def obj_DPE(self):
            """Return the energy class named by the energy consumption category."""
            DPE = Dict(
                'characteristics/energyConsumptionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_GES(self):
            """Return the energy class named by the greenhouse gas emission category."""
            GES = Dict(
                'characteristics/greenhouseGasEmissionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, GES, NotAvailable)

        def obj_details(self):
            """Collect the optional characteristics of the listing in a dict."""
            details = {}
            details['fees'] = Dict(
                'characteristics/fees', default=NotAvailable
            )(self)
            details['agencyFees'] = Dict(
                'characteristics/agencyFees', default=NotAvailable
            )(self)
            details['guarantee'] = Dict(
                'characteristics/guarantee', default=NotAvailable
            )(self)
            details['bathrooms'] = Dict(
                'characteristics/bathroomCount', default=NotAvailable
            )(self)
            details['creationDate'] = FromTimestamp(
                Dict(
                    'characteristics/creationDate', default=NotAvailable
                ),
                default=NotAvailable
            )(self)
            details['availabilityDate'] = Dict(
                'characteristics/estateAvailabilityDate', default=NotAvailable
            )(self)
            details['exposure'] = Dict(
                'characteristics/exposure', default=NotAvailable
            )(self)
            details['heatingType'] = Dict(
                'characteristics/heatingType', default=NotAvailable
            )(self)
            details['floor'] = Dict(
                'characteristics/floor', default=NotAvailable
            )(self)
            details['bedrooms'] = Dict(
                'characteristics/bedroomCount', default=NotAvailable
            )(self)
            details['isFurnished'] = Dict(
                'characteristics/isFurnished', default=NotAvailable
            )(self)
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                details['rooms'] = rooms[0]
            details['available'] = Dict(
                'characteristics/isAvailable', default=NotAvailable
            )(self)
            # Fall back to an empty mapping when the agency is missing, so
            # that the ``.get`` calls below do not fail.
            agency = Dict('agency', default=None)(self) or {}
            details['agency'] = ', '.join([
                x for x in [
                    agency.get('corporateName', ''),
                    agency.get('corporateAddress', ''),
                    agency.get('corporatePostalCode', ''),
                    agency.get('corporateCity', '')
                ] if x
            ])
            return details

    def get_total_page(self):
        """Return the ``total`` of the ``pagination`` entry, or 0 without one."""
        return self.doc.get('pagination').get('total') if 'pagination' in self.doc else 0
class HousingPage(HTMLPage):
    """HTML page describing a single listing."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Currency('//span[@itemprop="price"]')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # The area is the number preceding " m2" in the title.
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//h1[@itemprop="name"]'),
                r'(.*?)(\d*) m2(.*?)',
                '\\2'
            ),
            default=NotAvailable
        )
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return one photo per thumbnail, using the URL embedded in its source."""
            thumbnails = XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self)
            return [
                HousingPhoto(
                    Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')(thumbnail)
                )
                for thumbnail in thumbnails
            ]

        def obj_details(self):
            """Collect the name/value features of the page, plus the energy block."""
            details = {}

            for feature in XPath('//div[@class="features clearfix"]/ul/li')(self):
                name = CleanText('./span[@class="name"]')(feature)
                content = CleanText('./span[@class="value"]')(feature)
                if name and content:
                    details[name] = content

            # The energy consumption block is stored under its own title.
            dpe_title = CleanText('//div[@class="title-dpe clearfix"]')(self)
            dpe_value = CleanText('//div[@class="energy-consumption"]')(self)
            if dpe_title and dpe_value:
                details[dpe_title] = dpe_value

            return details

101
modules/explorimmo/test.py

@ -0,0 +1,101 @@ @@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query, ADVERT_TYPES, POSTS_TYPES
from weboob.tools.capabilities.housing.housing_test import HousingTest
from weboob.tools.test import BackendTest
class ExplorimmoTest(BackendTest, HousingTest):
    """Backend tests for the explorimmo module."""
    MODULE = 'explorimmo'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "title", "location",
        "utilities", "text", "area", "url"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos", "cost", "currency"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "rooms",
        "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "DPE",
        "GES",
        "phone"
    ]

    def _cities_for(self, pattern):
        """Return the cities found for ``pattern``, tagged with this backend's name."""
        found = []
        for city in self.backend.search_city(pattern):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_explorimmo_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_sale(self):
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_furnished_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_viager(self):
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._cities_for('85')
        self.check_against_query(query)

    def test_explorimmo_personal(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PERSONAL]
        query.cities = self._cities_for('paris')

        # This search is expected to return no result at all.
        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)

26
modules/foncia/__init__.py

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from .module import FonciaModule
__all__ = ['FonciaModule']

61
modules/foncia/browser.py

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.browser import PagesBrowser, URL
from .constants import QUERY_TYPES
from .pages import CitiesPage, HousingPage, SearchPage, SearchResultsPage
class FonciaBrowser(PagesBrowser):
    """Browser for the Foncia housing website."""
    BASEURL = 'https://fr.foncia.com'

    cities = URL(r'/recherche/autocomplete\?term=(?P<term>.+)', CitiesPage)
    housing = URL(r'/(?P<type>[^/]+)/.*\d+.htm', HousingPage)
    search_results = URL(r'/(?P<type>[^/]+)/.*', SearchResultsPage)
    search = URL(r'/(?P<type>.+)', SearchPage)

    def get_cities(self, pattern):
        """
        Get cities matching a given pattern.
        """
        return self.cities.open(term=pattern).iter_cities()

    def search_housings(self, query, cities):
        """
        Search for housings matching given query.

        Returns an empty list when the query type has no entry in
        ``QUERY_TYPES``.
        """
        try:
            query_type = QUERY_TYPES[query.type]
        except KeyError:
            return []

        self.search.go(type=query_type).do_search(query, cities)
        return self.page.iter_housings(query_type=query.type)

    def get_housing(self, housing):
        """
        Get specific housing.

        :param housing: either a ``"<type>:<reference>"`` id, or an object
            with such an ``id`` attribute. The module's ``fill_housing``
            passes the housing object itself, on which ``.split`` used to
            fail. A given object is filled in place.
        """
        obj = None
        housing_id = housing
        if hasattr(housing, 'id'):
            obj = housing
            housing_id = housing.id

        # Split on the first colon only, in case the reference holds one too.
        query_type, reference = housing_id.split(':', 1)
        self.search.go(type=query_type).find_housing(query_type, reference)
        return self.page.get_housing(obj=obj)

24
modules/foncia/constants.py

@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Search type used by the browser in the search URL for each post type.
QUERY_TYPES = {
POSTS_TYPES.RENT: 'location',
POSTS_TYPES.SALE: 'achat',
POSTS_TYPES.FURNISHED_RENT: 'location'
}
# Names under which each house type appears on the site; the housing page
# looks for them as "/<name>/" segments of the listing URL.
QUERY_HOUSE_TYPES = {
HOUSE_TYPES.APART: ['appartement', 'appartement-meuble'],
HOUSE_TYPES.HOUSE: ['maison'],
HOUSE_TYPES.PARKING: ['parking'],
HOUSE_TYPES.LAND: ['terrain'],
HOUSE_TYPES.OTHER: ['chambre', 'programme-neuf',
'local-commercial', 'immeuble']
}
# NOTE(review): presumably the house type names that can be searched for
# each post type - confirm against the search page.
AVAILABLE_TYPES = {
POSTS_TYPES.RENT: ['appartement', 'maison', 'parking', 'chambre',
'local-commercial'],
POSTS_TYPES.SALE: ['appartement', 'maison', 'parking', 'local-commercial',
'terrain', 'immeuble', 'programme-neuf'],
POSTS_TYPES.FURNISHED_RENT: ['appartement-meuble']
}

BIN
modules/foncia/favicon.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

74
modules/foncia/module.py

@ -0,0 +1,74 @@ @@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, ADVERT_TYPES, HousingPhoto
from .browser import FonciaBrowser
__all__ = ['FonciaModule']
class FonciaModule(Module, CapHousing):
    """Housing backend delegating its work to :class:`FonciaBrowser`."""
    NAME = 'foncia'
    DESCRIPTION = u'Foncia housing website.'
    MAINTAINER = u'Phyks (Lucas Verney)'
    EMAIL = 'phyks@phyks.me'
    LICENSE = 'AGPLv3+'
    VERSION = '2.1'

    BROWSER = FonciaBrowser

    def get_housing(self, housing):
        """Fetch the given housing through the browser."""
        return self.browser.get_housing(housing)

    def search_city(self, pattern):
        """Return the cities matching ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for ``query``, using only this backend's cities."""
        advert_types = query.advert_types
        if len(advert_types) == 1 and advert_types[0] == ADVERT_TYPES.PERSONAL:
            # Foncia is pro only
            return []

        own_city_names = [
            '%s' % city.name
            for city in query.cities
            if city.backend == self.name
        ]
        cities = ','.join(own_city_names)
        if not cities:
            return []

        return self.browser.search_housings(query, cities)

    def fill_housing(self, housing, fields):
        """Ask the browser for the housing when fields are requested, then return it."""
        if fields:
            self.browser.get_housing(housing)
        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when it is requested and not yet loaded."""
        wants_data = 'data' in fields
        if wants_data and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Filler function to use for each object type.
    OBJECTS = {
        Housing: fill_housing,
        HousingPhoto: fill_photo,
    }

359
modules/foncia/pages.py

@ -0,0 +1,359 @@ @@ -0,0 +1,359 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import datetime
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import (
CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
)
from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (
City, Housing, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES
class CitiesPage(JsonPage):
    """JSON answer of the city autocomplete endpoint."""

    def iter_cities(self):
        """Yield one ``City`` per entry, using the entry as both id and name."""
        document = self.doc
        # The document is either a sequence of entries or a dict holding them
        # as values.
        entries = document.values() if isinstance(document, dict) else document
        for entry in entries:
            found = City()
            found.id = entry
            found.name = entry
            yield found
class HousingPage(HTMLPage):
@method
class get_housing(ItemElement):
klass = Housing
obj_id = Format(
'%s:%s',
Env('type'),
Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
)
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
def obj_type(self):
    """Map the ``type`` environment value (and the page URL) to a post type."""
    search_type = Env('type')(self)
    if search_type == 'achat':
        return POSTS_TYPES.SALE
    if search_type != 'location':
        return NotAvailable
    # Furnished rentals are recognised by their URL.
    if 'appartement-meuble' in self.page.url:
        return POSTS_TYPES.FURNISHED_RENT
    return POSTS_TYPES.RENT
def obj_url(self):
    """Return the URL of the page being parsed."""
    page = self.page
    return page.url
def obj_house_type(self):
    """Return the first house type with a name found as a "/<name>/" URL segment."""
    page_url = self.obj_url()
    for house_type, names in QUERY_HOUSE_TYPES.items():
        if any(('/%s/' % name) in page_url for name in names):
            return house_type
    return NotAvailable
obj_title = CleanText('//h1[has-class("OfferTop-title")]')
obj_area = CleanDecimal(
Regexp(
CleanText(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
),
r'(\d*\.*\d*) .*',
default=NotAvailable
),
default=NotAvailable
)
obj_cost = CleanDecimal(
'//span[has-class("OfferTop-price")]',
default=NotAvailable
)
obj_price_per_meter = PricePerMeterFilter()
obj_currency = Currency(
'//span[has-class("OfferTop-price")]'
)
obj_location = Format(
'%s - %s',
CleanText('//p[@data-behat="adresseBien"]'),
CleanText('//p[has-class("OfferTop-loc")]')
)
obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
obj_phone = Regexp(
Link(
'//a[has-class("OfferContact-btn--tel")]'
),
r'tel:(.*)'
)
def obj_photos(self):
photos = []
for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
photo_url = Attr('.', 'src')(photo)
photo_url = photo_url.replace('640/480', '800/600')
photos.append(HousingPhoto(photo_url))
return photos
obj_date = datetime.date.today()
def obj_utilities(self):
price = CleanText(
'//p[has-class("OfferTop-price")]'
)(self)
if "charges comprises" in price.lower():
return UTILITIES.INCLUDED
else:
return UTILITIES.EXCLUDED
obj_rooms = CleanDecimal(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
default=NotAvailable
)
obj_bedrooms = CleanDecimal(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
default=NotAvailable
)
def obj_DPE(self):
try:
electric_consumption = CleanDecimal(Regexp(
Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
r'https://dpe.foncia.net\/(\d+)\/.*'
))(self)
except (RegexpError, XPathNotFound):
electric_consumption = None
DPE = ""
if electric_consumption is not None:
if electric_consumption <= 50:
DPE = "A"
elif 50 < electric_consumption <= 90:
DPE = "B"
elif 90 < electric_consumption <= 150:
DPE = "C"
elif 150 < electric_consumption <= 230:
DPE = "D"
elif 230 < electric_consumption <= 330:
DPE = "E"
elif 330 < electric_consumption <= 450:
DPE = "F"
else:
DPE = "G"
return getattr(ENERGY_CLASS, DPE, NotAvailable)
return NotAvailable
def obj_details(self):
details = {}
dispo = Date(
Regexp(
CleanText('//p[has-class("OfferTop-dispo")]'),
r'.* (\d\d\/\d\d\/\d\d\d\d)',
default=datetime.date.today().isoformat()
)
)(self)
if dispo is not None:
details["dispo"] = dispo
priceMentions = CleanText(
'//p[has-class("OfferTop-mentions")]',
default=None
)(self)
if priceMentions is not None:
details["priceMentions"] = priceMentions
agency = CleanText(
'//p[has-class("OfferContact-address")]',
default=None
)(self)
if agency is not None:
details["agency"] = agency
for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
category = CleanText(
'./h3[has-class("OfferDetails-title--2")]',
default=None
)(item)
if not category:
continue
details[category] = {}
for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
detail_value = CleanText('.//*[has-class("List-value")]')(detail_item)
details[category][detail_title] = detail_value
for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
detail_title = CleanText('.')(detail_item)
details[category][detail_title] = True
try:
electric_consumption = CleanDecimal(Regexp(
Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
r'https://dpe.foncia.net\/(\d+)\/.*'
))(self)
details["electric_consumption"] = (
'{} kWhEP/m².an'.format(electric_consumption)
)
except (RegexpError, XPathNotFound):
pass
return details
class SearchPage(HTMLPage):
    """HTML page holding the ``searchForm`` search form."""

    def do_search(self, query, cities):
        """Fill the search form from ``query`` and ``cities``, then submit it.

        :param query: object providing ``type``, ``house_types``,
            ``area_min``, ``area_max``, ``cost_min``, ``cost_max`` and
            ``nb_rooms`` attributes.
        :param cities: value stored as is in the localisation field.
        """
        search_form = self.get_form('//form[@name="searchForm"]')

        search_form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
        search_form['searchForm[localisation]'] = cities

        # Gather the site values of every requested house type (unknown types
        # are ignored), then keep only those allowed for this query type.
        requested = []
        for house_type in query.house_types:
            if house_type in QUERY_HOUSE_TYPES:
                requested.extend(QUERY_HOUSE_TYPES[house_type])
        allowed = AVAILABLE_TYPES.get(query.type, [])
        search_form['searchForm[type_bien][]'] = [
            value for value in requested if value in allowed
        ]

        # Optional criteria are only sent when they hold a truthy value.
        optional_fields = (
            ('searchForm[surface_min]', query.area_min),
            ('searchForm[surface_max]', query.area_max),
            ('searchForm[prix_min]', query.cost_min),
            ('searchForm[prix_max]', query.cost_max),
        )
        for field_name, value in optional_fields:
            if value:
                search_form[field_name] = value

        if query.nb_rooms:
            search_form['searchForm[pieces]'] = list(
                range(1, query.nb_rooms + 1)
            )

        search_form.submit()

    def find_housing(self, query_type, housing):
        """Submit the search form to look up one advert by its reference.

        :param query_type: value stored in the type field of the form.
        :param housing: value stored in the reference field of the form.
        """
        lookup_form = self.get_form('//form[@name="searchForm"]')
        lookup_form['searchForm[reference]'] = housing
        lookup_form['searchForm[type]'] = query_type
        lookup_form.submit()
class SearchResultsPage(HTMLPage):
    """HTML page listing the adverts matching a search."""

    @pagination
    @method
    class iter_housings(ListElement):
        """Iterate over the adverts of the page, following the next-page link."""

        item_xpath = '//article[has-class("TeaserOffer")]'

        next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')

        class item(ItemElement):
            """Build a ``Housing`` object from one advert teaser."""

            klass = Housing

            # The id is "<type>:<reference>", the reference being read from
            # the ``data-reference`` attribute of the teaser.
            obj_id = Format(
                '%s:%s',
                Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference')
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Deduce the house type from the path segments of the advert URL.

                Returns the first key of ``QUERY_HOUSE_TYPES`` having one of
                its values present as a ``/<value>/`` segment in the URL, or
                ``NotLoaded`` when none matches.
                """
                url = self.obj_url(self)
                for house_type, slugs in QUERY_HOUSE_TYPES.items():
                    for slug in slugs:
                        if ('/%s/' % slug) in url:
                            return house_type
                return NotLoaded

            # Single definition of the advert URL (it used to be declared
            # twice with the exact same filter).
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            obj_area = CleanDecimal(
                Regexp(
                    CleanText(
                        './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
                    ),
                    r'(\d*\.*\d*) .*',
                    default=NotAvailable
                ),
                default=NotAvailable
            )
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]',
                default=NotAvailable
            )
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]'
            )
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return the teaser picture, or an empty list for the placeholder.

                The placeholder used when an advert has no picture is served
                from the same domain, hence its ``src`` starts with ``/``.
                An empty ``src`` is treated as "no picture" as well instead of
                raising ``IndexError``.
                """
                url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            def obj_date(self):
                """Return today's date.

                This is a method rather than a class attribute so that the
                date is computed each time an advert is parsed instead of
                being frozen when the class is created (i.e. when the module
                is imported).
                """
                return datetime.date.today()

            def obj_utilities(self):
                """Tell whether the displayed price mentions included charges."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]'
                )(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded
            )
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded
            )

            def obj_details(self):
                """Return the ``dispo`` date and the ``priceMentions`` text.

                ``dispo`` falls back to today's date when the teaser carries
                no ``data-dispo`` attribute.
                """
                return {
                    "dispo": Date(
                        Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
                             default=datetime.date.today().isoformat())
                    )(self),
                    "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
                }

95
modules/foncia/test.py

@ -0,0 +1,95 @@ @@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.capabilities.housing import (
Query, POSTS_TYPES, ADVERT_TYPES
)
from weboob.tools.capabilities.housing.housing_test import HousingTest
from weboob.tools.test import BackendTest
class FonciaTest(BackendTest, HousingTest):
    """Backend tests for the ``foncia`` module."""

    MODULE = 'foncia'

    # Field-name lists; presumably consumed by the HousingTest checks
    # (semantics are defined there -- confirm against HousingTest).
    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "date", "location", "text", "details"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "rooms"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "phone",
        "DPE", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "rooms"
    ]

    def _paris_cities(self):
        """Return the cities found for 'paris', tagged with the backend name."""
        found = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_foncia_rent(self):
        """Check a rental search against the query."""
        query = Query()
        query.type = POSTS_TYPES.RENT
        query.area_min = 20
        query.cost_max = 1500
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_foncia_sale(self):
        """Check a sale search (no maximum cost) against the query."""
        query = Query()
        query.type = POSTS_TYPES.SALE
        query.area_min = 20
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_foncia_furnished_rent(self):
        """Check a furnished rental search against the query."""
        query = Query()
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.area_min = 20
        query.cost_max = 1500
        query.cities = self._paris_cities()
        self.check_against_query(query)

    def test_foncia_personal(self):
        """Searching for personal adverts is expected to return nothing."""
        query = Query()
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PERSONAL]
        query.area_min = 20
        query.cost_max = 900
        query.cities = self._paris_cities()

        housings = list(self.backend.search_housings(query))
        self.assertEqual(len(housings), 0)

24
modules/leboncoin/__init__.py

@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
# Re-export the module class from the sibling ``module`` file at package level.
from .module import LeboncoinModule
# Names exported by ``from <package> import *``.
__all__ = ['LeboncoinModule']

145
modules/leboncoin/browser.py