flatisfy/modules/explorimmo/pages.py

456 lines
17 KiB
Python
Raw Normal View History

2021-03-28 18:59:07 +02:00
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
2021-04-08 20:08:23 +02:00
# This file is part of a woob module.
2021-03-28 18:59:07 +02:00
#
2021-04-08 20:08:23 +02:00
# This woob module is free software: you can redistribute it and/or modify
2021-03-28 18:59:07 +02:00
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
2021-04-08 20:08:23 +02:00
# This woob module is distributed in the hope that it will be useful,
2021-03-28 18:59:07 +02:00
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
2021-04-08 20:08:23 +02:00
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
2021-03-28 18:59:07 +02:00
from __future__ import unicode_literals
import json
import math
import re
from decimal import Decimal
from datetime import datetime
2021-04-08 20:08:23 +02:00
from woob.browser.filters.json import Dict
from woob.browser.elements import ItemElement, ListElement, DictElement, method
from woob.browser.pages import JsonPage, HTMLPage, pagination
from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
2021-03-28 18:59:07 +02:00
Regexp, Env, BrowserURL, Filter,
Format)
2021-04-08 20:08:23 +02:00
from woob.browser.filters.html import Attr, CleanHTML, XPath
from woob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency
from woob.capabilities.housing import (Housing, HousingPhoto, City,
2021-03-28 18:59:07 +02:00
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
2021-04-08 20:08:23 +02:00
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
from woob.tools.compat import unquote
2021-03-28 18:59:07 +02:00
class CitiesPage(JsonPage):
ENCODING = 'UTF-8'
def build_doc(self, content):
content = super(CitiesPage, self).build_doc(content)
if content:
return content
else:
return [{"locations": []}]
@method
class get_cities(DictElement):
item_xpath = '0/locations'
class item(ItemElement):
klass = City
obj_id = Dict('label')
obj_name = Dict('label')
class SearchPage(HTMLPage):
@pagination
@method
class iter_housings(ListElement):
item_xpath = '//div[starts-with(@id, "bloc-vue-")]'
def next_page(self):
js_datas = CleanText(
'//div[@id="js-data"]/@data-rest-search-request'
)(self).split('?')[-1].split('&')
try:
resultsPerPage = next(
x for x in js_datas if 'resultsPerPage' in x
).split('=')[-1]
currentPageNumber = next(
x for x in js_datas if 'currentPageNumber' in x
).split('=')[-1]
resultCount = CleanText(
'(//div[@id="js-data"]/@data-result-count)[1]'
)(self)
totalPageNumber = math.ceil(
int(resultCount) / int(resultsPerPage)
)
next_page = int(currentPageNumber) + 1
if next_page <= totalPageNumber:
return self.page.url.replace(
'page=%s' % currentPageNumber,
'page=%d' % next_page
)
except StopIteration:
pass
class item(ItemElement):
klass = Housing
price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'
def is_agency(self):
agency = CleanText('.//span[has-class("item-agency-name")]')(self.el)
return 'annonce de particulier' not in agency.lower()
def condition(self):
if len(self.env['advert_types']) == 1:
is_agency = self.is_agency()
if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
return not is_agency
elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
return is_agency
return Attr('.', 'data-classified-id', default=False)(self)
obj_id = Attr('.', 'data-classified-id')
obj_type = Env('query_type')
obj_title = CleanText('./div/h2[@class="item-type"]')
def obj_advert_type(self):
if self.is_agency():
return ADVERT_TYPES.PROFESSIONAL
else:
return ADVERT_TYPES.PERSONAL
def obj_house_type(self):
type = self.obj_title(self).split()[0].lower()
if type == "appartement" or type == "studio" or type == "chambre":
return HOUSE_TYPES.APART
elif type == "maison" or type == "villa":
return HOUSE_TYPES.HOUSE
elif type == "parking":
return HOUSE_TYPES.PARKING
elif type == "terrain":
return HOUSE_TYPES.LAND
else:
return HOUSE_TYPES.OTHER
def obj_location(self):
script = CleanText('./script')(self)
try:
# Should be standard JSON+LD data
script = json.loads(script)
except ValueError:
try:
# But explorimmo can't write JSON correctly and there
# is a trailing "}"
script = json.loads(script.strip().rstrip('}'))
except ValueError:
script = None
if not script:
return NotLoaded
try:
return '%s (%s)' % (
script['address']['addressLocality'],
script['address']['postalCode']
)
except (KeyError):
return NotLoaded
def obj_cost(self):
cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
r'de (.*) à .*',
default=0))(self)
if cost == 0:
return CleanDecimal(self.price_selector, default=NotAvailable)(self)
else:
return cost
obj_currency = Currency(price_selector)
def obj_utilities(self):
utilities = CleanText(
'./div/div/span[@class="price-label"]|'
'./div/div[@class="item-price-pdf"]|'
'./div/div/span[@class="item-price"]'
)(self)
if "CC" in utilities:
return UTILITIES.INCLUDED
else:
return UTILITIES.UNKNOWN
obj_text = CleanText('./div/p[@itemprop="description"]')
obj_area = CleanDecimal(
Regexp(
obj_title,
r'(.*?)([\d,\.]*) m2(.*?)',
'\\2',
default=None
),
replace_dots=True,
default=NotLoaded
)
obj_url = Format(
"https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
CleanText('./@data-classified-id')
)
obj_price_per_meter = PricePerMeterFilter()
def obj_phone(self):
phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
replace=[('Téléphoner : ', '')],
default=NotLoaded)(self)
if '...' in phone:
return NotLoaded
return phone
def obj_details(self):
charges = CleanText('.//span[@class="price-fees"]',
default=None)(self)
if charges:
return {
"fees": charges.split(":")[1].strip()
}
else:
return NotLoaded
def obj_photos(self):
url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
if url:
url = unquote(url)
if "http://" in url[3:]:
rindex = url.rfind("?")
if rindex == -1:
rindex = None
url = url[url.find("http://", 3):rindex]
return [HousingPhoto(url)]
else:
return NotLoaded
class TypeDecimal(Filter):
def filter(self, el):
return Decimal(el)
class FromTimestamp(Filter):
def filter(self, el):
return datetime.fromtimestamp(el / 1000.0)
class PhonePage(JsonPage):
def get_phone(self):
return self.doc.get('phoneNumber')
class HousingPage2(JsonPage):
@method
class get_housing(ItemElement):
klass = Housing
def is_agency(self):
return Dict('agency/isParticulier')(self) == 'false'
obj_id = Env('_id')
def obj_type(self):
transaction = Dict('characteristics/transaction')(self)
if transaction == 'location':
if Dict('characteristics/isFurnished')(self):
return POSTS_TYPES.FURNISHED_RENT
else:
return POSTS_TYPES.RENT
elif transaction == 'vente':
type = Dict('characteristics/estateType')(self).lower()
if 'viager' in type:
return POSTS_TYPES.VIAGER
else:
return POSTS_TYPES.SALE
else:
return NotAvailable
def obj_advert_type(self):
if self.is_agency:
return ADVERT_TYPES.PROFESSIONAL
else:
return ADVERT_TYPES.PERSONAL
def obj_house_type(self):
type = Dict('characteristics/estateType')(self).lower()
if 'appartement' in type:
return HOUSE_TYPES.APART
elif 'maison' in type:
return HOUSE_TYPES.HOUSE
elif 'parking' in type:
return HOUSE_TYPES.PARKING
elif 'terrain' in type:
return HOUSE_TYPES.LAND
else:
return HOUSE_TYPES.OTHER
obj_title = Dict('characteristics/titleWithTransaction')
obj_location = Format('%s %s %s', Dict('location/address'),
Dict('location/cityLabel'),
Dict('location/postalCode'))
def obj_cost(self):
cost = TypeDecimal(Dict('characteristics/price'))(self)
if cost == 0:
cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
return cost
obj_currency = BaseCurrency.get_currency('')
def obj_utilities(self):
are_fees_included = Dict('characteristics/areFeesIncluded',
default=None)(self)
if are_fees_included:
return UTILITIES.INCLUDED
else:
return UTILITIES.EXCLUDED
obj_text = CleanHTML(Dict('characteristics/description'))
obj_url = BrowserURL('housing_html', _id=Env('_id'))
def obj_area(self):
area = TypeDecimal(Dict('characteristics/area'))(self)
if area == 0:
area = TypeDecimal(Dict('characteristics/areaMin'))(self)
return area
obj_date = FromTimestamp(Dict('characteristics/date'))
obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))
def obj_rooms(self):
# TODO: Why is roomCount a list?
rooms = Dict('characteristics/roomCount', default=[])(self)
if rooms:
return TypeDecimal(rooms[0])(self)
return NotAvailable
obj_price_per_meter = PricePerMeterFilter()
def obj_photos(self):
photos = []
for img in Dict('characteristics/images')(self):
m = re.search('http://thbr\.figarocms\.net.*(http://.*)', img.get('xl'))
if m:
photos.append(HousingPhoto(m.group(1)))
else:
photos.append(HousingPhoto(img.get('xl')))
return photos
def obj_DPE(self):
DPE = Dict(
'characteristics/energyConsumptionCategory',
default=""
)(self)
return getattr(ENERGY_CLASS, DPE, NotAvailable)
def obj_GES(self):
GES = Dict(
'characteristics/greenhouseGasEmissionCategory',
default=""
)(self)
return getattr(ENERGY_CLASS, GES, NotAvailable)
def obj_details(self):
details = {}
details['fees'] = Dict(
'characteristics/fees', default=NotAvailable
)(self)
details['agencyFees'] = Dict(
'characteristics/agencyFees', default=NotAvailable
)(self)
details['guarantee'] = Dict(
'characteristics/guarantee', default=NotAvailable
)(self)
details['bathrooms'] = Dict(
'characteristics/bathroomCount', default=NotAvailable
)(self)
details['creationDate'] = FromTimestamp(
Dict(
'characteristics/creationDate', default=NotAvailable
),
default=NotAvailable
)(self)
details['availabilityDate'] = Dict(
'characteristics/estateAvailabilityDate', default=NotAvailable
)(self)
details['exposure'] = Dict(
'characteristics/exposure', default=NotAvailable
)(self)
details['heatingType'] = Dict(
'characteristics/heatingType', default=NotAvailable
)(self)
details['floor'] = Dict(
'characteristics/floor', default=NotAvailable
)(self)
details['bedrooms'] = Dict(
'characteristics/bedroomCount', default=NotAvailable
)(self)
details['isFurnished'] = Dict(
'characteristics/isFurnished', default=NotAvailable
)(self)
rooms = Dict('characteristics/roomCount', default=[])(self)
if len(rooms):
details['rooms'] = rooms[0]
details['available'] = Dict(
'characteristics/isAvailable', default=NotAvailable
)(self)
agency = Dict('agency', default=NotAvailable)(self)
details['agency'] = ', '.join([
x for x in [
agency.get('corporateName', ''),
agency.get('corporateAddress', ''),
agency.get('corporatePostalCode', ''),
agency.get('corporateCity', '')
] if x
])
return details
def get_total_page(self):
return self.doc.get('pagination').get('total') if 'pagination' in self.doc else 0
class HousingPage(HTMLPage):
@method
class get_housing(ItemElement):
klass = Housing
obj_id = Env('_id')
obj_title = CleanText('//h1[@itemprop="name"]')
obj_location = CleanText('//span[@class="informations-localisation"]')
obj_cost = CleanDecimal('//span[@itemprop="price"]')
obj_currency = Currency('//span[@itemprop="price"]')
obj_text = CleanHTML('//div[@itemprop="description"]')
obj_url = BrowserURL('housing', _id=Env('_id'))
obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),
r'(.*?)(\d*) m2(.*?)', '\\2'), default=NotAvailable)
obj_price_per_meter = PricePerMeterFilter()
def obj_photos(self):
photos = []
for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
url = Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')(img)
photos.append(HousingPhoto(url))
return photos
def obj_details(self):
details = dict()
for item in XPath('//div[@class="features clearfix"]/ul/li')(self):
key = CleanText('./span[@class="name"]')(item)
value = CleanText('./span[@class="value"]')(item)
if value and key:
details[key] = value
key = CleanText('//div[@class="title-dpe clearfix"]')(self)
value = CleanText('//div[@class="energy-consumption"]')(self)
if value and key:
details[key] = value
return details