263 lines
10 KiB
Python
263 lines
10 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2012 Romain Bignon
|
|
#
|
|
# This file is part of a woob module.
|
|
#
|
|
# This woob module is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This woob module is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
from woob.browser.pages import JsonPage, pagination, HTMLPage
|
|
from woob.browser.elements import ItemElement, DictElement, method
|
|
from woob.browser.filters.json import Dict
|
|
from woob.browser.filters.html import XPath
|
|
from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
|
|
Env, Regexp, Field, BrowserURL)
|
|
from woob.capabilities.base import NotAvailable, NotLoaded
|
|
from woob.capabilities.housing import (Housing, HousingPhoto, City,
|
|
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
|
|
ADVERT_TYPES)
|
|
from woob.capabilities.address import PostalAddress
|
|
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
|
|
from woob.tools.json import json
|
|
from woob.exceptions import ActionNeeded
|
|
from .constants import TYPES, RET
|
|
import codecs
|
|
|
|
|
|
class ErrorPage(HTMLPage):
    """HTML page whose ``on_load`` hook always raises ``ActionNeeded``."""

    def on_load(self):
        """Raise ``ActionNeeded`` asking the user to resolve the captcha.

        :raises ActionNeeded: unconditionally.
        """
        message = "Please resolve the captcha"
        raise ActionNeeded(message)
|
|
|
|
|
|
class CitiesPage(JsonPage):
    """JSON page from which ``City`` objects are built."""

    @method
    class iter_cities(DictElement):
        """Iterate over the JSON document and build one ``City`` per entry."""

        # Duplicate handling is delegated to the element machinery.
        ignore_duplicate = True

        class item(ItemElement):
            """Map one JSON entry to a ``City``."""

            klass = City

            # The identifier is read from the ``ci`` key nested under ``Params``.
            obj_id = Dict('Params/ci')
            # The name is read from the ``Display`` key.
            obj_name = Dict('Display')
|
|
|
|
|
|
class SearchResultsPage(HTMLPage):
    """Search results page whose data is embedded as JSON inside a script.

    After the regular ``HTMLPage`` initialisation, ``self.doc`` is replaced by
    the decoded ``window["initialData"]`` payload, so the elements below walk
    a JSON structure with ``Dict`` filters instead of an HTML tree.
    """

    def __init__(self, *args, **kwargs):
        """Parse the page, then swap ``self.doc`` for the embedded JSON data."""
        HTMLPage.__init__(self, *args, **kwargs)

        # The payload is the string literal handed to JSON.parse() in the
        # page's script; capture what sits between the quotes.
        json_content = Regexp(
            CleanText('//script'),
            r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
        )(self.doc)

        # Resolve the backslash escapes of that string literal.
        json_content = codecs.unicode_escape_decode(json_content)[0]

        # Round-trip through UTF-8; 'surrogatepass' lets the encode step
        # accept surrogate code points.
        # NOTE(review): the strict decode that follows rejects encoded
        # surrogates, so this line appears to either leave the text unchanged
        # or raise. A UTF-16 round-trip is the usual way to merge surrogate
        # pairs -- confirm the intent before changing it.
        json_content = json_content.encode('utf-8', 'surrogatepass').decode('utf-8')

        self.doc = json.loads(json_content)

    @pagination
    @method
    class iter_housings(DictElement):
        """Iterate over the entries found under ``cards/list``."""

        item_xpath = 'cards/list'

        def next_page(self):
            """Return the URL of the next results page, or ``None`` if done.

            The page number, total result count and page size are read from
            ``navigation/pagination``. Another page is requested while the
            pages seen so far do not cover ``maxResults``.
            """
            page_nb = int(Dict('navigation/pagination/page')(self))
            max_results = int(Dict('navigation/pagination/maxResults')(self))
            results_per_page = int(Dict('navigation/pagination/resultsPerPage')(self))

            # A non-positive page size would make the ratio meaningless (and
            # used to raise ZeroDivisionError); stop paginating instead.
            if results_per_page <= 0:
                return None

            # Same test as max_results / results_per_page > page_nb, kept in
            # integer arithmetic.
            if max_results > page_nb * results_per_page:
                return BrowserURL('search', query=Env('query'), page_number=page_nb + 1)(self)
            return None

        # TODO handle bellesdemeures

        class item(ItemElement):
            """Build a ``Housing`` from one card of the results list."""

            klass = Housing

            def condition(self):
                """Skip advertising / local-expert cards and cards without id."""
                return (Dict('cardType')(self) not in ['advertising', 'localExpert']
                        and Dict('id', default=False)(self))

            obj_id = Dict('id')

            def obj_type(self):
                """Map the ``query_type`` env value back to a ``POSTS_TYPES`` key.

                Returns ``NotLoaded`` when no entry of ``TYPES`` matches.
                Letting ``StopIteration`` escape from here would surface as
                an obscure error in the generator-based iteration.
                """
                query_type = int(Env('query_type')(self))
                post_type = next((k for k, v in TYPES.items() if v == query_type), None)
                if post_type is None:
                    return NotLoaded
                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type

            def obj_title(self):
                """Compose the title from estate type, tags and location."""
                return "{} - {} - {}".format(Dict('estateType')(self),
                                             " / ".join(Dict('tags')(self)),
                                             Field('location')(self))

            def obj_advert_type(self):
                """Professional when the card carries a ``contact/agencyId``."""
                is_agency = Dict('contact/agencyId', default=False)(self)
                return ADVERT_TYPES.PROFESSIONAL if is_agency else ADVERT_TYPES.PERSONAL

            obj_utilities = UTILITIES.EXCLUDED

            def obj_photos(self):
                """Wrap every entry of ``photos`` in a ``HousingPhoto``."""
                return [HousingPhoto(photo) for photo in Dict('photos')(self)]

            def obj_location(self):
                """Return ``"<district> <city> (<zip code>)"``.

                Falsy components are replaced by an empty string.
                """
                quartier = Dict('districtLabel')(self) or ''
                ville = Dict('cityLabel')(self) or ''
                cp = Dict('zipCode')(self) or ''
                return u'%s %s (%s)' % (quartier, ville, cp)

            obj_url = Dict('classifiedURL')

            obj_text = Dict('description')

            obj_cost = CleanDecimal(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            obj_currency = Currency(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            # NOTE(review): the default below is the PricePerMeterFilter class
            # itself, not an instance (HousingPage uses PricePerMeterFilter()).
            # Left unchanged -- confirm how the filter machinery treats it.
            obj_price_per_meter = CleanDecimal(Dict('pricing/squareMeterPrice'), default=PricePerMeterFilter)
|
|
|
|
|
|
class HousingPage(HTMLPage):
    """HTML detail page of a single housing advert."""

    @method
    class get_housing(ItemElement):
        """Build a ``Housing`` from the page's hidden form and scripts.

        Most fields are read from ``<input>`` elements of the form named
        ``central``. Room counts come from the ``ava_data`` script variable,
        which ``parse`` decodes into ``self.house_json_datas``.
        """

        klass = Housing

        def parse(self, el):
            """Decode the ``ava_data`` script variable.

            The captured object literal has some unquoted keys and an inline
            comment. Both are patched with plain text substitutions so that
            it parses as JSON. The first entry of ``products`` is stored on
            ``self.house_json_datas``.
            """
            json_content = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)
            # Quote the bare keys, in the same order as before.
            for bare_key in ("logged", "lengthcarrousel", "products"):
                json_content = json_content.replace(bare_key, "\"%s\"" % bare_key)
            # Drop the inline comment left inside the literal.
            json_content = json_content.replace("// // ANNONCES_SIMILAIRE / RECO", "")
            self.house_json_datas = json.loads(json_content)['products'][0]

        obj_id = CleanText('//form[@name="central"]/input[@name="idannonce"]/@value')

        def obj_house_type(self):
            """Map the ``naturebien`` form value back to a key of ``RET``.

            Returns ``NotLoaded`` when no entry matches.
            """
            naturebien = CleanText('//form[@name="central"]/input[@name="naturebien"]/@value')(self)
            return next((k for k, v in RET.items() if v == naturebien), NotLoaded)

        def obj_type(self):
            """Map the ``idtt`` form value back to a ``POSTS_TYPES`` key.

            Returns ``NotLoaded`` when no entry of ``TYPES`` matches, in line
            with ``obj_house_type``, rather than letting ``StopIteration``
            escape.
            """
            id_type = int(CleanText('//form[@name="central"]/input[@name="idtt"]/@value')(self))
            post_type = next((k for k, v in TYPES.items() if v == id_type), None)
            if post_type is None:
                return NotLoaded
            if post_type == POSTS_TYPES.FURNISHED_RENT:
                # SeLoger does not let us discriminate between furnished and not furnished.
                return POSTS_TYPES.RENT
            return post_type

        def obj_advert_type(self):
            """Professional when any agency-related form input is non-empty."""
            # NOTE(review): "nomagance" looks like a typo for "nomagence", but
            # it is matched against the page markup -- verify against the
            # site before changing it.
            is_agency = (
                CleanText('//form[@name="central"]/input[@name="nomagance"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="urlagence"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="adresseagence"]/@value')(self)
            )
            return ADVERT_TYPES.PROFESSIONAL if is_agency else ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Collect carousel photos: eager ``img/@src`` first, then lazy ones.

            Lazy slides carry a JSON blob in ``data-lazy`` whose ``url`` entry
            is used. Every URL is prefixed with ``https:``.
            """
            photos = [
                HousingPhoto("https:{}".format(src))
                for src in XPath('//div[@class="carrousel_slide"]/img/@src')(self)
            ]
            photos.extend(
                HousingPhoto("https:{}".format(json.loads(lazy)['url']))
                for lazy in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self)
            )
            return photos

        obj_title = CleanText('//title[1]')

        def obj_location(self):
            """Return ``"<district> <city> (<postal code>)"``.

            The district is extracted from a script. The city and postal code
            come from the form, with falsy values replaced by ``''``.
            """
            quartier = Regexp(CleanText('//script'),
                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            ville = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self) or ''
            cp = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self) or ''
            return u'%s %s (%s)' % (quartier, ville, cp)

        def obj_address(self):
            """Build a ``PostalAddress`` from the same sources as the location."""
            p = PostalAddress()

            # The street field is filled with the district name from the script.
            p.street = Regexp(CleanText('//script'),
                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            p.postal_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
            p.city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
            p.full_address = Field('location')(self)
            return p

        obj_text = CleanText('//form[@name="central"]/input[@name="description"]/@value')

        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_currency = Currency(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_price_per_meter = PricePerMeterFilter()

        obj_area = CleanDecimal('//form[@name="central"]/input[@name="surface"]/@value', replace_dots=True)
        obj_url = CleanText('//form[@name="central"]/input[@name="urlannonce"]/@value')
        obj_phone = CleanText('//div[@class="data-action"]/a[@data-phone]/@data-phone')

        def obj_utilities(self):
            """``INCLUDED`` when the price mention says so, else ``UNKNOWN``."""
            mention = CleanText('//span[@class="detail_indice_prix"]', default="")(self)
            if "(CC) Loyer mensuel charges comprises" in mention:
                return UTILITIES.INCLUDED
            return UTILITIES.UNKNOWN

        def obj_bedrooms(self):
            """Number of bedrooms from ``nb_chambres``, or ``NotLoaded``."""
            # The outer default mirrors obj_cost on the search page, so a
            # missing key yields NotLoaded rather than relying on how
            # CleanDecimal treats an empty input.
            return CleanDecimal(Dict('nb_chambres', default=NotLoaded),
                                default=NotLoaded)(self.house_json_datas)

        def obj_rooms(self):
            """Number of rooms from ``nb_pieces``, or ``NotLoaded``."""
            return CleanDecimal(Dict('nb_pieces', default=NotLoaded),
                                default=NotLoaded)(self.house_json_datas)
|
|
|
|
|
|
class HousingJsonPage(JsonPage):
    """JSON page carrying energy ratings and extra details of a housing."""

    @method
    class get_housing(ItemElement):
        """Fill the energy classes and the details dict of a ``Housing``."""

        klass = Housing

        def _energy_class(self, key):
            """Return the ``ENERGY_CLASS`` member stored under ``key``.

            ``NotAvailable`` is returned when the entry is missing or empty,
            when its ``status`` is greater than 0, when it has no ``lettre``,
            or when the letter is not an attribute of ``ENERGY_CLASS``.
            """
            rating = Dict(key, default="")(self)
            # The "" default is not subscriptable; bail out before indexing.
            if not rating:
                return NotAvailable
            if rating['status'] > 0:
                return NotAvailable
            letter = rating.get('lettre')
            if not letter:
                return NotAvailable
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_DPE(self):
            """Energy class read from the ``energie`` entry."""
            return self._energy_class("energie")

        def obj_GES(self):
            """Energy class read from the ``ges`` entry."""
            return self._energy_class("ges")

        def obj_details(self):
            """Flatten ``categories`` and ``infos_acquereur`` into one dict.

            Each category with non-empty criteria contributes one entry
            joining its criteria values with ``' / '``. Every mapping found
            under ``infos_acquereur`` is then merged in, with later keys
            overriding earlier ones.
            """
            details = {}

            for category in Dict('categories')(self):
                criteria = category['criteria']
                if criteria:
                    details[category['name']] = ' / '.join(
                        [criterion['value'] for criterion in criteria]
                    )

            for infos in Dict('infos_acquereur')(self).values():
                details.update(infos)

            return details
|