flatisfy/modules/seloger/pages.py

269 lines
9.6 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.browser.pages import JsonPage, pagination, HTMLPage
from woob.browser.elements import ItemElement, DictElement, method
from woob.browser.filters.json import Dict
from woob.browser.filters.html import XPath
from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
Env, Regexp, Field, BrowserURL)
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES)
from woob.capabilities.address import PostalAddress
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
from woob.tools.json import json
from woob.exceptions import ActionNeeded
from .constants import TYPES, RET
import codecs
import decimal
class ErrorPage(HTMLPage):
    """Page that signals the user must solve a captcha before continuing."""

    def on_load(self):
        """Abort by raising ``ActionNeeded`` with a captcha prompt.

        :raises ActionNeeded: always.
        """
        raise ActionNeeded("Please resolve the captcha")
class CitiesPage(JsonPage):
    """JSON page listing city suggestions."""

    @method
    class iter_cities(DictElement):
        """Build one ``City`` per entry of the JSON document."""

        # Presumably makes the element tolerate entries that repeat an id
        # instead of failing on them -- confirm against woob's DictElement.
        ignore_duplicate = True

        class item(ItemElement):
            """Map a single suggestion entry onto a ``City``."""

            klass = City

            # City identifier, read from the nested ``Params`` -> ``ci`` key.
            obj_id = Dict('Params/ci')
            # Name of the city, read from the ``Display`` key.
            obj_name = Dict('Display')
class SearchResultsPage(HTMLPage):
    """Search results page whose data is embedded as JSON in a ``<script>``.

    After construction ``self.doc`` holds the decoded ``initialData`` object
    (a plain JSON structure) instead of the parsed HTML tree.
    """

    def __init__(self, *args, **kwargs):
        """Build the HTML page, then replace ``self.doc`` with the embedded JSON."""
        HTMLPage.__init__(self, *args, **kwargs)
        # The payload is the string literal handed to JSON.parse(), located
        # between the ``initialData`` and ``tags`` assignments.
        json_content = Regexp(CleanText('//script'),
                              r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]")(self.doc)
        # Undo the JavaScript string escaping (\" and \uXXXX sequences).
        json_content = codecs.unicode_escape_decode(json_content)[0]
        # NOTE(review): UTF-8 round-trip kept verbatim; its exact intent
        # (surrogate handling?) is not evident from this file -- confirm.
        json_content = json_content.encode('utf-8', 'surrogatepass').decode('utf-8')
        self.doc = json.loads(json_content)

    @pagination
    @method
    class iter_housings(DictElement):
        """Iterate over the result cards found under ``cards/list``."""

        item_xpath = 'cards/list'
        # Prevent DataError on same ids
        ignore_duplicate = True

        def next_page(self):
            """Return the URL of the next results page, or ``None`` on the last one.

            A next page exists while ``count / resultsPerPage`` is strictly
            greater than the current page number.
            """
            current_page = int(Dict('navigation/pagination/page')(self))
            total_results = int(Dict('navigation/counts/count')(self))
            per_page = int(Dict('navigation/pagination/resultsPerPage')(self))

            if total_results / per_page > current_page:
                return BrowserURL('search', query=Env('query'), page_number=current_page + 1)(self)

        # TODO handle bellesdemeures
        class item(ItemElement):
            """Map one result card onto a ``Housing``."""

            klass = Housing

            def condition(self):
                """Keep only real listings: not an ad card, with an id and a URL."""
                return (
                    Dict('cardType')(self) not in ['advertising', 'ali', 'localExpert']
                    and Dict('id', default=False)(self)
                    and Dict('classifiedURL', default=False)(self)
                )

            obj_id = Dict('id')

            def obj_type(self):
                """Return the post type matching the ``query_type`` env value.

                Furnished rentals are reported as plain ``RENT``. An id with
                no entry in ``TYPES`` yields ``NotAvailable`` (same fallback
                as ``HousingPage``) instead of leaking ``StopIteration`` out
                of this method.
                """
                query_type_id = int(Env('query_type')(self))
                try:
                    post_type = next(k for k, v in TYPES.items() if v == query_type_id)
                except StopIteration:
                    return NotAvailable
                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type

            def obj_title(self):
                """Return ``"<estateType> - <tag / tag ...> - <location>"``."""
                return "{} - {} - {}".format(Dict('estateType')(self),
                                             " / ".join(Dict('tags')(self)),
                                             Field('location')(self))

            def obj_advert_type(self):
                """Return ``PROFESSIONAL`` when the card has an agency id, else ``PERSONAL``."""
                has_agency = Dict('contact/agencyId', default=False)(self)
                return ADVERT_TYPES.PROFESSIONAL if has_agency else ADVERT_TYPES.PERSONAL

            obj_utilities = UTILITIES.EXCLUDED

            def obj_photos(self):
                """Return one ``HousingPhoto`` per entry of the card's ``photos`` list."""
                return [HousingPhoto(photo) for photo in Dict('photos')(self)]

            def obj_location(self):
                """Return ``"<district> <city> (<zip code>)"``; falsy parts become ``''``."""
                district = Dict('districtLabel')(self) or ''
                city = Dict('cityLabel')(self) or ''
                zip_code = Dict('zipCode')(self) or ''
                return u'%s %s (%s)' % (district, city, zip_code)

            obj_url = Dict('classifiedURL')
            obj_text = Dict('description')
            obj_cost = CleanDecimal(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            obj_currency = Currency(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            # NOTE(review): the default here is the PricePerMeterFilter class
            # itself, whereas HousingPage uses an instance -- confirm intended.
            obj_price_per_meter = CleanDecimal(Dict('pricing/squareMeterPrice'), default=PricePerMeterFilter)
class HousingPage(HTMLPage):
    """Listing detail page whose data is embedded as JSON in a ``<script>``.

    After construction ``self.doc`` is a dict with two keys: ``"advert"``
    (the ``advert/mainAdvert`` object) and ``"agency"`` (the ``agency``
    object). Either one is ``{}`` when absent from the payload.
    """

    def __init__(self, *args, **kwargs):
        """Build the HTML page, then replace ``self.doc`` with the embedded JSON."""
        HTMLPage.__init__(self, *args, **kwargs)
        # The payload is the string literal handed to JSON.parse().
        json_content = Regexp(
            CleanText('//script'),
            r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);"
        )(self.doc)
        # Undo the JavaScript string escaping (\" and \uXXXX sequences).
        json_content = codecs.unicode_escape_decode(json_content)[0]
        # NOTE(review): UTF-8 round-trip kept verbatim; its exact intent
        # (surrogate handling?) is not evident from this file -- confirm.
        json_content = json_content.encode('utf-8', 'surrogatepass').decode('utf-8')
        # Parse once and pick both sub-objects from the same result.
        initial_data = json.loads(json_content)
        self.doc = {
            "advert": initial_data.get('advert', {}).get('mainAdvert', {}),
            "agency": initial_data.get('agency', {}),
        }

    @method
    class get_housing(ItemElement):
        """Map the advert (plus agency details) onto a ``Housing``."""

        klass = Housing

        def parse(self, el):
            """Keep the agency data aside and make the advert the element to parse."""
            self.agency_doc = el['agency']
            self.el = el['advert']

        obj_id = Dict('id')

        def obj_house_type(self):
            """Return the ``RET`` key whose value equals ``propertyNatureId``.

            Returns ``NotLoaded`` when no entry matches.
            """
            nature_id = Dict('propertyNatureId')(self)
            try:
                return next(k for k, v in RET.items() if v == nature_id)
            except StopIteration:
                return NotLoaded

        def obj_type(self):
            """Return the ``TYPES`` key whose value equals ``idTransactionType``.

            Furnished rentals are reported as plain ``RENT``; an unknown id
            yields ``NotAvailable``.
            """
            transaction_id = Dict('idTransactionType')(self)
            try:
                post_type = next(k for k, v in TYPES.items() if v == transaction_id)
            except StopIteration:
                return NotAvailable
            if post_type == POSTS_TYPES.FURNISHED_RENT:
                # SeLoger does not let us discriminate between furnished and not furnished.
                return POSTS_TYPES.RENT
            return post_type

        def obj_advert_type(self):
            """Return ``PROFESSIONAL`` when the agency type mentions ``Agences``.

            The agency object may be empty (see ``HousingPage.__init__``), so
            a missing or empty ``type`` is treated as ``PERSONAL`` rather
            than raising ``KeyError``.
            """
            agency_type = self.agency_doc.get('type') or ''
            if 'Agences' in agency_type:
                return ADVERT_TYPES.PROFESSIONAL
            return ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Return one ``HousingPhoto`` per ``photoList`` entry.

            ``fullscreenUrl`` is prefixed with ``https:`` before use.
            """
            return [
                HousingPhoto("https:{}".format(photo['fullscreenUrl']))
                for photo in Dict('photoList')(self)
            ]

        obj_title = Dict('title')

        def obj_location(self):
            """Return ``"<neighbourhood> <city> (<zip code>)"`` from ``address``."""
            address = Dict('address')(self)
            return u'%s %s (%s)' % (address['neighbourhood'], address['city'],
                                    address['zipCode'])

        def obj_address(self):
            """Return a ``PostalAddress`` filled from the advert's ``address`` object."""
            address = Dict('address')(self)
            postal = PostalAddress()
            postal.street = address['street']
            postal.postal_code = address['zipCode']
            postal.city = address['city']
            postal.full_address = Field('location')(self)
            return postal

        obj_text = Dict('description')

        def obj_cost(self):
            """Return ``propertyPrice['prix']`` as a ``Decimal``."""
            property_price = Dict('propertyPrice')(self)
            return decimal.Decimal(property_price['prix'])

        def obj_currency(self):
            """Return ``propertyPrice['priceUnit']`` unchanged."""
            property_price = Dict('propertyPrice')(self)
            return property_price['priceUnit']

        obj_price_per_meter = PricePerMeterFilter()
        obj_area = CleanDecimal(Dict('surface'))

        def obj_url(self):
            """Return the URL of the page being parsed."""
            return self.page.url

        def obj_phone(self):
            """Return the agency phone number value, or ``NotAvailable`` if absent."""
            return self.agency_doc.get('agencyPhoneNumber', {}).get('value',
                                                                    NotAvailable)

        def obj_utilities(self):
            """Return ``NotLoaded``; utilities are not extracted yet."""
            return NotLoaded  # TODO

        obj_bedrooms = CleanDecimal(Dict('bedroomCount'))
        obj_rooms = CleanDecimal(Dict('numberOfRooms'))
def _energy_class(rating):
    """Convert an energy/GES rating object into an ``ENERGY_CLASS`` member.

    :param rating: rating object read from the JSON document, or a falsy
        value when the document has no such entry.
    :return: the ``ENERGY_CLASS`` attribute named by ``rating['lettre']``, or
        ``NotAvailable`` when the rating is missing, its ``status`` is
        greater than 0, or the letter is not an ``ENERGY_CLASS`` name.
    """
    # A missing entry arrives as the falsy ``Dict`` default; subscripting it
    # would raise TypeError, so bail out first.
    if not rating or rating['status'] > 0:
        return NotAvailable
    return getattr(ENERGY_CLASS, rating['lettre'], NotAvailable)


class HousingJsonPage(JsonPage):
    """JSON page carrying energy ratings and detailed criteria of a listing."""

    @method
    class get_housing(ItemElement):
        """Fill the energy ratings and the details of a ``Housing``."""

        klass = Housing

        def obj_DPE(self):
            """Return the energy class from ``energie``, or ``NotAvailable``."""
            return _energy_class(Dict("energie", default="")(self))

        def obj_GES(self):
            """Return the energy class from ``ges``, or ``NotAvailable``."""
            return _energy_class(Dict("ges", default="")(self))

        def obj_details(self):
            """Return a flat dict of the listing's details.

            Each entry of ``categories`` with non-empty ``criteria`` adds its
            name mapped to the criteria values joined by ``' / '``. Every
            mapping found under ``infos_acquereur`` is then merged in, and
            overrides earlier keys on collision.
            """
            details = {}
            for category in Dict('categories')(self):
                criteria = category['criteria']
                if criteria:
                    details[category['name']] = ' / '.join(
                        [criterion['value'] for criterion in criteria]
                    )
            for section in Dict('infos_acquereur')(self).values():
                for key, value in section.items():
                    details[key] = value
            return details