Flatisfy is your new companion to ease your search for a new home :)

pages.py 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. # -*- coding: utf-8 -*-
  2. # Copyright(C) 2017 Phyks (Lucas Verney)
  3. #
  4. # This file is part of a weboob module.
  5. #
  6. # This weboob module is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU Affero General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This weboob module is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU Affero General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU Affero General Public License
  17. # along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
  18. from __future__ import unicode_literals
  19. import datetime
  20. from weboob.browser.pages import JsonPage, HTMLPage, pagination
  21. from weboob.browser.filters.standard import (
  22. CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
  23. )
  24. from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
  25. from weboob.browser.elements import ItemElement, ListElement, method
  26. from weboob.capabilities.base import NotAvailable, NotLoaded
  27. from weboob.capabilities.housing import (
  28. City, Housing, HousingPhoto,
  29. UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
  30. )
  31. from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
  32. from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES
  33. class CitiesPage(JsonPage):
  34. def iter_cities(self):
  35. cities_list = self.doc
  36. if isinstance(self.doc, dict):
  37. cities_list = self.doc.values()
  38. for city in cities_list:
  39. city_obj = City()
  40. city_obj.id = city
  41. city_obj.name = city
  42. yield city_obj
  43. class HousingPage(HTMLPage):
  44. @method
  45. class get_housing(ItemElement):
  46. klass = Housing
  47. obj_id = Format(
  48. '%s:%s',
  49. Env('type'),
  50. Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
  51. )
  52. obj_advert_type = ADVERT_TYPES.PROFESSIONAL
  53. def obj_type(self):
  54. type = Env('type')(self)
  55. if type == 'location':
  56. if 'appartement-meuble' in self.page.url:
  57. return POSTS_TYPES.FURNISHED_RENT
  58. else:
  59. return POSTS_TYPES.RENT
  60. elif type == 'achat':
  61. return POSTS_TYPES.SALE
  62. else:
  63. return NotAvailable
  64. def obj_url(self):
  65. return self.page.url
  66. def obj_house_type(self):
  67. url = self.obj_url()
  68. for house_type, types in QUERY_HOUSE_TYPES.items():
  69. for type in types:
  70. if ('/%s/' % type) in url:
  71. return house_type
  72. return NotAvailable
  73. obj_title = CleanText('//h1[has-class("OfferTop-title")]')
  74. obj_area = CleanDecimal(
  75. Regexp(
  76. CleanText(
  77. '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
  78. ),
  79. r'(\d*\.*\d*) .*',
  80. default=NotAvailable
  81. ),
  82. default=NotAvailable
  83. )
  84. obj_cost = CleanDecimal(
  85. '//span[has-class("OfferTop-price")]',
  86. default=NotAvailable
  87. )
  88. obj_price_per_meter = PricePerMeterFilter()
  89. obj_currency = Currency(
  90. '//span[has-class("OfferTop-price")]'
  91. )
  92. obj_location = Format(
  93. '%s - %s',
  94. CleanText('//p[@data-behat="adresseBien"]'),
  95. CleanText('//p[has-class("OfferTop-loc")]')
  96. )
  97. obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
  98. obj_phone = Regexp(
  99. Link(
  100. '//a[has-class("OfferContact-btn--tel")]'
  101. ),
  102. r'tel:(.*)'
  103. )
  104. def obj_photos(self):
  105. photos = []
  106. for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
  107. photo_url = Attr('.', 'src')(photo)
  108. photo_url = photo_url.replace('640/480', '800/600')
  109. photos.append(HousingPhoto(photo_url))
  110. return photos
  111. obj_date = datetime.date.today()
  112. def obj_utilities(self):
  113. price = CleanText(
  114. '//p[has-class("OfferTop-price")]'
  115. )(self)
  116. if "charges comprises" in price.lower():
  117. return UTILITIES.INCLUDED
  118. else:
  119. return UTILITIES.EXCLUDED
  120. obj_rooms = CleanDecimal(
  121. '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
  122. default=NotAvailable
  123. )
  124. obj_bedrooms = CleanDecimal(
  125. '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
  126. default=NotAvailable
  127. )
  128. def obj_DPE(self):
  129. try:
  130. electric_consumption = CleanDecimal(Regexp(
  131. Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
  132. r'https://dpe.foncia.net\/(\d+)\/.*'
  133. ))(self)
  134. except (RegexpError, XPathNotFound):
  135. electric_consumption = None
  136. DPE = ""
  137. if electric_consumption is not None:
  138. if electric_consumption <= 50:
  139. DPE = "A"
  140. elif 50 < electric_consumption <= 90:
  141. DPE = "B"
  142. elif 90 < electric_consumption <= 150:
  143. DPE = "C"
  144. elif 150 < electric_consumption <= 230:
  145. DPE = "D"
  146. elif 230 < electric_consumption <= 330:
  147. DPE = "E"
  148. elif 330 < electric_consumption <= 450:
  149. DPE = "F"
  150. else:
  151. DPE = "G"
  152. return getattr(ENERGY_CLASS, DPE, NotAvailable)
  153. return NotAvailable
  154. def obj_details(self):
  155. details = {}
  156. dispo = Date(
  157. Regexp(
  158. CleanText('//p[has-class("OfferTop-dispo")]'),
  159. r'.* (\d\d\/\d\d\/\d\d\d\d)',
  160. default=datetime.date.today().isoformat()
  161. )
  162. )(self)
  163. if dispo is not None:
  164. details["dispo"] = dispo
  165. priceMentions = CleanText(
  166. '//p[has-class("OfferTop-mentions")]',
  167. default=None
  168. )(self)
  169. if priceMentions is not None:
  170. details["priceMentions"] = priceMentions
  171. agency = CleanText(
  172. '//p[has-class("OfferContact-address")]',
  173. default=None
  174. )(self)
  175. if agency is not None:
  176. details["agency"] = agency
  177. for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
  178. category = CleanText(
  179. './h3[has-class("OfferDetails-title--2")]',
  180. default=None
  181. )(item)
  182. if not category:
  183. continue
  184. details[category] = {}
  185. for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
  186. detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
  187. detail_value = CleanText('.//*[has-class("List-value")]')(detail_item)
  188. details[category][detail_title] = detail_value
  189. for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
  190. detail_title = CleanText('.')(detail_item)
  191. details[category][detail_title] = True
  192. try:
  193. electric_consumption = CleanDecimal(Regexp(
  194. Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
  195. r'https://dpe.foncia.net\/(\d+)\/.*'
  196. ))(self)
  197. details["electric_consumption"] = (
  198. '{} kWhEP/m².an'.format(electric_consumption)
  199. )
  200. except (RegexpError, XPathNotFound):
  201. pass
  202. return details
  203. class SearchPage(HTMLPage):
  204. def do_search(self, query, cities):
  205. form = self.get_form('//form[@name="searchForm"]')
  206. form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
  207. form['searchForm[localisation]'] = cities
  208. form['searchForm[type_bien][]'] = []
  209. for house_type in query.house_types:
  210. try:
  211. form['searchForm[type_bien][]'].extend(
  212. QUERY_HOUSE_TYPES[house_type]
  213. )
  214. except KeyError:
  215. pass
  216. form['searchForm[type_bien][]'] = [
  217. x for x in form['searchForm[type_bien][]']
  218. if x in AVAILABLE_TYPES.get(query.type, [])
  219. ]
  220. if query.area_min:
  221. form['searchForm[surface_min]'] = query.area_min
  222. if query.area_max:
  223. form['searchForm[surface_max]'] = query.area_max
  224. if query.cost_min:
  225. form['searchForm[prix_min]'] = query.cost_min
  226. if query.cost_max:
  227. form['searchForm[prix_max]'] = query.cost_max
  228. if query.nb_rooms:
  229. form['searchForm[pieces]'] = [i for i in range(1, query.nb_rooms + 1)]
  230. form.submit()
  231. def find_housing(self, query_type, housing):
  232. form = self.get_form('//form[@name="searchForm"]')
  233. form['searchForm[type]'] = query_type
  234. form['searchForm[reference]'] = housing
  235. form.submit()
  236. class SearchResultsPage(HTMLPage):
  237. @pagination
  238. @method
  239. class iter_housings(ListElement):
  240. item_xpath = '//article[has-class("TeaserOffer")]'
  241. next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')
  242. class item(ItemElement):
  243. klass = Housing
  244. obj_id = Format(
  245. '%s:%s',
  246. Env('type'),
  247. Attr('.//span[boolean(@data-reference)]', 'data-reference')
  248. )
  249. obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
  250. obj_type = Env('query_type')
  251. obj_advert_type = ADVERT_TYPES.PROFESSIONAL
  252. def obj_house_type(self):
  253. url = self.obj_url(self)
  254. for house_type, types in QUERY_HOUSE_TYPES.items():
  255. for type in types:
  256. if ('/%s/' % type) in url:
  257. return house_type
  258. return NotLoaded
  259. obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
  260. obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
  261. obj_area = CleanDecimal(
  262. Regexp(
  263. CleanText(
  264. './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
  265. ),
  266. r'(\d*\.*\d*) .*',
  267. default=NotAvailable
  268. ),
  269. default=NotAvailable
  270. )
  271. obj_cost = CleanDecimal(
  272. './/strong[has-class("TeaserOffer-price-num")]',
  273. default=NotAvailable
  274. )
  275. obj_price_per_meter = PricePerMeterFilter()
  276. obj_currency = Currency(
  277. './/strong[has-class("TeaserOffer-price-num")]'
  278. )
  279. obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
  280. obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')
  281. def obj_photos(self):
  282. url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
  283. # If the used photo is a default no photo, the src is on the same domain.
  284. if url[0] == '/':
  285. return []
  286. else:
  287. return [HousingPhoto(url)]
  288. obj_date = datetime.date.today()
  289. def obj_utilities(self):
  290. price = CleanText(
  291. './/strong[has-class("TeaserOffer-price-num")]'
  292. )(self)
  293. if "charges comprises" in price.lower():
  294. return UTILITIES.INCLUDED
  295. else:
  296. return UTILITIES.EXCLUDED
  297. obj_rooms = CleanDecimal(
  298. './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
  299. default=NotLoaded
  300. )
  301. obj_bedrooms = CleanDecimal(
  302. './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
  303. default=NotLoaded
  304. )
  305. def obj_details(self):
  306. return {
  307. "dispo": Date(
  308. Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
  309. default=datetime.date.today().isoformat())
  310. )(self),
  311. "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
  312. }