Flatisfy is your new companion to ease your search for new housing :)

pages.py 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # -*- coding: utf-8 -*-
  2. # Copyright(C) 2012 Romain Bignon
  3. #
  4. # This file is part of a woob module.
  5. #
  6. # This woob module is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU Affero General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This woob module is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU Affero General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU Affero General Public License
  17. # along with this woob module. If not, see <http://www.gnu.org/licenses/>.
  18. from __future__ import unicode_literals
  19. from decimal import Decimal
  20. from woob.tools.date import parse_french_date
  21. from woob.browser.pages import HTMLPage, JsonPage, pagination
  22. from woob.browser.elements import ItemElement, ListElement, DictElement, method
  23. from woob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
  24. Env, BrowserURL, Format, Currency)
  25. from woob.browser.filters.html import Attr, Link, XPath, CleanHTML
  26. from woob.browser.filters.json import Dict
  27. from woob.capabilities.base import NotAvailable, NotLoaded
  28. from woob.capabilities.housing import (Housing, City, HousingPhoto,
  29. UTILITIES, ENERGY_CLASS, POSTS_TYPES,
  30. ADVERT_TYPES, HOUSE_TYPES)
  31. from woob.tools.capabilities.housing.housing import PricePerMeterFilter
  32. class CitiesPage(JsonPage):
  33. @method
  34. class iter_cities(DictElement):
  35. class item(ItemElement):
  36. klass = City
  37. obj_id = Dict('id')
  38. obj_name = Dict('name')
  39. class HousingPage(HTMLPage):
  40. @pagination
  41. @method
  42. class iter_housings(ListElement):
  43. item_xpath = '//div[has-class("search-list-item-alt")]'
  44. # Prevent DataError on same ids
  45. ignore_duplicate = True
  46. def next_page(self):
  47. return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)
  48. class item(ItemElement):
  49. klass = Housing
  50. def condition(self):
  51. title = self.obj_title(self)
  52. isNotFurnishedOk = True
  53. if self.env['query_type'] == POSTS_TYPES.RENT:
  54. isNotFurnishedOk = 'meublé' not in title.lower()
  55. id = self.obj_id(self)
  56. if id is None:
  57. return False
  58. return (
  59. Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', default=None)(self) and
  60. isNotFurnishedOk
  61. )
  62. def parse(self, el):
  63. rooms_bedrooms_area = el.xpath(
  64. './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
  65. )
  66. self.env['rooms'] = NotLoaded
  67. self.env['bedrooms'] = NotLoaded
  68. self.env['area'] = NotLoaded
  69. for item in rooms_bedrooms_area:
  70. name = CleanText('.')(item)
  71. if 'chambre' in name.lower():
  72. name = 'bedrooms'
  73. value = CleanDecimal('.')(item)
  74. elif 'pièce' in name.lower():
  75. name = 'rooms'
  76. value = CleanDecimal('.')(item)
  77. else:
  78. name = 'area'
  79. value = CleanDecimal(
  80. Regexp(
  81. CleanText(
  82. '.'
  83. ),
  84. r'(\d*\.*\d*) .*'
  85. )
  86. )(item)
  87. self.env[name] = value
  88. obj_id = Regexp(
  89. Link('./div/a[has-class("item-title")]'), '/annonces/(.*)',
  90. default=None
  91. )
  92. obj_type = Env('query_type')
  93. obj_advert_type = ADVERT_TYPES.PERSONAL
  94. def obj_house_type(self):
  95. item_link = Link('./div/a[@class="item-title"]')(self)
  96. house_type = item_link.split('/')[-1].split('-')[0]
  97. if 'parking' in house_type:
  98. return HOUSE_TYPES.PARKING
  99. elif 'appartement' in house_type:
  100. return HOUSE_TYPES.APART
  101. elif 'terrain' in house_type:
  102. return HOUSE_TYPES.LAND
  103. elif 'maison' in house_type:
  104. return HOUSE_TYPES.HOUSE
  105. else:
  106. return HOUSE_TYPES.OTHER
  107. obj_title = CleanText('./div/a[has-class("item-title")]')
  108. obj_area = Env('area')
  109. obj_cost = CleanDecimal(CleanText('./div/a[has-class("item-title")]/span[@class="item-price"]'),
  110. replace_dots=True, default=Decimal(0))
  111. obj_currency = Currency(
  112. './div/a[@class="item-title"]/span[@class="item-price"]'
  113. )
  114. obj_utilities = UTILITIES.UNKNOWN
  115. obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)
  116. def obj_location(self):
  117. return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]
  118. obj_text = CleanText('./div/p[@class="item-description"]', replace=[(' Lire la suite', '')])
  119. obj_rooms = Env('rooms')
  120. obj_bedrooms = Env('bedrooms')
  121. obj_price_per_meter = PricePerMeterFilter()
  122. obj_url = Format(
  123. u'http://www.pap.fr%s',
  124. Link('./div/a[@class="item-title"]')
  125. )
  126. def obj_photos(self):
  127. photos = []
  128. for img in XPath('./a/img/@src')(self):
  129. if(
  130. img.endswith("visuel-nophoto.png") or
  131. img.endswith('miniature-video.png')
  132. ):
  133. continue
  134. photos.append(HousingPhoto(u'%s' % img))
  135. return photos
  136. @method
  137. class get_housing(ItemElement):
  138. klass = Housing
  139. def parse(self, el):
  140. rooms_bedrooms_area = el.xpath(
  141. './/ul[has-class("item-tags")]/li'
  142. )
  143. self.env['rooms'] = NotAvailable
  144. self.env['bedrooms'] = NotAvailable
  145. self.env['area'] = NotAvailable
  146. for item in rooms_bedrooms_area:
  147. name = CleanText('.')(item)
  148. if 'chambre' in name.lower():
  149. name = 'bedrooms'
  150. value = CleanDecimal('./strong')(item)
  151. elif 'pièce' in name.lower():
  152. name = 'rooms'
  153. value = CleanDecimal('./strong')(item)
  154. elif ' m²' in name and 'le m²' not in name:
  155. name = 'area'
  156. value = CleanDecimal(
  157. Regexp(
  158. CleanText(
  159. '.'
  160. ),
  161. r'(\d*\.*\d*) .*'
  162. )
  163. )(item)
  164. self.env[name] = value
  165. obj_id = Env('_id')
  166. def obj_type(self):
  167. prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
  168. if 'location' in prev_link:
  169. title = self.obj_title(self)
  170. if 'meublé' in title.lower():
  171. return POSTS_TYPES.FURNISHED_RENT
  172. else:
  173. return POSTS_TYPES.RENT
  174. elif 'vente' in prev_link:
  175. return POSTS_TYPES.SALE
  176. elif 'viager' in prev_link:
  177. return POSTS_TYPES.VIAGER
  178. else:
  179. return NotAvailable
  180. obj_advert_type = ADVERT_TYPES.PERSONAL
  181. def obj_house_type(self):
  182. prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
  183. house_type = prev_link.split('-')[-1]
  184. if 'parking' in house_type:
  185. return HOUSE_TYPES.PARKING
  186. elif 'appartement' in house_type:
  187. return HOUSE_TYPES.APART
  188. elif 'terrain' in house_type:
  189. return HOUSE_TYPES.LAND
  190. elif 'maison' in house_type:
  191. return HOUSE_TYPES.HOUSE
  192. else:
  193. return HOUSE_TYPES.OTHER
  194. obj_title = CleanText(
  195. '//h1[@class="item-title"]'
  196. )
  197. obj_cost = CleanDecimal(
  198. '//h1[@class="item-title"]/span[@class="item-price"]',
  199. replace_dots=True
  200. )
  201. obj_currency = Currency(
  202. '//h1[@class="item-title"]/span[@class="item-price"]'
  203. )
  204. obj_utilities = UTILITIES.UNKNOWN
  205. obj_area = Env('area')
  206. def obj_date(self):
  207. date = CleanText(
  208. '//p[@class="item-date"]'
  209. )(self).split("/")[-1].strip()
  210. return parse_french_date(date)
  211. obj_rooms = Env('rooms')
  212. obj_bedrooms = Env('bedrooms')
  213. obj_price_per_meter = PricePerMeterFilter()
  214. obj_location = CleanText('//div[has-class("item-description")]/h2')
  215. obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))
  216. def obj_station(self):
  217. return ", ".join([
  218. station.text
  219. for station in XPath(
  220. '//ul[has-class("item-transports")]//span[has-class("label")]'
  221. )(self)
  222. ])
  223. def obj_phone(self):
  224. phone = CleanText('(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]')(self)
  225. phone = phone.replace(' ', ', ')
  226. return phone
  227. obj_url = BrowserURL('housing', _id=Env('_id'))
  228. def obj_DPE(self):
  229. DPE = Attr(
  230. '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
  231. 'class',
  232. default=""
  233. )(self)
  234. if DPE:
  235. DPE = [x.replace("energy-rank-", "").upper()
  236. for x in DPE.split() if x.startswith("energy-rank-")][0]
  237. return getattr(ENERGY_CLASS, DPE, NotAvailable)
  238. def obj_photos(self):
  239. photos = []
  240. for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
  241. if not img.endswith('miniature-video.png'):
  242. photos.append(HousingPhoto(u'%s' % img))
  243. return photos