Browse Source

Fix SeLoger

Phyks (Lucas Verney) 7 months ago
parent
commit
f442d91188
1 changed file with 49 additions and 50 deletions
  1. 49
    50
      modules/seloger/pages.py

+ 49
- 50
modules/seloger/pages.py View File

@@ -34,6 +34,7 @@ from woob.tools.json import json
34 34
 from woob.exceptions import ActionNeeded
35 35
 from .constants import TYPES, RET
36 36
 import codecs
37
+import decimal
37 38
 
38 39
 
39 40
 class ErrorPage(HTMLPage):
@@ -66,6 +67,8 @@ class SearchResultsPage(HTMLPage):
66 67
     @method
67 68
     class iter_housings(DictElement):
68 69
         item_xpath = 'cards/list'
70
+        # Prevent DataError on same ids
71
+        ignore_duplicate = True
69 72
 
70 73
         def next_page(self):
71 74
             page_nb = Dict('navigation/pagination/page')(self)
@@ -136,29 +139,38 @@ class SearchResultsPage(HTMLPage):
136 139
 
137 140
 
138 141
 class HousingPage(HTMLPage):
142
+    def __init__(self, *args, **kwargs):
143
+        HTMLPage.__init__(self, *args, **kwargs)
144
+        json_content = Regexp(
145
+            CleanText('//script'),
146
+            r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);"
147
+        )(self.doc)
148
+        json_content = codecs.unicode_escape_decode(json_content)[0]
149
+        json_content = json_content.encode('utf-8', 'surrogatepass').decode('utf-8')
150
+        self.doc = {
151
+            "advert": json.loads(json_content).get('advert', {}).get('mainAdvert', {}),
152
+            "agency": json.loads(json_content).get('agency', {})
153
+        }
154
+
139 155
     @method
140 156
     class get_housing(ItemElement):
141 157
         klass = Housing
142 158
 
143 159
         def parse(self, el):
144
-            json_content = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)
145
-            json_content = json_content.replace("logged", "\"logged\"")
146
-            json_content = json_content.replace("lengthcarrousel", "\"lengthcarrousel\"")
147
-            json_content = json_content.replace("products", "\"products\"")
148
-            json_content = json_content.replace("// // ANNONCES_SIMILAIRE / RECO", "")
149
-            self.house_json_datas = json.loads(json_content)['products'][0]
160
+            self.agency_doc = el['agency']
161
+            self.el = el['advert']
150 162
 
151
-        obj_id = CleanText('//form[@name="central"]/input[@name="idannonce"]/@value')
163
+        obj_id = Dict('id')
152 164
 
153 165
         def obj_house_type(self):
154
-            naturebien = CleanText('//form[@name="central"]/input[@name="naturebien"]/@value')(self)
166
+            naturebien = Dict('propertyNatureId')(self)
155 167
             try:
156 168
                 return next(k for k, v in RET.items() if v == naturebien)
157 169
             except StopIteration:
158 170
                 return NotLoaded
159 171
 
160 172
         def obj_type(self):
161
-            idType = int(CleanText('//form[@name="central"]/input[@name="idtt"]/@value')(self))
173
+            idType = Dict('idTransactionType')(self)
162 174
             type = next(k for k, v in TYPES.items() if v == idType)
163 175
             if type == POSTS_TYPES.FURNISHED_RENT:
164 176
                 # SeLoger does not let us discriminate between furnished and not furnished.
@@ -166,12 +178,7 @@ class HousingPage(HTMLPage):
166 178
             return type
167 179
 
168 180
         def obj_advert_type(self):
169
-            is_agency = (
170
-                CleanText('//form[@name="central"]/input[@name="nomagance"]/@value')(self) or
171
-                CleanText('//form[@name="central"]/input[@name="urlagence"]/@value')(self) or
172
-                CleanText('//form[@name="central"]/input[@name="adresseagence"]/@value')(self)
173
-            )
174
-            if is_agency:
181
+            if 'Agences' in self.agency_doc['type']:
175 182
                 return ADVERT_TYPES.PROFESSIONAL
176 183
             else:
177 184
                 return ADVERT_TYPES.PERSONAL
@@ -179,58 +186,50 @@ class HousingPage(HTMLPage):
179 186
         def obj_photos(self):
180 187
             photos = []
181 188
 
182
-            for photo in XPath('//div[@class="carrousel_slide"]/img/@src')(self):
183
-                photos.append(HousingPhoto("https:{}".format(photo)))
184
-
185
-            for photo in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self):
186
-                p = json.loads(photo)
187
-                photos.append(HousingPhoto("https:{}".format(p['url'])))
189
+            for photo in Dict('photoList')(self):
190
+                photos.append(HousingPhoto("https:{}".format(photo['fullscreenUrl'])))
188 191
 
189 192
             return photos
190 193
 
191
-        obj_title = CleanText('//title[1]')
194
+        obj_title = Dict('title')
192 195
 
193 196
         def obj_location(self):
194
-            quartier = Regexp(CleanText('//script'),
195
-                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
196
-            ville = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
197
-            ville = ville if ville else ''
198
-            cp = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
199
-            cp = cp if cp else ''
200
-            return u'%s %s (%s)' % (quartier, ville, cp)
197
+            address = Dict('address')(self)
198
+            return u'%s %s (%s)' % (address['neighbourhood'], address['city'],
199
+                                   address['zipCode'])
201 200
 
202 201
         def obj_address(self):
202
+            address = Dict('address')(self)
203 203
             p = PostalAddress()
204
-
205
-            p.street = Regexp(CleanText('//script'),
206
-                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
207
-            p.postal_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
208
-            p.city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
204
+            p.street = address['street']
205
+            p.postal_code = address['zipCode']
206
+            p.city = address['city']
209 207
             p.full_address = Field('location')(self)
210 208
             return p
211 209
 
212
-        obj_text = CleanText('//form[@name="central"]/input[@name="description"]/@value')
210
+        obj_text = Dict('description')
211
+
212
+        def obj_cost(self):
213
+            propertyPrice = Dict('propertyPrice')(self)
214
+            return decimal.Decimal(propertyPrice['prix'])
215
+        def obj_currency(self):
216
+            propertyPrice = Dict('propertyPrice')(self)
217
+            return propertyPrice['priceUnit']
213 218
 
214
-        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'), default=NotLoaded)
215
-        obj_currency = Currency(CleanText('//a[@id="price"]'), default=NotLoaded)
216 219
         obj_price_per_meter = PricePerMeterFilter()
217 220
 
218
-        obj_area = CleanDecimal('//form[@name="central"]/input[@name="surface"]/@value', replace_dots=True)
219
-        obj_url = CleanText('//form[@name="central"]/input[@name="urlannonce"]/@value')
220
-        obj_phone = CleanText('//div[@class="data-action"]/a[@data-phone]/@data-phone')
221
+        obj_area = CleanDecimal(Dict('surface'))
222
+        def obj_url(self):
223
+            return self.page.url
224
+        def obj_phone(self):
225
+            return self.agency_doc.get('agencyPhoneNumber', {}).get('value',
226
+                                                                    NotAvailable)
221 227
 
222 228
         def obj_utilities(self):
223
-            mention = CleanText('//span[@class="detail_indice_prix"]', default="")(self)
224
-            if "(CC) Loyer mensuel charges comprises" in mention:
225
-                return UTILITIES.INCLUDED
226
-            else:
227
-                return UTILITIES.UNKNOWN
228
-
229
-        def obj_bedrooms(self):
230
-            return CleanDecimal(Dict('nb_chambres', default=NotLoaded))(self.house_json_datas)
229
+            return NotLoaded  # TODO
231 230
 
232
-        def obj_rooms(self):
233
-            return CleanDecimal(Dict('nb_pieces', default=NotLoaded))(self.house_json_datas)
231
+        obj_bedrooms = CleanDecimal(Dict('bedroomCount'))
232
+        obj_rooms = CleanDecimal(Dict('numberOfRooms'))
234 233
 
235 234
 
236 235
 class HousingJsonPage(JsonPage):