Merge branch 'moar-tests' into 'master'

More duplicates tests

See merge request phyks/Flatisfy!23
This commit is contained in:
Phyks 2018-01-26 10:34:44 +01:00
commit e7218e90f3
15 changed files with 603 additions and 62 deletions

View File

@ -267,27 +267,6 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
assert flat1_phone == flat2_phone
n_common_items += 10 # Counts much more than the rest
# They should have at least one photo in common if there
# are some photos
if flat1.get("photos", []) and flat2.get("photos", []):
n_common_photos = find_number_common_photos(
flat1["photos"],
flat2["photos"],
photo_cache,
hash_threshold
)
assert n_common_photos > 1
min_number_photos = min(len(flat1["photos"]),
len(flat2["photos"]))
# Either all the photos are the same, or there are at least
# three common photos.
if n_common_photos == min_number_photos:
n_common_items += 15
else:
n_common_items += 5 * min(n_common_photos, 3)
# If the two flats are from the same website and have a
# different float part, consider they cannot be duplicates. See
# https://framagit.org/phyks/Flatisfy/issues/100.
@ -302,6 +281,24 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
)
if both_have_float_part and both_are_from_same_backend:
assert both_have_equal_float_part
if flat1.get("photos", []) and flat2.get("photos", []):
n_common_photos = find_number_common_photos(
flat1["photos"],
flat2["photos"],
photo_cache,
hash_threshold
)
min_number_photos = min(len(flat1["photos"]),
len(flat2["photos"]))
# Either all the photos are the same, or there are at least
# three common photos.
if n_common_photos == min_number_photos:
n_common_items += 15
else:
n_common_items += 5 * min(n_common_photos, 3)
except (AssertionError, TypeError):
# Skip and consider as not duplicates whenever the conditions
# are not met

View File

@ -0,0 +1,52 @@
{
"id": "122509451@seloger",
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/cleunay-arsenal-redon/122509451.htm?p=",
"title": "Appartement 3 pi\u00e8ces 78m\u00b2 - Rennes",
"area": 78,
"cost": 211000,
"price_per_meter": 2705.128205128205128205128205,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-19T22:39:00",
"location": " Rennes (35000)",
"station": "Arsenal - Redon",
"text": "Appartement quartier Arsenal Redon, \u00e0 vendre, type 3 de 78 m\u00b2. Il se compose d'une entr\u00e9e, d'un salon-s\u00e9jour lumineux de 33 m\u00b2 orient\u00e9 sud donnant sur une terrasse, de deux chambres, d'une cuisine ind\u00e9pendante, d'une salle de bains et d'un toilette. Vous disposerez d'un garage ferm\u00e9. Situ\u00e9 entre le centre ville et la future station m\u00e9tro Mabilais (ligne B), proximit\u00e9 imm\u00e9diate des commerces, \u00e9coles.. Bien soumis au statut de la copropri\u00e9t\u00e9. Charges annuelles courantes: 962e Agence immobili\u00e8re ERA Rennes Aristide Briand Agent Commercial: Guillaume DE KERANFLECH RSAC: 818942955.",
"phone": null,
"photos": [{
"id": "0oj57y4pvtz7537ibvjq1agi9hrpctm96o30wknpc.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/o/j/5/0oj57y4pvtz7537ibvjq1agi9hrpctm96o30wknpc.jpg",
"data": null
}, {
"id": "0s0kr6fw0hbqkwm5m2oxhi8yysk6mfxb9ctcrx2bk.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/s/0/k/0s0kr6fw0hbqkwm5m2oxhi8yysk6mfxb9ctcrx2bk.jpg",
"data": null
}, {
"id": "0z8q9eq4rprqfymp0mpcezrq6kxp8uxknf5pgrx8g.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/z/8/q/0z8q9eq4rprqfymp0mpcezrq6kxp8uxknf5pgrx8g.jpg",
"data": null
}, {
"id": "01ti2ovzcuyx4e14qfqqgatynges1grnalb4eau4g.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/1/t/i/01ti2ovzcuyx4e14qfqqgatynges1grnalb4eau4g.jpg",
"data": null
}, {
"id": "250ckvp15x8eeetuynem2kj7x8z12y66kay9okf0g.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/2/5/0/c/250ckvp15x8eeetuynem2kj7x8z12y66kay9okf0g.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Box": "1",
"Cuisine": "s\u00e9par\u00e9e",
"Pi\u00e8ces": "3",
"Etage": "RDC",
"Reference": "872GK-01",
"Chambres": "2",
"Chauffage": "individuel",
"Entr\u00e9e": "",
"Surface": "78 m\u00b2",
"Terrasse": "1",
"Etages": "5",
"Salle de S\u00e9jour": ""
}
}

View File

@ -0,0 +1,60 @@
{
"id": "123312807@seloger",
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/brequigny/123312807.htm?p=",
"title": "Appartement 3 pi\u00e8ces 58m\u00b2 - Rennes",
"area": 58,
"cost": 131440,
"price_per_meter": 2266.206896551724137931034483,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-20T22:35:00",
"location": " Rennes (35200)",
"station": "Cl\u00e9menceau",
"text": "OGIMM vous propose \u00e0 l'achat un appartement de type 3 dans une petite copropri\u00e9t\u00e9 de 4 \u00e9tages. Bien situ\u00e9, proche du boulevard Cl\u00e9menceau et des Bus C5 et C3, de la rue de Nantes, il est en tr\u00e8s bon \u00e9tat et au calme. Il est compos\u00e9 de: une entr\u00e9e avec placards, une cuisine s\u00e9par\u00e9e am\u00e9nag\u00e9e et \u00e9quip\u00e9e (possibilit\u00e9 d'ouverture), d'un balcon loggia, d'un s\u00e9jour lumineux au sud, de 2 chambres, d'une salle d'eau et d'un WC s\u00e9par\u00e9. Pr\u00e9sence d'une cave et d'un parking ext\u00e9rieur. Station de M\u00e9tro la plus proche Cl\u00e9menceau. Copropri\u00e9t\u00e9 saine et bien tenue, les charges de 1745.88e par an comprenant le chauffage (avec compteur individuel), l'eau chaude et froide, et l'entretien de l'immeuble. Copropri\u00e9t\u00e9 de 16 appartements. Actuellement lou\u00e9 650e charges comprises. A voir rapidement ! Dont 6.00 % honoraires TTC \u00e0 la charge de l'acqu\u00e9reur.",
"phone": null,
"photos": [{
"id": "1ir7ortudferww8to788kd38lmlnpx52ia5st7280.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/i/r/7/1ir7ortudferww8to788kd38lmlnpx52ia5st7280.jpg",
"data": null
}, {
"id": "08wbr1ivnz26gnyeofyjg02zi0d1vd1eijszcrgg0.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/8/w/b/08wbr1ivnz26gnyeofyjg02zi0d1vd1eijszcrgg0.jpg",
"data": null
}, {
"id": "0np6439w3557sclwu7b4sq7h7hntm9tizwrrtdr7k.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/n/p/6/0np6439w3557sclwu7b4sq7h7hntm9tizwrrtdr7k.jpg",
"data": null
}, {
"id": "0rc6ac2jlit0r27d1tmy2y8pqbdzps7gnzzmdds00.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/r/c/6/0rc6ac2jlit0r27d1tmy2y8pqbdzps7gnzzmdds00.jpg",
"data": null
}, {
"id": "19ebzllpk308rw1ei43a0t59fnjxohnidtvc5thq8.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/9/e/b/19ebzllpk308rw1ei43a0t59fnjxohnidtvc5thq8.jpg",
"data": null
}, {
"id": "07ize6lu9ssyv1ltjiux8gs56rgbyweai9wboor9c.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/7/i/z/07ize6lu9ssyv1ltjiux8gs56rgbyweai9wboor9c.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Cuisine": "s\u00e9par\u00e9e",
"Pi\u00e8ces": "3",
"Salle de S\u00e9jour": "",
"Reference": "OG9242",
"Chambres": "2",
"Salle d'eau": "1",
"Entr\u00e9e": "",
"Balcon": "1",
"Surface": "58 m\u00b2",
"Ann\u00e9e de construction": "1963",
"Calme": "",
"Etages": "4",
"Parking": "1",
"Rangements": "",
"Toilette": "1",
"Orientation": "Est, Sud"
}
}

View File

@ -0,0 +1,62 @@
{
"id": "123314207@seloger",
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/brequigny/123314207.htm?p=",
"title": "Appartement 3 pi\u00e8ces 58m\u00b2 - Rennes",
"area": 58,
"cost": 131440,
"price_per_meter": 2266.206896551724137931034483,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-20T22:35:00",
"location": " Rennes (35200)",
"station": "Cl\u00e9menceau",
"text": "OGIMM vous propose \u00e0 l'achat un appartement de type 3 au 1er \u00e9tage d'une petite r\u00e9sidence de 4 \u00e9tages. Au calme, propre, il est proche de la rue de Nantes, des Bus C5 et C3. La station de M\u00e9tro la plus proche est Cl\u00e9menceau. Vous aurez: une entr\u00e9e avec placards, une cuisine am\u00e9nag\u00e9e et \u00e9quip\u00e9e, un balcon loggia, une salle d'eau, un WC s\u00e9par\u00e9, 2 chambres, une cave et un parking. Les charges de copropri\u00e9t\u00e9 de 1526.58e par an comprennent le chauffage et l'eau chaude et froide avec comptage individuel. Locataire en place avec un loyer de 650e par mois. Copropri\u00e9t\u00e9 de 12 appartements. A voir vite ! Dont 6.00 % honoraires TTC \u00e0 la charge de l'acqu\u00e9reur.",
"phone": null,
"photos": [{
"id": "18a4t9w050xd7welkm25tg5ytv0wjbflrkyun1p1c.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/8/a/4/18a4t9w050xd7welkm25tg5ytv0wjbflrkyun1p1c.jpg",
"data": null
}, {
"id": "21q7r77zylh8k4mdlumg3cfcgmd4y3ixr9ggipk3k.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/2/1/q/7/21q7r77zylh8k4mdlumg3cfcgmd4y3ixr9ggipk3k.jpg",
"data": null
}, {
"id": "0eysaqsq7ti47y42lakhzwr2s9jdkvwsvvoqfq8e8.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/e/y/s/0eysaqsq7ti47y42lakhzwr2s9jdkvwsvvoqfq8e8.jpg",
"data": null
}, {
"id": "02tt2n650l5m908yiqkre3vu0cl9cxwqtg26xtwqo.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/2/t/t/02tt2n650l5m908yiqkre3vu0cl9cxwqtg26xtwqo.jpg",
"data": null
}, {
"id": "03wsh6bojie9eunp1ef9tynop2zkanx1qgm6lq41s.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/3/w/s/03wsh6bojie9eunp1ef9tynop2zkanx1qgm6lq41s.jpg",
"data": null
}, {
"id": "170whetachmm8357xz30ll7e3flrrqedc3ld2u0hs.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/7/0/w/170whetachmm8357xz30ll7e3flrrqedc3ld2u0hs.jpg",
"data": null
}, {
"id": "1unpbelnbrnsxxoxy0zd0me8nf4jgd124yomnbvnk.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/u/n/p/1unpbelnbrnsxxoxy0zd0me8nf4jgd124yomnbvnk.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Pi\u00e8ces": "3",
"Etage": "1",
"Reference": "OG9243",
"Chambres": "2",
"Salle d'eau": "1",
"Chauffage": "radiateur",
"Entr\u00e9e": "",
"Surface": "58 m\u00b2",
"Ann\u00e9e de construction": "1963",
"Calme": "",
"Etages": "4",
"Rangements": "",
"Toilette": "1",
"Orientation": "Est, Sud"
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -0,0 +1,67 @@
{
"id": "124910113@seloger",
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/maurepas-patton/124910113.htm?p=",
"title": "Appartement 3 pi\u00e8ces 65m\u00b2 - Rennes",
"area": 65,
"cost": 145275,
"price_per_meter": 2235,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-20T02:09:00",
"location": "225 RUE DE FOUGERES Rennes (35700)",
"station": "",
"text": "Rennes en exclusivit\u00e9 rue de Foug\u00e8res - Grand Appartement 3 pi\u00e8ces avec Balcon dans une copropri\u00e9t\u00e9 avec ascenseur - Travaux \u00e0 pr\u00e9voir - 2 chambres - Cave et garage.",
"phone": null,
"photos": [{
"id": "17b055i7hici1wxr951unlycfs5rhai73sbnnv2ki.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/1/7/b/0/17b055i7hici1wxr951unlycfs5rhai73sbnnv2ki.jpg",
"data": null
}, {
"id": "1s5t0lal78twswu22mahad9vtc75y3s5utuit2yte.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/1/s/5/t/1s5t0lal78twswu22mahad9vtc75y3s5utuit2yte.jpg",
"data": null
}, {
"id": "282rrcholht5full009yb8a5k1xe2jx0yiwtqyite.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/2/8/2/r/282rrcholht5full009yb8a5k1xe2jx0yiwtqyite.jpg",
"data": null
}, {
"id": "0wskjpe0511ak2ynzxual2qa0fp3bmz3ccaoqc5oi.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/0/w/s/k/0wskjpe0511ak2ynzxual2qa0fp3bmz3ccaoqc5oi.jpg",
"data": null
}, {
"id": "0kfne4iignt712pcunkcu2u9e497vt6oi11l30hxe.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/0/k/f/n/0kfne4iignt712pcunkcu2u9e497vt6oi11l30hxe.jpg",
"data": null
}, {
"id": "1jvyyiua1l843w1ohymxcbs9gj9zxvtfiajjfvwle.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/1/j/v/y/1jvyyiua1l843w1ohymxcbs9gj9zxvtfiajjfvwle.jpg",
"data": null
}, {
"id": "1ihj8ufsfdxgfecq03c154hcsj5jo5ysts29wjnia.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/1/i/h/j/1ihj8ufsfdxgfecq03c154hcsj5jo5ysts29wjnia.jpg",
"data": null
}, {
"id": "1g9yb1xe0bc8se0w8jys8ouiscpwer6y6lccd1ltu.jpg",
"url": "https://v.seloger.com/s/cdn/x/visuels/1/g/9/y/1g9yb1xe0bc8se0w8jys8ouiscpwer6y6lccd1ltu.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Box": "1",
"Pi\u00e8ces": "3",
"Etage": "1",
"Reference": "MT0135140",
"Chambres": "2",
"Salle d'eau": "1",
"Cave": "",
"Ascenseur": "",
"Surface": "65 m\u00b2",
"Balcon": "1",
"Travaux \u00c0 Pr\u00e9voir": "",
"Ann\u00e9e de construction": "1968",
"Toilettes S\u00e9par\u00e9es": "",
"Etages": "6",
"Toilette": "1"
}
}

View File

@ -0,0 +1,53 @@
{
"id": "127963747@seloger",
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/127963747.htm?p=",
"title": "Appartement 3 pi\u00e8ces 78m\u00b2 - Rennes",
"area": 78,
"cost": 211000,
"price_per_meter": 2705.128205128205128205128205,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-17T17:54:00",
"location": " Rennes (35000)",
"station": "",
"text": "ARSENAL/REDON - CIT\u00c9 JUDICIAIRE. D'une surface de 78 m\u00b2, cet appartement de type T3 est compos\u00e9 au rez-de-chauss\u00e9e comme suit: cuisine am\u00e9nag\u00e9e, deux chambres, salon/salle \u00e0 manger, salle de bain, toilettes.. La belle et lumineuse pi\u00e8ce de vie de 33 m\u00b2 vous permettra d'envisager une disposition agr\u00e9able de votre int\u00e9rieur.. Id\u00e9alement situ\u00e9 dans un secteur recherch\u00e9. Tr\u00e8s bon \u00e9tat.. Un garage situ\u00e9 en sous-sol compl\u00e8te cet appartement.. Contacter Agence ORPI au 02.23.44.37. 47.. 211000 euros Honoraires \u00e0 la charge du vendeur.",
"phone": null,
"photos": [{
"id": "1d9ks91ml67r2zwwcytkg3l4jh4yc8ii3y4fa64u8.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/d/9/k/1d9ks91ml67r2zwwcytkg3l4jh4yc8ii3y4fa64u8.jpg",
"data": null
}, {
"id": "0a95gv0bukbrk77mhe0h4n14j9bx2zrkfikgh7h8g.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/0/a/9/5/0a95gv0bukbrk77mhe0h4n14j9bx2zrkfikgh7h8g.jpg",
"data": null
}, {
"id": "1hd329lc8srsdh71o3iyo2tuv8jw9jutnctvqnv9c.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/h/d/3/1hd329lc8srsdh71o3iyo2tuv8jw9jutnctvqnv9c.jpg",
"data": null
}, {
"id": "1lf8fyr5marcjalerkc914opcc29osb23z9c9648w.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/l/f/8/1lf8fyr5marcjalerkc914opcc29osb23z9c9648w.jpg",
"data": null
}, {
"id": "1yrk6jbek3h7q3f9a3g1vy0kqc2uh7z4yckznrx8g.jpg",
"url": "https://v.seloger.com/s/width/800/visuels/1/y/r/k/1yrk6jbek3h7q3f9a3g1vy0kqc2uh7z4yckznrx8g.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Box": "1",
"Pi\u00e8ces": "3",
"Etage": "RDC",
"Reference": "114020E0PULC",
"Chambres": "2",
"Chauffage": "individuel",
"Toilette": "1",
"Salle de bain": "1",
"Ascenseur": "",
"Toilettes S\u00e9par\u00e9es": "",
"Surface": "78 m\u00b2",
"Salle \u00c0 Manger": "",
"Salle de s\u00e9jour": "33 m\u00b2"
}
}

View File

@ -0,0 +1,68 @@
{
"id": "128358415@seloger",
"url": "http://www.seloger.com/annonces/achat/maison/rennes-35/128358415.htm?p=",
"title": " 60m\u00b2 - Rennes",
"area": 60,
"cost": 179888,
"price_per_meter": 2998.133333333333333333333333,
"currency": "\u20ac",
"utilities": "",
"date": "2018-01-19T08:46:00",
"location": " Rennes (35000)",
"station": "",
"text": "I@D France - Sarah LECLERC vous propose: Pour les Amoureux de la Pierre, Maison de ville enti\u00e8rement r\u00e9nov\u00e9e avec go\u00fbt et modernit\u00e9, Poutres apparentes dans les 2 chambres, Cuisine am\u00e9nag\u00e9e ouverte sur le salon-salle \u00e0 manger de 30 M 2, Salle de douche, JARDINET et TERRASSE de 95 M 2 (possibilit\u00e9 jardin japonais).. Situ\u00e9e AU COEUR DE LA VILLE, \u00e0 proximit\u00e9 des \u00c9coles, des Commerces et du march\u00e9, tout peut se faire \u00e0 pied.. Ligne de bus \u00e0 proximit\u00e9 (ligne 61).. AUX PORTES DE RENNES (5mn).. Peut se vivre comme un appartement sans les charges de copropri\u00e9t\u00e9 ! BEAUCOUP DE CHARME POUR CE BIEN RARE SUR LE MARCHE !! Honoraires d'agence \u00e0 la charge du vendeur. Information d'affichage \u00e9nerg\u00e9tique sur ce bien: DPE VI indice 0 et GES VI indice 0. La pr\u00e9sente annonce immobili\u00e8re a \u00e9t\u00e9 r\u00e9dig\u00e9e sous la responsabilit\u00e9 \u00e9ditoriale de Mme Sarah LECLERC (ID 27387), Agent Commercial mandataire en immobilier immatricul\u00e9 au Registre Sp\u00e9cial des Agents Commerciaux (RSAC) du Tribunal de Commerce de rennes sous le num\u00e9ro 521558007.",
"phone": null,
"photos": [{
"id": "0j9kfrqnixlcnezpzsgz3g3vnekr6qj8rn7jcv22g.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/j/9/k/0j9kfrqnixlcnezpzsgz3g3vnekr6qj8rn7jcv22g.jpg",
"data": null
}, {
"id": "0yqp4d8arum1iy1pk9f1xh1req853dnhutgdjkcoo.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/y/q/p/0yqp4d8arum1iy1pk9f1xh1req853dnhutgdjkcoo.jpg",
"data": null
}, {
"id": "10a86qpr9k9wurb8itfnfgzo8eetxs6th2gmiv1o8.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/1/0/a/8/10a86qpr9k9wurb8itfnfgzo8eetxs6th2gmiv1o8.jpg",
"data": null
}, {
"id": "0eybdtrwgscy2dadq05naujq5okeotl5cyfuergvs.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/e/y/b/0eybdtrwgscy2dadq05naujq5okeotl5cyfuergvs.jpg",
"data": null
}, {
"id": "0maihs9wfff2xl3plqtq254n44gkaxlvejyrtnbqw.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/m/a/i/0maihs9wfff2xl3plqtq254n44gkaxlvejyrtnbqw.jpg",
"data": null
}, {
"id": "0cjgak7htwwtsl4to31rqqmyg5a73h6vwzserq2wo.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/c/j/g/0cjgak7htwwtsl4to31rqqmyg5a73h6vwzserq2wo.jpg",
"data": null
}, {
"id": "102tkunk4f87ksovtm7x6u1awoz65it97nabbx9a0.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/1/0/2/t/102tkunk4f87ksovtm7x6u1awoz65it97nabbx9a0.jpg",
"data": null
}, {
"id": "1kd6jjp93vv5wv5dw8964n7t823luy8jk3m4obkfs.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/1/k/d/6/1kd6jjp93vv5wv5dw8964n7t823luy8jk3m4obkfs.jpg",
"data": null
}, {
"id": "052a19zndeojbs4px73q8ns94g1uxi0exxqyltpo8.jpg",
"url": "https://v.seloger.com/s/height/800/visuels/0/5/2/a/052a19zndeojbs4px73q8ns94g1uxi0exxqyltpo8.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"Cuisine": "am\u00e9ricaine \u00e9quip\u00e9e",
"Pi\u00e8ces": "3",
"Etage": "1",
"Reference": "488187",
"Chambres": "2",
"Chauffage": "\u00e9lectrique radiateur",
"Terrain": "95 m\u00b2",
"Surface": "60 m\u00b2",
"Terrasse": "1",
"Ann\u00e9e de construction": "1870",
"Salle \u00c0 Manger": "",
"Salle de s\u00e9jour": "22 m\u00b2"
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@ -0,0 +1,44 @@
{
"id": "13783671@explorimmo",
"url": "http://www.explorimmo.com/annonce-13783671.html",
"title": "Vente appartement 3 pi\u00e8ces 65 m2",
"area": 65,
"cost": 145275,
"price_per_meter": 2235,
"currency": "EUR",
"utilities": "H.C.",
"date": "2017-11-10T02:04:00",
"location": "225 RUE DE FOUGERES Rennes 35700",
"station": null,
"text": "Rennes en exclusivit\u00e9 rue de Foug\u00e8res - Grand Appartement 3 pi\u00e8ces avec Balcon\ndans une copropri\u00e9t\u00e9 avec ascenseur - Travaux \u00e0 pr\u00e9voir - 2 chambres - Cave et\ngarage\n\n",
"phone": null,
"photos": [{
"id": "cb10f556708c4e858c1a45ec1dfda623.jpg",
"url": "http://thbr.figarocms.net/images/AXuL6XMCphsRrTYttb7yR2W3CCg=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/cb10f556708c4e858c1a45ec1dfda623.jpg",
"data": null
}, {
"id": "e2696eacce2d487e99e88c2b945cee34.jpg",
"url": "http://thbr.figarocms.net/images/0Va3M6bf1eFkJJzPXC--QIc6WTo=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/e2696eacce2d487e99e88c2b945cee34.jpg",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"available": true,
"heatingType": "",
"agency": "NEXITY LAMY, 6 avenue Jean Janvier, 35000, Rennes",
"bathrooms": 0,
"exposure": "Non pr\u00e9cis\u00e9",
"floor": "1",
"energy": "E",
"bedrooms": 2,
"greenhouseGasEmission": null,
"isFurnished": false,
"rooms": 3,
"fees": 0.0,
"creationDate": 1507712100000,
"agencyFees": 0.0,
"availabilityDate": null,
"guarantee": 0.0
}
}

View File

@ -0,0 +1,72 @@
{
"id": "14818297@explorimmo",
"url": "http://www.explorimmo.com/annonce-14818297.html",
"title": "Vente maison 3 pi\u00e8ces 60 m2",
"area": 60,
"cost": 179888,
"price_per_meter": 2998.133333333333333333333333,
"currency": "EUR",
"utilities": "H.C.",
"date": "2018-01-13T04:37:00",
"location": " Rennes 35000",
"station": null,
"text": "I@D France - Sarah LECLERC (06 01 43 20 02) vous propose : Pour les Amoureux\nde la Pierre, Maison de ville enti\u00e8rement r\u00e9nov\u00e9e avec go\u00fbt et modernit\u00e9,\nPoutres apparentes dans les 2 chambres, Cuisine am\u00e9nag\u00e9e ouverte sur le salon-\nsalle \u00e0 manger de 30 M 2 , Salle de douche, JARDINET et TERRASSE de 95 M 2\n(possibilit\u00e9 jardin japonais)... situ\u00e9e AU COEUR DE LA VILLE, \u00e0 proximit\u00e9 des\nEcoles, des Commerces et du march\u00e9, tout peut se faire \u00e0 pied... Ligne de bus\n\u00e0 proximit\u00e9 (ligne 61) ...AUX PORTES DE RENNES (5mn)... Peut se vivre comme un\nappartement sans les charges de copropri\u00e9t\u00e9 !BEAUCOUP DE CHARME POUR CE BIEN\nRARE SUR LE MARCHE !!!Honoraires d'agence \u00e0 la charge du vendeur.Information\nd'affichage \u00e9nerg\u00e9tique sur ce bien : DPE VI indice 0 et GES VI indice 0. La\npr\u00e9sente annonce immobili\u00e8re a \u00e9t\u00e9 r\u00e9dig\u00e9e sous la responsabilit\u00e9 \u00e9ditoriale\nde Mme Sarah LECLERC (ID 27387), Agent Commercial mandataire en immobilier\nimmatricul\u00e9 au Registre Sp\u00e9cial des Agents Commerciaux (RSAC) du Tribunal de\nCommerce de rennes sous le num\u00e9ro 521558007. Retrouvez tous nos biens sur\nnotre site internet. www.iadfrance.com\n\n",
"phone": null,
"photos": [{
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-1.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/ydkyhrlKomMs9N1Jjums21g1Yac=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-1.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-2.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/tSmULhY2QwgR-ssclatZ1p0fxIY=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-2.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-3.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/2KG56A1y_EvvCCpzb-ButCIB9Gc=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-3.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-4.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/aZC1B1yyb70R_YUw3yuMDep9Jjs=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-4.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-5.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/eTTgRXM9s61HPshBL8vaCKzCoHE=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-5.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-6.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/0PLHLenqeoN12WySQzcHfp4J81g=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-6.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-7.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/isxp6GKSDn-ZTCstKe8All5i-uk=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-7.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-8.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/M6-Tv19WAG4EnwvTzHIzylqV66I=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-8.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}, {
"id": "http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-9.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"url": "http://thbr.figarocms.net/external/vYv6ie0s_lXwighWdgrNJVHDROI=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/http%3A%2F%2Fpasserelle.static.iadfrance.com%2Fphotos%2Frealestate%2F2018-01%2Fproduct-477682-9.jpg%3Fbridge%3Dexplorimmo%26ts%3D201801200001",
"data": null
}],
"rooms": 3,
"bedrooms": 2,
"details": {
"available": true,
"heatingType": "chauffage_electrique",
"agency": "I@D FRANCE, Immeuble carr\u00e9 Haussmann IIIall\u00e9e de la ferme de Varatre, 77127, Lieusaint",
"bathrooms": 0,
"exposure": "Non pr\u00e9cis\u00e9",
"floor": "",
"energy": "",
"bedrooms": 2,
"greenhouseGasEmission": null,
"isFurnished": false,
"rooms": 3,
"fees": 0.0,
"creationDate": 1515718604000,
"agencyFees": 0.0,
"availabilityDate": null,
"guarantee": 0.0
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

View File

@ -114,6 +114,15 @@ class TestTexts(unittest.TestCase):
tools.normalize_string("avec ascenseur")
)
def test_whitespace_trim(self):
"""
Checks that leading and trailing whitespace is trimmed.
"""
self.assertEqual(
"rennes 35000",
tools.normalize_string(" Rennes 35000 ")
)
def test_accents(self):
"""
Checks accents are replaced.
@ -171,8 +180,8 @@ class TestPhotos(unittest.TestCase):
self.assertTrue(duplicates.compare_photos(
photo,
photo,
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
def test_different_photos(self):
@ -182,15 +191,15 @@ class TestPhotos(unittest.TestCase):
self.assertFalse(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertFalse(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
def test_matching_photos(self):
@ -200,29 +209,49 @@ class TestPhotos(unittest.TestCase):
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129@explorimmo.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-2@explorimmo.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-3@explorimmo.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-watermark@seloger.jpg"},
TestPhotos.IMAGE_CACHE,
TestPhotos.HASH_THRESHOLD
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
def test_matching_cropped_photos(self):
"""
Compares two matching photos with one being cropped.
"""
# FIXME: the image hash threshold should ideally be 10
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "vertical.jpg"},
{"url": TESTS_DATA_DIR + "vertical-cropped.jpg"},
self.IMAGE_CACHE,
20
))
# FIXME: the image hash threshold should ideally be 10
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "13783671@explorimmo.jpg"},
{"url": TESTS_DATA_DIR + "124910113@seloger.jpg"},
self.IMAGE_CACHE,
20
))
@ -276,11 +305,9 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_prices(self):
"""
@ -292,11 +319,9 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_rooms(self):
"""
@ -309,11 +334,9 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_areas(self):
"""
@ -325,11 +348,9 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_areas_decimals(self):
"""
@ -343,11 +364,9 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_phones(self):
"""
@ -360,11 +379,9 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flat1, flat2,
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_real_duplicates(self):
"""
@ -378,12 +395,58 @@ class TestDuplicates(unittest.TestCase):
score = duplicates.get_duplicate_score(
flats[0], flats[1],
TestDuplicates.IMAGE_CACHE, TestDuplicates.HASH_THRESHOLD
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
flats = self.load_files(
"128358415@seloger",
"14818297@explorimmo"
)
score = duplicates.get_duplicate_score(
flats[0], flats[1],
self.IMAGE_CACHE, 20
)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
# Different number of photos, and some are cropped
flats = self.load_files(
"124910113@seloger",
"13783671@explorimmo"
)
score = duplicates.get_duplicate_score(
flats[0], flats[1],
self.IMAGE_CACHE, 20
)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
# Same flat, different agencies, texts and photos
flats = self.load_files(
"122509451@seloger",
"127963747@seloger"
)
score = duplicates.get_duplicate_score(
flats[0], flats[1],
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
# FIXME: should be TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
self.assertGreaterEqual(score, 4)
# Really similar flats, but different
flats = self.load_files(
"123312807@seloger",
"123314207@seloger"
)
score = duplicates.get_duplicate_score(
flats[0], flats[1],
self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
def run():
"""

View File

@ -225,6 +225,9 @@ def normalize_string(string, lowercase=True, convert_arabic_numerals=True):
# Collapse multiple spaces, replace tabulations and newlines by space
string = re.sub(r"\s+", " ", string)
# Trim whitespaces
string = string.strip()
return string