Ignore duplicates by ID: there is no need to even add the duplicates to the database

This commit is contained in:
Lucas Verney 2018-01-10 16:46:47 +01:00
parent 35c88050df
commit 47ce1ba7e0
1 changed file with 2 additions and 2 deletions

View File

@ -162,7 +162,7 @@ def first_pass(flats_list, constraint, config):
# Handle duplicates based on ids # Handle duplicates based on ids
# Just remove them (no merge) as they should be the exact same object. # Just remove them (no merge) as they should be the exact same object.
flats_list, duplicates_by_id = duplicates.detect( flats_list, _ = duplicates.detect(
flats_list, key="id", merge=False, should_intersect=False flats_list, key="id", merge=False, should_intersect=False
) )
# Also merge duplicates based on urls (these may come from different # Also merge duplicates based on urls (these may come from different
@ -184,7 +184,7 @@ def first_pass(flats_list, constraint, config):
return { return {
"new": flats_list, "new": flats_list,
"ignored": ignored_list, "ignored": ignored_list,
"duplicate": duplicates_by_id + duplicates_by_urls "duplicate": duplicates_by_urls
} }
@tools.timeit @tools.timeit