Ignore duplicates by ID; there is no need to even add the duplicates to the db
This commit is contained in:
parent
35c88050df
commit
47ce1ba7e0
@ -162,7 +162,7 @@ def first_pass(flats_list, constraint, config):
|
|||||||
|
|
||||||
# Handle duplicates based on ids
|
# Handle duplicates based on ids
|
||||||
# Just remove them (no merge) as they should be the exact same object.
|
# Just remove them (no merge) as they should be the exact same object.
|
||||||
flats_list, duplicates_by_id = duplicates.detect(
|
flats_list, _ = duplicates.detect(
|
||||||
flats_list, key="id", merge=False, should_intersect=False
|
flats_list, key="id", merge=False, should_intersect=False
|
||||||
)
|
)
|
||||||
# Also merge duplicates based on urls (these may come from different
|
# Also merge duplicates based on urls (these may come from different
|
||||||
@ -184,7 +184,7 @@ def first_pass(flats_list, constraint, config):
|
|||||||
return {
|
return {
|
||||||
"new": flats_list,
|
"new": flats_list,
|
||||||
"ignored": ignored_list,
|
"ignored": ignored_list,
|
||||||
"duplicate": duplicates_by_id + duplicates_by_urls
|
"duplicate": duplicates_by_urls
|
||||||
}
|
}
|
||||||
|
|
||||||
@tools.timeit
|
@tools.timeit
|
||||||
|
Loading…
Reference in New Issue
Block a user