次の実装を試してください
なお、この実装では反復回数を減らすために、リストを要素数の昇順にあらかじめ並べ替えたうえで、2要素の組み合わせ(ペア)だけを調べています。これにより、`key` のサイズは常に `hay` と同じかそれ以下になります。
# Sample input: the first dict's items all appear in the second and fourth dicts.
>>> my_list =[
{'oranges':'big','apples':'green'},
{'oranges':'big','apples':'green','bananas':'fresh'},
{'oranges':'big','apples':'red'},
{'oranges':'big','apples':'green','bananas':'rotten'}
]
def remove_dup(lst):
    """Drop every dict whose items are all contained in another dict.

    A dict is redundant when each of its ``(key, value)`` pairs also occurs
    in some other dict of ``lst``.  Exact duplicates collapse to a single
    entry, so ``remove_dup([{1: 2}, {1: 2}])`` returns ``[{1: 2}]``.

    Args:
        lst: Iterable of dicts whose values are hashable.

    Returns:
        A new list holding shallow copies of the surviving dicts, in the
        order in which they first appear in ``lst`` (dicts keep insertion
        order on Python 3.7+).

    Raises:
        TypeError: If a dict contains an unhashable value.
    """
    # Local import keeps the function self-contained.
    from itertools import combinations

    def dup_gen(item_sets):
        """Yield each item set that is contained in a later one."""
        # item_sets is sorted by size, so key is never larger than hay and
        # only the "key inside hay" direction has to be tested.
        for key, hay in combinations(item_sets, 2):
            if key <= hay:
                yield key

    # Identify each dict by the frozenset of its items: hashable and
    # independent of key insertion order.  setdefault keeps the first dict
    # seen for every distinct item set, which removes exact duplicates.
    first_seen = {}
    for entry in lst:
        first_seen.setdefault(frozenset(entry.items()), entry)

    # Sorting by size guarantees a subset always precedes its superset.
    by_size = sorted(first_seen, key=len)
    redundant = set(dup_gen(by_size))

    # Rebuild from the first-seen mapping so the output order is stable.
    return [
        dict(entry)
        for items, entry in first_seen.items()
        if items not in redundant
    ]
# Example calls; the line after each call is the list that call returned.
# The order of the dicts in a returned list is not significant and may
# differ from what is shown here.
remove_dup(my_list)
[{'apples': 'green', 'oranges': 'big', 'bananas': 'fresh'}, {'apples': 'green', 'oranges': 'big', 'bananas': 'rotten'}, {'apples': 'red', 'oranges': 'big'}]
remove_dup([{1:2}, {1:2}])
[{1: 2}]
remove_dup([{1:2}])
[{1: 2}]
remove_dup([])
[]
remove_dup([{1:2}, {1:3}])
[{1: 2}, {1: 3}]
より高速な実装
def remove_dup(lst):
    """Drop every dict whose items are all contained in another dict.

    Same contract as the generator-based version, written with a single
    set comprehension instead of a nested generator function.  Exact
    duplicates collapse to one entry, so ``remove_dup([{1: 2}, {1: 2}])``
    returns ``[{1: 2}]``.

    Args:
        lst: Iterable of dicts whose values are hashable.

    Returns:
        A new list holding shallow copies of the surviving dicts, in the
        order in which they first appear in ``lst`` (dicts keep insertion
        order on Python 3.7+).

    Raises:
        TypeError: If a dict contains an unhashable value.
    """
    # This block previously relied on `combinations` without importing it;
    # import it locally so the function works on its own.
    from itertools import combinations

    # One frozenset per distinct dict: hashable, insensitive to key order,
    # and built once instead of once per compared pair.
    first_seen = {}
    for entry in lst:
        first_seen.setdefault(frozenset(entry.items()), entry)

    # After sorting by size a subset always comes before its superset, so
    # each pair needs only the one-directional containment test.
    by_size = sorted(first_seen, key=len)
    dups = {key for key, hay in combinations(by_size, 2) if key <= hay}

    return [
        dict(entry)
        for items, entry in first_seen.items()
        if items not in dups
    ]