標準ライブラリにこれが組み込まれているとは思わないので、上位 k 値を追跡するクラスを実装できます。これは、メインのディクショナリ オブジェクト (おそらく)と並行して最新の状態に保たれますCounter
。これをメイン辞書オブジェクトのサブクラスの属性として使用することもできます。
実装
class MostCommon(object):
"""Keep track the top-k key-value pairs.
Attributes:
top: Integer representing the top-k items to keep track of.
store: Dictionary of the top-k items.
min: The current minimum of any top-k item.
min_set: Set where keys are counts, and values are the set of
keys with that count.
"""
def __init__(self, top):
"""Create a new MostCommon object to track key-value paris.
Args:
top: Integer representing the top-k values to keep track of.
"""
self.top = top
self.store = dict()
self.min = None
self.min_set = defaultdict(set)
def _update_existing(self, key, value):
"""Update an item that is already one of the top-k values."""
# Currently handle values that are non-decreasing.
assert value > self.store[key]
self.min_set[self.store[key]].remove(key)
if self.store[key] == self.min: # Previously was the minimum.
if not self.min_set[self.store[key]]: # No more minimums.
del self.min_set[self.store[key]]
self.min_set[value].add(key)
self.min = min(self.min_set.keys())
self.min_set[value].add(key)
self.store[key] = value
def __contains__(self, key):
"""Boolean if the key is one of the top-k items."""
return key in self.store
def __setitem__(self, key, value):
"""Assign a value to a key.
The item won't be stored if it is less than the minimum (and
the store is already full). If the item is already in the store,
the value will be updated along with the `min` if necessary.
"""
# Store it if we aren't full yet.
if len(self.store) < self.top:
if key in self.store: # We already have this item.
self._update_existing(key, value)
else: # Brand new item.
self.store[key] = value
self.min_set[value].add(key)
if value < self.min or self.min is None:
self.min = value
else: # We're full. The value must be greater minimum to be added.
if value > self.min: # New item must be larger than current min.
if key in self.store: # We already have this item.
self._update_existing(key, value)
else: # Brand new item.
# Make room by removing one of the current minimums.
old = self.min_set[self.min].pop()
del self.store[old]
# Delete the set if there are no old minimums left.
if not self.min_set[self.min]:
del self.min_set[self.min]
# Add the new item.
self.min_set[value].add(key)
self.store[key] = value
self.min = min(self.min_set.keys())
def __repr__(self):
if len(self.store) < 10:
store = repr(self.store)
else:
length = len(self.store)
largest = max(self.store.itervalues())
store = '<len={length}, max={largest}>'.format(length=length,
largest=largest)
return ('{self.__class__.__name__}(top={self.top}, min={self.min}, '
'store={store})'.format(self=self, store=store))
使用例
>>> common = MostCommon(2)
>>> common
MostCommon(top=2, min=None, store={})
>>> common['a'] = 1
>>> common
MostCommon(top=2, min=1, store={'a': 1})
>>> 'a' in common
True
>>> common['b'] = 2
>>> common
MostCommon(top=2, min=1, store={'a': 1, 'b': 2})
>>> common['c'] = 3
>>> common
MostCommon(top=2, min=2, store={'c': 3, 'b': 2})
>>> 'a' in common
False
>>> common['b'] = 4
>>> common
MostCommon(top=2, min=3, store={'c': 3, 'b': 4})
値を更新した後のアクセスは確かにO(1)です
>>> counter = Counter()
>>> for x in permutations(xrange(10), 10):
counter[x] += 1
>>> common = MostCommon(1)
>>> for key, value in counter.iteritems():
common[key] = value
>>> common
MostCommon(top=1, min=1, store={(9, 7, 8, 0, 2, 6, 5, 4, 3, 1): 1})
>>> timeit('repr(common)', 'from __main__ import common', number=1)
1.3251570635475218e-05
アクセスは O(1) ですが、O(n) 操作である set-item 呼び出し中に最小値が変更された場合、n
は上位値の数です。Counter
これは、アクセスごとに O(n) であるよりも優れています。ここn
で、 は語彙全体のサイズです!