diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -1904,6 +1904,11 @@ for i in xrange(sets): setseq.append(random.randint(0, sys.maxint)) + def doinserts(): + d = util.lrucachedict(size) + for v in setseq: + d.insert(v, v) + def dosets(): d = util.lrucachedict(size) for v in setseq: @@ -1935,6 +1940,7 @@ benches = [ (doinit, b'init'), (dogets, b'gets'), + (doinserts, b'inserts'), (dosets, b'sets'), (domixed, b'mixed') ] diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -1209,7 +1209,7 @@ Holds a reference to nodes on either side as well as a key-value pair for the dictionary entry. """ - __slots__ = (u'next', u'prev', u'key', u'value') + __slots__ = (u'next', u'prev', u'key', u'value', u'cost') def __init__(self): self.next = None @@ -1217,10 +1217,13 @@ self.key = _notset self.value = None + self.cost = 0 def markempty(self): """Mark the node as emptied.""" self.key = _notset + self.value = None + self.cost = 0 class lrucachedict(object): """Dict that caches most recent accesses and sets. @@ -1233,6 +1236,10 @@ we recycle head.prev and make it the new head. Cache accesses result in the node being moved to before the existing head and being marked as the new head node. + + Items in the cache can be inserted with an optional "cost" value. This is + simply an integer that is specified by the caller. The cache can be queried + for the total cost of all items presently in the cache. """ def __init__(self, max): self._cache = {} @@ -1242,6 +1249,7 @@ head.next = head self._size = 1 self.capacity = max + self.totalcost = 0 def __len__(self): return len(self._cache) @@ -1261,11 +1269,15 @@ self._movetohead(node) return node.value - def __setitem__(self, k, v): + def insert(self, k, v, cost=0): + """Insert a new item in the cache with optional cost value.""" node = self._cache.get(k) # Replace existing value and mark as newest. if node is not None: + self.totalcost -= node.cost node.value = v + node.cost = cost + self.totalcost += cost self._movetohead(node) return @@ -1277,17 +1289,24 @@ # At capacity. Kill the old entry. if node.key is not _notset: + self.totalcost -= node.cost del self._cache[node.key] node.key = k node.value = v + node.cost = cost + self.totalcost += cost self._cache[k] = node # And mark it as newest entry. No need to adjust order since it # is already self._head.prev. self._head = node + def __setitem__(self, k, v): + self.insert(k, v) + def __delitem__(self, k): node = self._cache.pop(k) + self.totalcost -= node.cost node.markempty() # Temporarily mark as newest item before re-adjusting head to make @@ -1306,6 +1325,7 @@ def clear(self): n = self._head while n.key is not _notset: + self.totalcost -= n.cost n.markempty() n = n.next @@ -1336,7 +1356,7 @@ # We could potentially skip the first N items when decreasing capacity. # But let's keep it simple unless it is a performance problem. for i in range(len(self._cache)): - result[n.key] = n.value + result.insert(n.key, n.value, cost=n.cost) n = n.prev return result @@ -1359,6 +1379,7 @@ # And remove it from the cache and mark it as empty. del self._cache[n.key] + self.totalcost -= n.cost n.markempty() return key, value diff --git a/tests/test-lrucachedict.py b/tests/test-lrucachedict.py --- a/tests/test-lrucachedict.py +++ b/tests/test-lrucachedict.py @@ -12,27 +12,33 @@ def testsimple(self): d = util.lrucachedict(4) self.assertEqual(d.capacity, 4) - d['a'] = 'va' + d.insert('a', 'va', cost=2) d['b'] = 'vb' d['c'] = 'vc' - d['d'] = 'vd' + d.insert('d', 'vd', cost=42) self.assertEqual(d['a'], 'va') self.assertEqual(d['b'], 'vb') self.assertEqual(d['c'], 'vc') self.assertEqual(d['d'], 'vd') + self.assertEqual(d.totalcost, 44) + # 'a' should be dropped because it was least recently used. d['e'] = 've' self.assertNotIn('a', d) - self.assertIsNone(d.get('a')) + self.assertEqual(d.totalcost, 42) self.assertEqual(d['b'], 'vb') self.assertEqual(d['c'], 'vc') self.assertEqual(d['d'], 'vd') self.assertEqual(d['e'], 've') + # Replacing item with different cost adjusts totalcost. + d.insert('e', 've', cost=4) + self.assertEqual(d.totalcost, 46) + # Touch entries in some order (both get and set). d['e'] d['c'] = 'vc2' @@ -63,12 +69,13 @@ def testcopypartial(self): d = util.lrucachedict(4) - d['a'] = 'va' - d['b'] = 'vb' + d.insert('a', 'va', cost=4) + d.insert('b', 'vb', cost=2) dc = d.copy() self.assertEqual(len(dc), 2) + self.assertEqual(dc.totalcost, 6) for key in ('a', 'b'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) @@ -80,8 +87,10 @@ d['c'] = 'vc' del d['b'] + self.assertEqual(d.totalcost, 4) dc = d.copy() self.assertEqual(len(dc), 2) + self.assertEqual(dc.totalcost, 4) for key in ('a', 'c'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) @@ -93,7 +102,7 @@ def testcopyfull(self): d = util.lrucachedict(4) - d['a'] = 'va' + d.insert('a', 'va', cost=42) d['b'] = 'vb' d['c'] = 'vc' d['d'] = 'vd' @@ -104,6 +113,9 @@ self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) + self.assertEqual(d.totalcost, 42) + self.assertEqual(dc.totalcost, 42) + # 'a' should be dropped because it was least recently used. dc['e'] = 've' self.assertNotIn('a', dc) @@ -111,6 +123,9 @@ self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) + self.assertEqual(d.totalcost, 42) + self.assertEqual(dc.totalcost, 0) + # Contents and order of original dict should remain unchanged. dc['b'] = 'vb_new' @@ -120,25 +135,28 @@ def testcopydecreasecapacity(self): d = util.lrucachedict(5) - d['a'] = 'va' - d['b'] = 'vb' + d.insert('a', 'va', cost=4) + d.insert('b', 'vb', cost=2) d['c'] = 'vc' d['d'] = 'vd' dc = d.copy(2) + self.assertEqual(dc.totalcost, 0) for key in ('a', 'b'): self.assertNotIn(key, dc) for key in ('c', 'd'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) - dc['e'] = 've' + dc.insert('e', 've', cost=7) + self.assertEqual(dc.totalcost, 7) self.assertNotIn('c', dc) for key in ('d', 'e'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) # Original should remain unchanged. + self.assertEqual(d.totalcost, 6) for key in ('a', 'b', 'c', 'd'): self.assertIn(key, d) self.assertEqual(d[key], 'v%s' % key) @@ -174,14 +192,16 @@ def testpopoldest(self): d = util.lrucachedict(4) - d['a'] = 'va' - d['b'] = 'vb' + d.insert('a', 'va', cost=10) + d.insert('b', 'vb', cost=5) self.assertEqual(len(d), 2) self.assertEqual(d.popoldest(), ('a', 'va')) self.assertEqual(len(d), 1) + self.assertEqual(d.totalcost, 5) self.assertEqual(d.popoldest(), ('b', 'vb')) self.assertEqual(len(d), 0) + self.assertEqual(d.totalcost, 0) self.assertIsNone(d.popoldest()) d['a'] = 'va'