From: Avery Pennarun
Date: Tue, 2 Feb 2010 02:10:08 +0000 (-0500)
Subject: Use a heapq object to accelerate git.idxmerge().
X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=04e1c07492984b3e722c3c5d983cf23ecd1887c5;p=packages%2Fb%2Fbup.git

Use a heapq object to accelerate git.idxmerge().

This greatly accelerates bup margin and bup midx when you're iterating
through a large number of packs.
---

diff --git a/cmd-margin.py b/cmd-margin.py
index 010a840..b7f5bb2 100755
--- a/cmd-margin.py
+++ b/cmd-margin.py
@@ -23,6 +23,7 @@ longmatch = 0
 for i in mi:
     if i == last:
         continue
+    #assert(str(i) >= last)
     pm = _hashsplit.bitmatch(last, i)
     longmatch = max(longmatch, pm)
     last = i
diff --git a/git.py b/git.py
index d7d7466..3196fd5 100644
--- a/git.py
+++ b/git.py
@@ -1,4 +1,5 @@
 import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
+import heapq
 from helpers import *
 
 verbose = 0
@@ -273,26 +274,21 @@ def _shalist_sort_key(ent):
 def idxmerge(idxlist):
     total = sum([len(i) for i in idxlist])
     iters = [iter(i) for i in idxlist]
-    iters = [[next(it), it] for it in iters]
+    heap = [(next(it), it) for it in iters]
+    heapq.heapify(heap)
     count = 0
-    iters.sort()
-    while iters:
+    while heap:
         if (count % 10000) == 0:
             log('Merging: %.2f%% (%d/%d)\r'
                 % (count*100.0/total, count, total))
-        e = iters[0][0]
+        (e, it) = heap[0]
         yield e
         count += 1
-        e = iters[0][0] = next(iters[0][1])
-        if not e:
-            iters = iters[1:]
+        e = next(it)
+        if e:
+            heapq.heapreplace(heap, (e, it))
         else:
-            i = 1
-            while i < len(iters):
-                if iters[i][0] > e:
-                    break
-                i += 1
-            iters = iters[1:i] + [iters[0]] + iters[i:]
+            heapq.heappop(heap)
     log('Merging: %.2f%% (%d/%d), done.\n' % (100, total, total))
 
 