git.michaelhowe.org Git - packages/b/bup.git/commitdiff
Use a heapq object to accelerate git.idxmerge().
author Avery Pennarun <apenwarr@gmail.com>
Tue, 2 Feb 2010 02:10:08 +0000 (21:10 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Tue, 2 Feb 2010 02:14:14 +0000 (21:14 -0500)
This greatly accelerates bup margin and bup midx when you're iterating
through a large number of packs.

cmd-margin.py
git.py

diff --git a/cmd-margin.py b/cmd-margin.py
index 010a840e78fdc9c5730cf58af4b4bceb6114f238..b7f5bb24671278cc360038ba7dcc07de2c13214f 100755 (executable)
--- a/cmd-margin.py
+++ b/cmd-margin.py
@@ -23,6 +23,7 @@ longmatch = 0
 for i in mi:
     if i == last:
         continue
+    #assert(str(i) >= last)
     pm = _hashsplit.bitmatch(last, i)
     longmatch = max(longmatch, pm)
     last = i
diff --git a/git.py b/git.py
index d7d74662bc6cc2486a6e61f420c9f031f5f4c47a..3196fd5ed7cb0cb77c52075206f0cd7e2531bde3 100644 (file)
--- a/git.py
+++ b/git.py
@@ -1,4 +1,5 @@
 import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
+import heapq
 from helpers import *
 
 verbose = 0
@@ -273,26 +274,21 @@ def _shalist_sort_key(ent):
 def idxmerge(idxlist):
     total = sum([len(i) for i in idxlist])
     iters = [iter(i) for i in idxlist]
-    iters = [[next(it), it] for it in iters]
+    heap = [(next(it), it) for it in iters]
+    heapq.heapify(heap)
     count = 0
-    iters.sort()
-    while iters:
+    while heap:
         if (count % 10000) == 0:
             log('Merging: %.2f%% (%d/%d)\r'
                 % (count*100.0/total, count, total))
-        e = iters[0][0]
+        (e, it) = heap[0]
         yield e
         count += 1
-        e = iters[0][0] = next(iters[0][1])
-        if not e:
-            iters = iters[1:]
+        e = next(it)
+        if e:
+            heapq.heapreplace(heap, (e, it))
         else:
-            i = 1
-            while i < len(iters):
-                if iters[i][0] > e:
-                    break
-                i += 1
-            iters = iters[1:i] + [iters[0]] + iters[i:]
+            heapq.heappop(heap)
     log('Merging: %.2f%% (%d/%d), done.\n' % (100, total, total))