git.michaelhowe.org Git - packages/b/bup.git/commitdiff
cmd/server: find .idx filenames more efficiently when needed.
author Avery Pennarun <apenwarr@gmail.com>
Wed, 22 Dec 2010 18:49:20 +0000 (10:49 -0800)
committer Avery Pennarun <apenwarr@gmail.com>
Wed, 22 Dec 2010 18:52:10 +0000 (10:52 -0800)
Rather than mapping *all* the .idx files into memory at once just to look up
a single object, just open/read/close them sequentially.  This should
significantly increase the maximum total repo size we can handle on a 32-bit
system, where address space for memory maps is limited.  (Of course, it's
still not ideal; we really should have some kind of fallback mode for when
our total set of indexes starts getting too big.)

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
cmd/server-cmd.py
lib/bup/git.py

index 6a875ffc41a499f8dc108fc8d25148f1a37c2591..299e28dda9a1de76896cda171ffce4a1bdc13db1 100755 (executable)
@@ -77,7 +77,7 @@ def receive_objects(conn, junk):
         sha = git.calc_hash(type, content)
         oldpack = w.exists(sha)
         # FIXME: we only suggest a single index per cycle, because the client
-        # is currently dumb to download more than one per cycle anyway.
+        # is currently too dumb to download more than one per cycle anyway.
         # Actually we should fix the client, but this is a minor optimization
         # on the server side.
         if not suggested and \
@@ -88,13 +88,12 @@ def receive_objects(conn, junk):
             # fix that deficiency of midx files eventually, although it'll
             # make the files bigger.  This method is certainly not very
             # efficient.
-            w.objcache.refresh(skip_midx = True)
-            oldpack = w.objcache.exists(sha)
+            oldpack = w.objcache.packname_containing(sha)
             debug2('new suggestion: %r\n' % oldpack)
             assert(oldpack)
             assert(oldpack != True)
             assert(not oldpack.endswith('.midx'))
-            w.objcache.refresh(skip_midx = False)
+            w.objcache.refresh()
         if not suggested and oldpack:
             assert(oldpack.endswith('.idx'))
             (dir,name) = os.path.split(oldpack)
index fc03fe6d9c844004c790878488edf042f704ac02..1dbf61f69e0211435d97cfd928a2249e5b72e0bb 100644 (file)
@@ -429,6 +429,18 @@ class PackIdxList:
         debug1('PackIdxList: using %d index%s.\n'
             % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
 
+    def packname_containing(self, hash):
+        # figure out which pack contains a given hash.
+        # FIXME: if the midx file format would just *store* this information,
+        # we could calculate it a lot more efficiently.  But it's not needed
+        # often, so let's do it like this.
+        for f in os.listdir(self.dir):
+            if f.endswith('.idx'):
+                full = os.path.join(self.dir, f)
+                ix = open_idx(full)
+                if ix.exists(hash):
+                    return full
+
     def add(self, hash):
         """Insert an additional object in the list."""
         self.also[hash] = 1