From: Yung-Chin Oei Date: Wed, 26 Sep 2012 19:35:45 +0000 (+0100) Subject: git.py: avoid repeated string-copying in tree_decode() X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=8c4650dfba5424f483ed0b4c33988644023dbd88;p=packages%2Fb%2Fbup.git git.py: avoid repeated string-copying in tree_decode() git.tree_decode showed bad performance when dealing with large trees, because it required string-copying quadratically in the number of tree elements. By removing unnecessary copying, performance is improved at all tree sizes, and significantly so for larger trees. The problem became particularly apparent in combination with another bug in bup (a patch for which is forthcoming), that allowed trees to grow without bound when backing up sparse files. Reported-by: trebor Signed-off-by: Yung-Chin Oei Reviewed-by: Rob Browning --- diff --git a/lib/bup/git.py b/lib/bup/git.py index 8afe0b8..3406edf 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -155,13 +155,13 @@ def tree_decode(buf): """Generate a list of (mode,name,hash) from the git tree object in buf.""" ofs = 0 while ofs < len(buf): - z = buf[ofs:].find('\0') - assert(z > 0) - spl = buf[ofs:ofs+z].split(' ', 1) + z = buf.find('\0', ofs) + assert(z > ofs) + spl = buf[ofs:z].split(' ', 1) assert(len(spl) == 2) mode,name = spl - sha = buf[ofs+z+1:ofs+z+1+20] - ofs += z+1+20 + sha = buf[z+1:z+1+20] + ofs = z+1+20 yield (int(mode, 8), name, sha)