From: Avery Pennarun Date: Thu, 17 Feb 2011 02:34:36 +0000 (-0800) Subject: Merge branch 'bl/bloomcheck' into ap/cleanups X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=49b04a52d00c99b26dac76b88e400ef546ed4389;p=packages%2Fb%2Fbup.git Merge branch 'bl/bloomcheck' into ap/cleanups * bl/bloomcheck: Bail out immediately instead of redownloading .idx Add a --check behavior to verify bloom Defines/preprocessor lengths > magic numbers Conflicts: cmd/bloom-cmd.py --- 49b04a52d00c99b26dac76b88e400ef546ed4389 diff --cc Documentation/bup-bloom.md index b827b68,1501e2e..3d57c74 --- a/Documentation/bup-bloom.md +++ b/Documentation/bup-bloom.md @@@ -32,9 -28,17 +32,17 @@@ updates or regenerates it as needed -k, --hashes=*hashes* : number of hash functions to use only 4 and 5 are valid. - defaults to 5 for repositories < 2TiB and 4 otherwise. - see comments in git.py for more on this value. + defaults to 5 for repositories < 2 TiB, or 4 otherwise. + See comments in git.py for more on this value. + -c, --check=*idxfile* + : checks the bloom file (counterintuitively outfile) + against the specified .idx file, first checks that the + bloom filter is claiming to contain the .idx, then + checks that it does actually contain all of the objects + in the .idx. Does not write anything and ignores the + `-k` option. + # BUP Part of the `bup`(1) suite. diff --cc cmd/bloom-cmd.py index 5e3cd86,81ee392..033ad85 --- a/cmd/bloom-cmd.py +++ b/cmd/bloom-cmd.py @@@ -6,22 -6,37 +6,43 @@@ from bup.helpers import optspec = """ bup bloom [options...] -- -o,output= output bloom filename (default: auto-generated) -d,dir= input directory to look for idx files (default: auto-generated) -k,hashes= number of hash functions to use (4 or 5) (default: auto-generated) -c,check= an idx file to check against an existing bloom filter +f,force ignore existing bloom file and regenerate it from scratch +o,output= output bloom filename (default: auto) +d,dir= input directory to look for idx files (default: auto) +k,hashes= number of hash functions to use (4 or 5) (default: auto) ++c,check= check the given .idx file against the bloom filter """ + def check_bloom(path, bloomfilename, idx): ++ rbloomfilename = git.repo_rel(bloomfilename) ++ ridx = git.repo_rel(idx) + if not os.path.exists(bloomfilename): - log("bloom: %s not found to check\n" % bloomfilename) ++ log("bloom: %s: does not exist.\n" % rbloomfilename) + return - b = git.ShaBloom(bloomfilename) ++ b = bloom.ShaBloom(bloomfilename) + if not b.valid(): - log("bloom: %s could not be opened to check\n" % bloomfilename) ++ add_error("bloom: %r is invalid.\n" % rbloomfilename) + return + base = os.path.basename(idx) + if base not in b.idxnames: - log("bloom: filter does not contain %s, nothing to check\n" % idx) ++ log("bloom: %s does not contain the idx.\n" % rbloomfilename) + return + if base == idx: + idx = os.path.join(path, idx) - log("bloom: checking %s" % idx) ++ log("bloom: bloom file: %s\n" % rbloomfilename) ++ log("bloom: checking %s\n" % ridx) + for objsha in git.open_idx(idx): + if not b.exists(objsha): - add_error("bloom: ERROR: %s missing from bloom" ++ add_error("bloom: ERROR: object %s missing" + % str(objsha).encode('hex')) + + +_first = None def do_bloom(path, outfilename): + global _first - if not outfilename: - assert(path) - outfilename = os.path.join(path, 'bup.bloom') - b = None - if os.path.exists(outfilename): - b = git.ShaBloom(outfilename) + if os.path.exists(outfilename) and not opt.force: + b = bloom.ShaBloom(outfilename) if not b.valid(): debug1("bloom: Existing invalid bloom found, regenerating.\n") b = None @@@ -100,12 -109,22 +121,22 @@@ o = options.Options(optspec if extra: o.fatal('no positional parameters expected') - if opt.k and opt.k not in (4,5): - o.fatal('only k values of 4 and 5 are supported') - git.check_repo_or_die() -bloompath = opt.dir or git.repo('objects/pack') -if not opt.output: - assert(bloompath) -outfilename = opt.output or os.path.join(bloompath, 'bup.bloom') ++if not opt.check and opt.k and opt.k not in (4,5): ++ o.fatal('only k values of 4 and 5 are supported') + -if opt.check: - check_bloom(bloompath, outfilename, opt.check) -else: - if opt.k and opt.k not in (4,5): - o.fatal('only k values of 4 and 5 are supported') - - do_bloom(bloompath, outfilename) +paths = opt.dir and [opt.dir] or git.all_packdirs() +for path in paths: + debug1('bloom: scanning %s\n' % path) - do_bloom(path, opt.output) ++ outfilename = opt.output or os.path.join(path, 'bup.bloom') ++ if opt.check: ++ check_bloom(path, outfilename, opt.check) ++ else: ++ do_bloom(path, outfilename) + + if saved_errors: + log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors)) + sys.exit(1) - ++elif opt.check: ++ log('all tests passed.\n')