From b63d3e3da43d4a68d328518fcd3ae481334859b4 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 22 Sep 2010 07:13:18 -0700 Subject: [PATCH] cmd/split: print a progress counter. We don't know how many bytes we're going to split in total, but we can at least print the total number of bytes we've seen so far. Also fix cmd/random to *not* print progress messages by default, since my test situation is bup random 100M | bup split -b and they scribble over each other when they both print progress output. bup random now gets a '-v' option. Signed-off-by: Avery Pennarun --- Documentation/bup-random.md | 6 +++++- cmd/random-cmd.py | 4 +++- cmd/split-cmd.py | 21 ++++++++++++++++++-- lib/bup/_helpers.c | 6 +++--- lib/bup/hashsplit.py | 38 +++++++++++++++++++++++-------------- 5 files changed, 54 insertions(+), 21 deletions(-) diff --git a/Documentation/bup-random.md b/Documentation/bup-random.md index fe710f1..7a4c3e5 100644 --- a/Documentation/bup-random.md +++ b/Documentation/bup-random.md @@ -8,7 +8,7 @@ bup-random - generate a stream of random output # SYNOPSIS -bup random [-S seed] [-f] +bup random [-S seed] [-fv] # DESCRIPTION @@ -47,6 +47,10 @@ can be helpful when running microbenchmarks. : generate output even if stdout is a tty. (Generating random data to a tty is generally considered ill-advised, but you can do if you really want.) + +-v, --verbose +: print a progress message showing the number of bytes that + has been output so far. # EXAMPLES diff --git a/cmd/random-cmd.py b/cmd/random-cmd.py index 19732b9..873b511 100755 --- a/cmd/random-cmd.py +++ b/cmd/random-cmd.py @@ -8,6 +8,7 @@ bup random [-S seed] -- S,seed= optional random number seed [1] f,force print random data to stdout even if it's a tty +v,verbose print byte counter to stderr """ o = options.Options('bup random', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) @@ -21,7 +22,8 @@ handle_ctrl_c() if opt.force or (not os.isatty(1) and not atoi(os.environ.get('BUP_FORCE_TTY')) & 1): - _helpers.write_random(sys.stdout.fileno(), total, opt.seed) + _helpers.write_random(sys.stdout.fileno(), total, opt.seed, + opt.verbose and 1 or 0) else: log('error: not writing binary data to a terminal. Use -f to force.\n') sys.exit(1) diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py index 2a72bd6..4155b24 100755 --- a/cmd/split-cmd.py +++ b/cmd/split-cmd.py @@ -58,6 +58,21 @@ else: date = time.time() +last_prog = total_bytes = 0 +def prog(filenum, nbytes): + global last_prog, total_bytes + total_bytes += nbytes + now = time.time() + if now - last_prog < 0.2: + return + if filenum > 0: + progress('Splitting: file #%d, %d kbytes\r' + % (filenum+1, total_bytes/1024)) + else: + progress('Splitting: %d kbytes\r' % (total_bytes/1024)) + last_prog = now + + is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") @@ -117,12 +132,14 @@ else: if pack_writer: shalist = hashsplit.split_to_shalist(pack_writer, files, - keep_boundaries=opt.keep_boundaries) + keep_boundaries=opt.keep_boundaries, + progress=prog) tree = pack_writer.new_tree(shalist) else: last = 0 for (blob, bits) in hashsplit.hashsplit_iter(files, - keep_boundaries=opt.keep_boundaries): + keep_boundaries=opt.keep_boundaries, + progress=prog): hashsplit.total_split += len(blob) if opt.copy: sys.stdout.write(str(blob)) diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c index 75d2603..7a144d2 100644 --- a/lib/bup/_helpers.c +++ b/lib/bup/_helpers.c @@ -105,11 +105,11 @@ static PyObject *extract_bits(PyObject *self, PyObject *args) static PyObject *write_random(PyObject *self, PyObject *args) { uint32_t buf[1024/4]; - int fd = -1, seed = 0; + int fd = -1, seed = 0, verbose = 0; ssize_t ret; long long len = 0, kbytes = 0, written = 0; - if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed)) + if (!PyArg_ParseTuple(args, "iLii", &fd, &len, &seed, &verbose)) return NULL; srandom(seed); @@ -125,7 +125,7 @@ static PyObject *write_random(PyObject *self, PyObject *args) written += ret; if (ret < (int)sizeof(buf)) break; - if (kbytes/1024 > 0 && !(kbytes%1024)) + if (verbose && kbytes/1024 > 0 && !(kbytes%1024)) fprintf(stderr, "Random: %lld Mbytes\r", kbytes/1024); } diff --git a/lib/bup/hashsplit.py b/lib/bup/hashsplit.py index d73e9f6..5de6a3f 100644 --- a/lib/bup/hashsplit.py +++ b/lib/bup/hashsplit.py @@ -45,10 +45,13 @@ def splitbuf(buf): return (None, 0) -def blobiter(files): - for f in files: +def blobiter(files, progress=None): + for filenum,f in enumerate(files): ofs = 0 + b = '' while 1: + if progress: + progress(filenum, len(b)) fadvise_done(f, max(0, ofs - 1024*1024)) b = f.read(BLOB_HWM) ofs += len(b) @@ -72,10 +75,10 @@ def drainbuf(buf, finalize): yield (buf.get(buf.used()), 0) -def _hashsplit_iter(files): +def _hashsplit_iter(files, progress): assert(BLOB_HWM > BLOB_MAX) buf = Buf() - fi = blobiter(files) + fi = blobiter(files, progress) while 1: for i in drainbuf(buf, finalize=False): yield i @@ -89,23 +92,30 @@ def _hashsplit_iter(files): buf.put(bnew) -def _hashsplit_iter_keep_boundaries(files): - for f in files: - for i in _hashsplit_iter([f]): +def _hashsplit_iter_keep_boundaries(files, progress): + for real_filenum,f in enumerate(files): + if progress: + def prog(filenum, nbytes): + # the inner _hashsplit_iter doesn't know the real file count, + # so we'll replace it here. + return progress(real_filenum, nbytes) + else: + prog = None + for i in _hashsplit_iter([f], progress=prog): yield i -def hashsplit_iter(files, keep_boundaries): +def hashsplit_iter(files, keep_boundaries, progress): if keep_boundaries: - return _hashsplit_iter_keep_boundaries(files) + return _hashsplit_iter_keep_boundaries(files, progress) else: - return _hashsplit_iter(files) + return _hashsplit_iter(files, progress) total_split = 0 -def _split_to_blobs(w, files, keep_boundaries): +def _split_to_blobs(w, files, keep_boundaries, progress): global total_split - for (blob, bits) in hashsplit_iter(files, keep_boundaries): + for (blob, bits) in hashsplit_iter(files, keep_boundaries, progress): sha = w.new_blob(blob) total_split += len(blob) if w.outbytes >= max_pack_size or w.count >= max_pack_objects: @@ -140,8 +150,8 @@ def _squish(w, stacks, n): i += 1 -def split_to_shalist(w, files, keep_boundaries): - sl = _split_to_blobs(w, files, keep_boundaries) +def split_to_shalist(w, files, keep_boundaries, progress=None): + sl = _split_to_blobs(w, files, keep_boundaries, progress) if not fanout: shal = [] for (sha,size,bits) in sl: -- 2.39.5