all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
- bup memtest randomgen$(EXT) _hashsplit$(SOEXT) \
+ bup-random \
+ bup memtest _hashsplit$(SOEXT) \
Documentation/all
%/all:
%/clean:
$(MAKE) -C $* clean
-randomgen$(EXT): randomgen.o
- $(CC) $(CFLAGS) -o $@ $<
-
_hashsplit$(SOEXT): _hashsplit.c csetup.py
@rm -f $@
python csetup.py build
}
+// Fill 'count' 32-bit words with successive values from random().
+static void fill_random_words(uint32_t *words, size_t count)
+{
+    size_t i;
+    for (i = 0; i < count; i++)
+        words[i] = random();
+}
+
+
+// I would have made this a lower-level function that just fills in a buffer
+// with random values, and then written those values from python. But that's
+// about 20% slower in my tests, and since we typically generate random
+// numbers for benchmarking other parts of bup, any slowness in generating
+// random bytes will make our benchmarks inaccurate. Plus nobody wants
+// pseudorandom bytes much except for this anyway.
+//
+// Python signature: write_random(fd, len, seed) -> bytes written.
+//
+// Seeds the generator with srandom(seed), then writes 'len' bytes of
+// random() output to file descriptor 'fd': first len/1024 full 1024-byte
+// chunks, then one short chunk of len%1024 bytes so that byte counts which
+// are not a multiple of 1024 are honoured rather than rounded down.
+//
+// A failed or short write() stops the output early without raising; the
+// caller can detect that because the returned count is less than 'len'.
+// A non-positive 'len' writes nothing and returns 0.  A '.' goes to stderr
+// whenever the number of remaining full chunks is a multiple of 1024.
+static PyObject *write_random(PyObject *self, PyObject *args)
+{
+    uint32_t buf[1024/4];
+    int fd = -1, seed = 0;
+    ssize_t ret;
+    long long len = 0, kbytes = 0, written = 0, tail = 0;
+
+    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+        return NULL;
+
+    srandom(seed);
+
+    for (kbytes = len/1024; kbytes > 0; kbytes--)
+    {
+        fill_random_words(buf, sizeof(buf)/sizeof(buf[0]));
+        ret = write(fd, buf, sizeof(buf));
+        if (ret < 0)
+            ret = 0;  // treat an error like a zero-length write
+        written += ret;
+        // ret is non-negative here, so the cast cannot change its value.
+        if ((size_t)ret < sizeof(buf))
+            return Py_BuildValue("L", written);  // give up; skip the tail too
+        if (!(kbytes%1024))
+            fprintf(stderr, ".");
+    }
+
+    // Handle lengths that are not a multiple of 1024.  In C the remainder
+    // takes the sign of 'len', so a negative length yields tail <= 0 here.
+    tail = len % 1024;
+    if (tail > 0)
+    {
+        fill_random_words(buf, sizeof(buf)/sizeof(buf[0]));
+        ret = write(fd, buf, (size_t)tail);
+        if (ret < 0)
+            ret = 0;
+        written += ret;
+    }
+
+    return Py_BuildValue("L", written);
+}
+
+
static PyMethodDef hashsplit_methods[] = {
{ "splitbuf", splitbuf, METH_VARARGS,
"Split a list of strings based on a rolling checksum." },
{ "bitmatch", bitmatch, METH_VARARGS,
"Count the number of matching prefix bits between two strings." },
+ { "write_random", write_random, METH_VARARGS,
+ "Write random bytes to the given file descriptor" },
{ NULL, NULL, 0, NULL }, // sentinel
};
--- /dev/null
+#!/usr/bin/env python
+import sys, mmap
+import options, _hashsplit
+from helpers import *
+
+optspec = """
+bup random [-S seed] <numbytes>
+--
+S,seed= optional random number seed (default 1)
+"""
+# Parse the command line according to optspec.
+o = options.Options('bup random', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+# Exactly one positional argument (the byte count) is accepted.
+if len(extra) != 1:
+ log("bup random: exactly one argument expected\n")
+ o.usage()
+
+# The count is run through parse_num() rather than int().
+total = parse_num(extra[0])
+# NOTE(review): optspec advertises a default seed of 1, but 0 is what gets
+# passed when opt.seed is false or absent -- confirm that is intended.
+# The value returned by write_random() is not checked here.
+_hashsplit.write_random(sys.stdout.fileno(), total, opt.seed or 0)
git.verbose = opt.verbose - 1
opt.bench = 1
if opt.max_pack_size:
- hashsplit.max_pack_size = int(opt.max_pack_size)
+ hashsplit.max_pack_size = parse_num(opt.max_pack_size)
if opt.max_pack_objects:
- hashsplit.max_pack_objects = int(opt.max_pack_objects)
+ hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
if opt.fanout:
- hashsplit.fanout = int(opt.fanout)
+ hashsplit.fanout = parse_num(opt.fanout)
if opt.blobs:
hashsplit.fanout = 0
-import sys, os, pwd, subprocess, errno, socket, select, mmap, stat
+import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
def log(s):
return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
+def parse_num(s):
+    """Convert a human-readable quantity such as '1.5k' or '2 gb' to an int.
+
+    str(s) must start with a run of digits, signs, '.' and 'e' that float()
+    can parse, optionally followed by whitespace and a case-insensitive unit:
+    t/tb, g/gb, m/mb, k/kb (successive powers of 1024), or b / no unit for a
+    multiplier of 1.  The pattern is not anchored at the end, so text after
+    the unit is ignored.  The product is truncated with int().
+
+    Raises ValueError when the text does not start with a number, when
+    float() rejects the numeric part, or when the unit is not recognized.
+    """
+    match = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
+    if not match:
+        raise ValueError("can't parse %r as a number" % s)
+    number_text, suffix = match.groups()
+    # Convert the number before looking at the unit, so a malformed number
+    # is reported ahead of a malformed unit.
+    number = float(number_text)
+    suffix = suffix.lower()
+    # Power of 1024 that each accepted unit stands for.
+    exponents = {
+        '': 0, 'b': 0,
+        'k': 1, 'kb': 1,
+        'm': 2, 'mb': 2,
+        'g': 3, 'gb': 3,
+        't': 4, 'tb': 4,
+    }
+    if suffix not in exponents:
+        raise ValueError("invalid unit %r in number %r" % (suffix, s))
+    return int(number * 1024**exponents[suffix])
+
+
# count the number of elements in an iterator (consumes the iterator)
def count(l):
return reduce(lambda x,y: x+1, l)
+++ /dev/null
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <assert.h>
-
-int main(int argc, char **argv)
-{
- if (argc != 2)
- {
- fprintf(stderr, "usage: %s <kbytes>\n", argv[0]);
- return 1;
- }
-
- int kbytes = atoi(argv[1]);
- uint32_t buf[1024/4];
- ssize_t written;
- int i;
-
- for (; kbytes > 0; kbytes--)
- {
- for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
- buf[i] = random();
- written = write(1, buf, sizeof(buf));
- assert(written = sizeof(buf)); // we'd die from SIGPIPE otherwise
- if (!(kbytes%1024))
- fprintf(stderr, ".");
- }
-
- return 0;
-}
--- /dev/null
+from helpers import *
+from wvtest import *
+
+@wvtest
+def test_parse_num():
+ # Checks parse_num() on bare integers, a fractional value with a unit,
+ # a unit separated by whitespace, and signed exponent notation.
+ pn = parse_num
+ WVPASSEQ(pn('1'), 1)
+ WVPASSEQ(pn('0'), 0)
+ WVPASSEQ(pn('1.5k'), 1536)
+ WVPASSEQ(pn('2 gb'), 2*1024*1024*1024)
+ WVPASSEQ(pn('1e+9 k'), 1000000000 * 1024)
+ # The expected value is truncated with int(), just as parse_num's result is.
+ WVPASSEQ(pn('-3e-3mb'), int(-0.003 * 1024 * 1024))