Replace randomgen with a new 'bup random' command.

author Avery Pennarun <apenwarr@gmail.com>

Thu, 11 Feb 2010 21:27:38 +0000 (16:27 -0500)

committer Avery Pennarun <apenwarr@gmail.com>

Thu, 11 Feb 2010 21:54:30 +0000 (16:54 -0500)
author Avery Pennarun <apenwarr@gmail.com>
Thu, 11 Feb 2010 21:27:38 +0000 (16:27 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Thu, 11 Feb 2010 21:54:30 +0000 (16:54 -0500)
diff --git a/Makefile b/Makefile

index 8793cb7109fae52e6114ffb9b1145f8b25de7a4d..c5a16c59ef4e4558661fd67af5b9a4208d715ff7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,8 @@ default: all
  
  all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
         bup-midx bup-fuse bup-ls bup-damage bup-fsck bup-margin bup-drecurse \
-       bup memtest randomgen$(EXT) _hashsplit$(SOEXT) \
+       bup-random \
+       bup memtest _hashsplit$(SOEXT) \
         Documentation/all
         
  %/all:
@@ -30,9 +31,6 @@ all: bup-split bup-join bup-save bup-init bup-server bup-index bup-tick \
  %/clean:
         $(MAKE) -C $* clean
  
-randomgen$(EXT): randomgen.o
-       $(CC) $(CFLAGS) -o $@ $<
-
  _hashsplit$(SOEXT): _hashsplit.c csetup.py
         @rm -f $@
         python csetup.py build
diff --git a/_hashsplit.c b/_hashsplit.c

index 933abe638fb9c358ef94b4750ef976e31a847d2e..732149f52507fd4b7847edcc09a1fdb60e99b09b 100644 (file)
--- a/_hashsplit.c
+++ b/_hashsplit.c
@@ -74,11 +74,50 @@ static PyObject *bitmatch(PyObject *self, PyObject *args)
  }
  
  
+// Write 'len' pseudorandom bytes to the file descriptor 'fd', after seeding
+// the generator with 'seed' so the same seed always gives the same stream.
+//
+// Python signature: write_random(fd, len, seed) -> bytes actually written.
+// A failed or short write() stops the output early; no exception is raised
+// for that, so the returned count is then smaller than 'len'.  A negative
+// 'len' writes nothing.
+//
+// I would have made this a lower-level function that just fills in a buffer
+// with random values, and then written those values from python.  But that's
+// about 20% slower in my tests, and since we typically generate random
+// numbers for benchmarking other parts of bup, any slowness in generating
+// random bytes will make our benchmarks inaccurate.  Plus nobody wants
+// pseudorandom bytes much except for this anyway.
+static PyObject *write_random(PyObject *self, PyObject *args)
+{
+    uint32_t buf[1024/4];
+    int fd = -1, seed = 0;
+    ssize_t ret;
+    long long len = 0, kbytes = 0, written = 0;
+    size_t i, tail;
+
+    if (!PyArg_ParseTuple(args, "iLi", &fd, &len, &seed))
+       return NULL;
+    
+    srandom(seed);
+    
+    // a negative length simply means there is nothing to write
+    if (len < 0)
+       len = 0;
+    
+    // bytes left over once all the complete 1k blocks are written
+    tail = (size_t)(len % 1024);
+    
+    for (kbytes = len/1024; kbytes > 0; kbytes--)
+    {
+       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+           buf[i] = random();
+       ret = write(fd, buf, sizeof(buf));
+       if (ret < 0)
+           ret = 0;
+       written += ret;
+       // ret is never negative here; the cast only avoids a
+       // signed/unsigned comparison
+       if (ret < (ssize_t)sizeof(buf))
+       {
+           tail = 0;  // output stopped early, so skip the tail too
+           break;
+       }
+       // one progress dot per 1024 blocks (1 MiB) of output
+       if (!(kbytes%1024))
+           fprintf(stderr, ".");
+    }
+    
+    // handle lengths that are not a multiple of 1024
+    if (tail > 0)
+    {
+       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
+           buf[i] = random();
+       ret = write(fd, buf, tail);
+       if (ret > 0)
+           written += ret;
+    }
+    
+    return Py_BuildValue("L", written);
+}
+
+
  static PyMethodDef hashsplit_methods[] = {
      { "splitbuf", splitbuf, METH_VARARGS,
         "Split a list of strings based on a rolling checksum." },
      { "bitmatch", bitmatch, METH_VARARGS,
         "Count the number of matching prefix bits between two strings." },
+    { "write_random", write_random, METH_VARARGS,
+       "Write random bytes to the given file descriptor" },
      { NULL, NULL, 0, NULL },  // sentinel
  };
  
diff --git a/cmd-random.py b/cmd-random.py

new file mode 100755 (executable)

index 0000000..dd61cf0
--- /dev/null
+++ b/cmd-random.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import sys, mmap
+import options, _hashsplit
+from helpers import *
+
+optspec = """
+bup random [-S seed] <numbytes>
+--
+S,seed=   optional random number seed (default 1)
+"""
+o = options.Options('bup random', optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+
+if len(extra) != 1:
+    log("bup random: exactly one argument expected\n")
+    o.usage()
+
+# <numbytes> goes through parse_num, so unit suffixes such as 10k or 2mb work
+total = parse_num(extra[0])
+
+# Use the documented default seed of 1 when -S is not given; an explicit
+# -S 0 is passed through unchanged.
+# NOTE(review): assumes an unset option is None or empty - confirm in options.py
+seed = opt.seed
+if not seed and seed != 0:
+    seed = 1
+_hashsplit.write_random(sys.stdout.fileno(), total, seed)
diff --git a/cmd-split.py b/cmd-split.py

index f462655a5e128fb95d212f5370f5da96cb8be442..8bc267e8a38cb8f2fc7fe407981c8974de283ec9 100755 (executable)
--- a/cmd-split.py
+++ b/cmd-split.py
@@ -32,11 +32,11 @@ if opt.verbose >= 2:
      git.verbose = opt.verbose - 1
      opt.bench = 1
  if opt.max_pack_size:
-    hashsplit.max_pack_size = int(opt.max_pack_size)
+    hashsplit.max_pack_size = parse_num(opt.max_pack_size)
  if opt.max_pack_objects:
-    hashsplit.max_pack_objects = int(opt.max_pack_objects)
+    hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
  if opt.fanout:
-    hashsplit.fanout = int(opt.fanout)
+    hashsplit.fanout = parse_num(opt.fanout)
  if opt.blobs:
      hashsplit.fanout = 0
  
diff --git a/helpers.py b/helpers.py

index d386185135d440f51347af85ac873ef1813c9a84..8201c3198c2023c09ab4d55c9894d65ba9503df3 100644 (file)
--- a/helpers.py
+++ b/helpers.py
@@ -1,4 +1,4 @@
-import sys, os, pwd, subprocess, errno, socket, select, mmap, stat
+import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re
  
  
  def log(s):
@@ -199,6 +199,28 @@ def mmap_readwrite(f, len = 0):
      return _mmap_do(f, len, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE)
  
  
+def parse_num(s):
+    """Convert a human-readable quantity such as '10', '1.5k' or '2 gb' to int.
+
+    s is converted with str() first, so plain numbers are accepted too.  The
+    numeric part may be an integer, a decimal or use exponent notation; it may
+    be followed by optional whitespace and a case-insensitive unit: b, k/kb,
+    m/mb, g/gb or t/tb (powers of 1024).  Leading and trailing whitespace is
+    ignored.  Fractional results are truncated toward zero by int().
+
+    Raises ValueError if the text is not a number, has an unknown unit, or
+    has anything left over after the unit.
+    """
+    # anchor at the end so trailing junk ("5k!!") is rejected, not ignored
+    g = re.match(r'([-+\d.e]+)\s*(\w*)$', str(s).strip())
+    if not g:
+        raise ValueError("can't parse %r as a number" % s)
+    (val, unit) = g.groups()
+    try:
+        # keep integers exact; float() loses precision above 2**53
+        num = int(val)
+    except ValueError:
+        num = float(val)
+    unit = unit.lower()
+    multipliers = {
+        '': 1, 'b': 1,
+        'k': 1024, 'kb': 1024,
+        'm': 1024*1024, 'mb': 1024*1024,
+        'g': 1024*1024*1024, 'gb': 1024*1024*1024,
+        't': 1024*1024*1024*1024, 'tb': 1024*1024*1024*1024,
+    }
+    if unit not in multipliers:
+        raise ValueError("invalid unit %r in number %r" % (unit, s))
+    return int(num*multipliers[unit])
+
+
  # count the number of elements in an iterator (consumes the iterator)
  def count(l):
      return reduce(lambda x,y: x+1, l)
diff --git a/randomgen.c b/randomgen.c

deleted file mode 100644 (file)

index 00aae05..0000000
--- a/randomgen.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <assert.h>
-
-int main(int argc, char **argv)
-{
-    if (argc != 2)
-    {
-       fprintf(stderr, "usage: %s <kbytes>\n", argv[0]);
-       return 1;
-    }
-    
-    int kbytes = atoi(argv[1]);
-    uint32_t buf[1024/4];
-    ssize_t written;
-    int i;
-    
-    for (; kbytes > 0; kbytes--)
-    {
-       for (i = 0; i < sizeof(buf)/sizeof(buf[0]); i++)
-           buf[i] = random();
-       written = write(1, buf, sizeof(buf));
-       assert(written = sizeof(buf)); // we'd die from SIGPIPE otherwise
-       if (!(kbytes%1024))
-           fprintf(stderr, ".");
-    }
-    
-    return 0;
-}
diff --git a/t/thelpers.py b/t/thelpers.py

new file mode 100644 (file)

index 0000000..5e59bca
--- /dev/null
+++ b/t/thelpers.py
@@ -0,0 +1,12 @@
+from helpers import *
+from wvtest import *
+
+# Checks parse_num() on plain integers, a fractional value with a unit
+# suffix, whitespace between number and unit, exponent notation, and a
+# negative fractional value (which is truncated by int()).
+@wvtest
+def test_parse_num():
+    pn = parse_num
+    WVPASSEQ(pn('1'), 1)
+    WVPASSEQ(pn('0'), 0)
+    # 1.5 * 1024
+    WVPASSEQ(pn('1.5k'), 1536)
+    WVPASSEQ(pn('2 gb'), 2*1024*1024*1024)
+    WVPASSEQ(pn('1e+9 k'), 1000000000 * 1024)
+    WVPASSEQ(pn('-3e-3mb'), int(-0.003 * 1024 * 1024))
author	Avery Pennarun <apenwarr@gmail.com>
	Thu, 11 Feb 2010 21:27:38 +0000 (16:27 -0500)
committer	Avery Pennarun <apenwarr@gmail.com>
	Thu, 11 Feb 2010 21:54:30 +0000 (16:54 -0500)
Makefile		patch \| blob \| history
_hashsplit.c		patch \| blob \| history
cmd-random.py	[new file with mode: 0755]	patch \| blob
cmd-split.py		patch \| blob \| history
helpers.py		patch \| blob \| history
randomgen.c	[deleted file]	patch \| blob \| history
t/thelpers.py	[new file with mode: 0644]	patch \| blob