aboutsummaryrefslogtreecommitdiff
path: root/tests/regtest_spec.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/regtest_spec.py')
-rwxr-xr-xtests/regtest_spec.py113
1 files changed, 113 insertions, 0 deletions
diff --git a/tests/regtest_spec.py b/tests/regtest_spec.py
new file mode 100755
index 0000000..5a29f39
--- /dev/null
+++ b/tests/regtest_spec.py
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+"""Print a test spec on stdout.
+
+Each line has parameters for a test case. The regtest.sh shell script reads
+these lines and runs parallel processes.
+
+We use Python data structures so the test cases are easier to read and edit.
+"""
+
+import optparse
+import sys
+
+#
+# TEST CONFIGURATION
+#
+
+DEMO = (
+ # (case_name distr num_unique_values num_clients values_per_client)
+ # (num_bits num_hashes num_cohorts)
+ # (p q f) (num_additional regexp_to_remove)
+ ('demo1 unif 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
+ ('demo2 gauss 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
+ ('demo3 exp 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
+ ('demo4 zipf1 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
+ ('demo5 zipf1.5 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
+)
+
+DISTRIBUTIONS = (
+ 'unif',
+ 'exp',
+ 'gauss',
+ 'zipf1',
+ 'zipf1.5',
+)
+
+DISTRIBUTION_PARAMS = (
+ # name, num unique values, num clients, values per client
+ ('tiny', 100, 1000, 1), # test for insufficient data
+ ('small', 100, 1000000, 1),
+ ('medium', 1000, 10000000, 1),
+ ('large', 10000, 100000000, 1),
+)
+
+# 'k, h, m' as in params file.
+BLOOMFILTER_PARAMS = {
+ '8x16': (8, 2, 16), # 16 cohorts, 8 bits each, 2 bits set in each
+ '8x32': (8, 2, 32), # 32 cohorts, 8 bits each, 2 bits set in each
+ '8x128': (8, 2, 128), # 128 cohorts, 8 bits each, 2 bits set in each
+ '128x128': (128, 2, 128), # 8 cohorts, 128 bits each, 2 bits set in each
+}
+
+# 'p, q, f' as in params file.
+PRIVACY_PARAMS = {
+ 'eps_1_1': (0.39, 0.61, 0.45), # eps_1 = 1, eps_inf = 5:
+ 'eps_1_5': (0.225, 0.775, 0.0), # eps_1 = 5, no eps_inf
+}
+
+# For deriving candidates from true inputs.
+MAP_REGEX_MISSING = {
+ 'sharp': 'NONE', # Categorical data
+ '10%': 'v[0-9]*9$', # missing every 10th string
+}
+
+# test configuration ->
+# (name modifier, Bloom filter, privacy params, fraction of extra,
+# regex missing)
+TEST_CONFIGS = [
+ ('typical', '8x128', 'eps_1_1', .2, '10%'),
+ ('sharp', '8x128', 'eps_1_1', .0, 'sharp'), # no extra candidates
+ ('loose', '8x128', 'eps_1_5', .2, '10%'), # loose privacy
+ ('over_x2', '8x128', 'eps_1_1', 2.0, '10%'), # overshoot by x2
+ ('over_x10', '8x128', 'eps_1_1', 10.0, '10%'), # overshoot by x10
+]
+
+#
+# END TEST CONFIGURATION
+#
+
+
+def main(argv):
+ rows = []
+
+ test_case = []
+ for (distr_params, num_values, num_clients,
+ num_reports_per_client) in DISTRIBUTION_PARAMS:
+ for distribution in DISTRIBUTIONS:
+ for (config_name, bloom_name, privacy_params, fr_extra,
+ regex_missing) in TEST_CONFIGS:
+ test_name = 'r-{}-{}-{}'.format(distribution, distr_params,
+ config_name)
+
+ params = (BLOOMFILTER_PARAMS[bloom_name]
+ + PRIVACY_PARAMS[privacy_params]
+ + tuple([int(num_values * fr_extra)])
+ + tuple([MAP_REGEX_MISSING[regex_missing]]))
+
+ test_case = (test_name, distribution, num_values, num_clients,
+ num_reports_per_client) + params
+ row_str = [str(element) for element in test_case]
+ rows.append(row_str)
+
+ for params in DEMO:
+ rows.append(params)
+
+ for row in rows:
+ print ' '.join(row)
+
+if __name__ == '__main__':
+ try:
+ main(sys.argv)
+ except RuntimeError, e:
+ print >>sys.stderr, 'FATAL: %s' % e
+ sys.exit(1)