Files
scylladb/test.py
Rafi Einstein 32525f2694 Space-Saving Top-k algorithm for handling stream summary statistics
Based on the following implementation ([2]) for the Space-Saving algorithm from [1].
[1] http://www.cse.ust.hk/~raywong/comp5331/References/EfficientComputationOfFrequentAndTop-kElementsInDataStreams.pdf
[2] https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/stream/StreamSummary.java

The algorithm keeps a map between keys seen and their counts, keeping a bound on the number of tracked keys.
Replacement policy evicts the key with the lowest count while inheriting its count, and recording an estimation
of the error which results from that.
This error estimation can be later used to prove if the distribution we arrived at corresponds to the real top-K,
which we can display alongside the results.
Accuracy depends on the number of tracked keys.

Introduced as part of 'nodetool toppartition' query implementation.

Refs #2811
Message-Id: <20181027220937.58077-1-rafie@scylladb.com>
2018-10-28 10:10:28 +02:00

307 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (C) 2015 ScyllaDB
#
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
#
import os
import sys
import argparse
import subprocess
import signal
import shlex
import threading
import concurrent.futures
import io
boost_tests = [
'bytes_ostream_test',
'chunked_vector_test',
'compress_test',
'continuous_data_consumer_test',
'types_test',
'keys_test',
'mutation_test',
'mvcc_test',
'schema_registry_test',
'range_test',
'mutation_reader_test',
'serialized_action_test',
'cql_query_test',
'secondary_index_test',
'storage_proxy_test',
'schema_change_test',
'sstable_mutation_test',
'sstable_resharding_test',
'commitlog_test',
'hash_test',
'test-serialization',
'cartesian_product_test',
'allocation_strategy_test',
'UUID_test',
'compound_test',
'murmur_hash_test',
'partitioner_test',
'frozen_mutation_test',
'canonical_mutation_test',
'gossiping_property_file_snitch_test',
'row_cache_test',
'cache_flat_mutation_reader_test',
'network_topology_strategy_test',
'query_processor_test',
'batchlog_manager_test',
'logalloc_test',
'log_heap_test',
'crc_test',
'flush_queue_test',
'config_test',
'dynamic_bitset_test',
'gossip_test',
'managed_vector_test',
'map_difference_test',
'memtable_test',
'mutation_query_test',
'snitch_reset_test',
'auth_test',
'idl_test',
'range_tombstone_list_test',
'mutation_fragment_test',
'flat_mutation_reader_test',
'anchorless_list_test',
'database_test',
'input_stream_test',
'nonwrapping_range_test',
'virtual_reader_test',
'counter_test',
'cell_locker_test',
'view_schema_test',
'view_build_test',
'view_complex_test',
'clustering_ranges_walker_test',
'vint_serialization_test',
'duration_test',
'loading_cache_test',
'castas_fcts_test',
'big_decimal_test',
'aggregate_fcts_test',
'role_manager_test',
'caching_options_test',
'auth_resource_test',
'cql_auth_query_test',
'enum_set_test',
'extensions_test',
'cql_auth_syntax_test',
'querier_cache',
'limiting_data_source_test',
'sstable_test',
'sstable_3_x_test',
'meta_test',
'reusable_buffer_test',
'multishard_writer_test',
'observable_test',
'transport_test',
'fragmented_temporary_buffer_test',
'auth_passwords_test',
'multishard_mutation_query_test',
'top_k_test',
]
other_tests = [
'memory_footprint',
]
def print_progress_succint(test_path, test_args, success, cookie):
if type(cookie) is int:
cookie = (0, 1, cookie)
last_len, n, n_total = cookie
if success:
status = "PASSED"
else:
status = "FAILED"
msg = "[{}/{}] {} {} {}".format(n, n_total, status, test_path, ' '.join(test_args))
if sys.stdout.isatty():
print('\r' + ' ' * last_len, end='')
last_len = len(msg)
print('\r' + msg, end='')
else:
print(msg)
return (last_len, n + 1, n_total)
def print_status_verbose(test_path, test_args, success, cookie):
if type(cookie) is int:
cookie = (1, cookie)
n, n_total = cookie
if success:
status = "PASSED"
else:
status = "FAILED"
msg = "[{}/{}] {} {} {}".format(n, n_total, status, test_path, ' '.join(test_args))
print(msg)
return (n + 1, n_total)
class Alarm(Exception):
pass
def alarm_handler(signum, frame):
raise Alarm
if __name__ == "__main__":
all_modes = ['debug', 'release']
sysmem = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
testmem = 2e9
default_num_jobs = ((sysmem - 4e9) // testmem)
parser = argparse.ArgumentParser(description="Scylla test runner")
parser.add_argument('--fast', action="store_true",
help="Run only fast tests")
parser.add_argument('--name', action="store",
help="Run only test whose name contains given string")
parser.add_argument('--mode', choices=all_modes,
help="Run only tests for given build mode")
parser.add_argument('--timeout', action="store", default="300", type=int,
help="timeout value for test execution")
parser.add_argument('--jenkins', action="store",
help="jenkins output file prefix")
parser.add_argument('--verbose', '-v', action='store_true', default=False,
help='Verbose reporting')
parser.add_argument('--jobs', '-j', action="store", default=default_num_jobs, type=int,
help="Number of jobs to use for running the tests")
args = parser.parse_args()
print_progress = print_status_verbose if args.verbose else print_progress_succint
custom_seastar_args = {
"sstable_test": ['-c1'],
"sstable_3_x_test": ['-c1'],
"mutation_reader_test": ['-c{}'.format(min(os.cpu_count(), 3)), '-m2G'],
}
test_to_run = []
modes_to_run = all_modes if not args.mode else [args.mode]
for mode in modes_to_run:
prefix = os.path.join('build', mode, 'tests')
standard_args = '--overprovisioned --unsafe-bypass-fsync 1 --blocked-reactor-notify-ms 2000000'.split()
seastar_args = '-c2 -m2G'.split()
for test in other_tests:
test_to_run.append((os.path.join(prefix, test), 'other', custom_seastar_args.get(test, seastar_args) + standard_args))
for test in boost_tests:
test_to_run.append((os.path.join(prefix, test), 'boost', custom_seastar_args.get(test, seastar_args) + standard_args))
if 'release' in modes_to_run:
test_to_run.append(('build/release/tests/lsa_async_eviction_test', 'other',
'-c1 -m200M --size 1024 --batch 3000 --count 2000000'.split() + standard_args))
test_to_run.append(('build/release/tests/lsa_sync_eviction_test', 'other',
'-c1 -m100M --count 10 --standard-object-size 3000000'.split() + standard_args))
test_to_run.append(('build/release/tests/lsa_sync_eviction_test', 'other',
'-c1 -m100M --count 24000 --standard-object-size 2048'.split() + standard_args))
test_to_run.append(('build/release/tests/lsa_sync_eviction_test', 'other',
'-c1 -m1G --count 4000000 --standard-object-size 128'.split() + standard_args))
test_to_run.append(('build/release/tests/row_cache_alloc_stress', 'other',
'-c1 -m2G'.split() + standard_args))
test_to_run.append(('build/release/tests/row_cache_stress_test', 'other', '-c1 -m1G --seconds 10'.split() + standard_args))
if args.name:
test_to_run = [t for t in test_to_run if args.name in t[0]]
failed_tests = []
n_total = len(test_to_run)
env = os.environ
# disable false positive due to new (with_alignment(...)) ...
env['ASAN_OPTIONS'] = 'alloc_dealloc_mismatch=0'
env['UBSAN_OPTIONS'] = 'print_stacktrace=1'
env['BOOST_TEST_CATCH_SYSTEM_ERRORS'] = 'no'
def run_test(path, type, exec_args):
boost_args = []
# avoid modifying in-place, it will change test_to_run
exec_args = exec_args + '--collectd 0'.split()
file = io.StringIO()
if args.jenkins and type == 'boost':
mode = 'release'
if path.startswith(os.path.join('build', 'debug')):
mode = 'debug'
xmlout = (args.jenkins + "." + mode + "." +
os.path.basename(path.split()[0]) + ".boost.xml")
boost_args += ['--report_level=no', '--logger=HRF,test_suite:XML,test_suite,' + xmlout]
if type == 'boost':
boost_args += ['--']
def report_error(out, report_subcause):
report_subcause()
if out:
print('=== stdout START ===', file=file)
print(out, file=file)
print('=== stdout END ===', file=file)
out = None
success = False
try:
out = subprocess.check_output([path] + boost_args + exec_args,
stderr=subprocess.STDOUT,
timeout=args.timeout,
env=env, preexec_fn=os.setsid)
success = True
except subprocess.TimeoutExpired as e:
def report_subcause():
print(' timed out', file=file)
report_error(e.output.decode(encoding='UTF-8'), report_subcause=report_subcause)
except subprocess.CalledProcessError as e:
def report_subcause():
print(' with error code {code}\n'.format(code=e.returncode), file=file)
report_error(e.output.decode(encoding='UTF-8'), report_subcause=report_subcause)
except Exception as e:
def report_subcause():
print(' with error {e}\n'.format(e=e), file=file)
report_error(e, report_subcause=report_subcause)
return (path, boost_args + exec_args, success, file.getvalue())
executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.jobs)
futures = []
for n, test in enumerate(test_to_run):
path = test[0]
test_type = test[1]
exec_args = test[2] if len(test) >= 3 else []
futures.append(executor.submit(run_test, path, test_type, exec_args))
cookie = n_total
for future in concurrent.futures.as_completed(futures):
test_path, test_args, success, out = future.result()
cookie = print_progress(test_path, test_args, success, cookie)
if not success:
failed_tests.append((test_path, test_args, out))
if not failed_tests:
print('\nOK.')
else:
print('\n\nOutput of the failed tests:')
for test, args, out in failed_tests:
print("Test {} {} failed:\n{}".format(test, ' '.join(args), out))
print('\n\nThe following test(s) have failed:')
for test, args, _ in failed_tests:
print(' {} {}'.format(test, ' '.join(args)))
print('\nSummary: {} of the total {} tests failed'.format(len(failed_tests), len(test_to_run)))
sys.exit(1)