The semaphore has detection and protection against regular resource leaks, where some resources go unaccounted for and are not released by the time the semaphore is destroyed. There is no detection or protection against negative leaks, where resources are "made up" out of thin air. This kind of leak looks benign at first sight: a few extra resources won't hurt anyone so long as the amount is small. But it turns out that even a single extra count resource can defeat a very important anti-deadlock protection in can_admit_read(): the special case which admits a new permit regardless of memory resources when all original count resources are available. This check uses ==, so if resources > original, the protection is defeated indefinitely. Instead of just changing == to >=, we add detection of such negative leaks to signal(), via on_internal_error_noexcept(). At this time I still don't know how this negative leak happens (the code doesn't confess); with this detection, hopefully we'll get a clue from tests or the field. Note that on_internal_error_noexcept() will not generate a coredump unless ScyllaDB is explicitly configured to do so. In production, it will just generate an error log with a backtrace. The detection also clamps the _resources to _initial_resources, to prevent any damage from the negative leak. I just noticed that there is no unit test for the deadlock protection described above, so one is added in this PR, even if it is only loosely related to the rest of the patch. Fixes: SCYLLADB-163 Closes scylladb/scylladb#27764
56 lines
1.9 KiB
YAML
56 lines
1.9 KiB
YAML
# Extra command-line options for this suite.
# NOTE(review): presumably appended to every test's command line - confirm
# against the test runner.
extra_scylla_cmdline_options:
  - '--reactor-backend=linux-aio'

# A list of long tests, which should be started early
run_first:
  - index_with_paging_test
  - schema_changes_test
  - sstable_conforms_to_mutation_source_test
  - secondary_index_test
  - mutation_reader_test
  - multishard_combining_reader_as_mutation_source_test
  - database_test
  - cql_function_test
  - memtable_test

# These tests cannot run in case-by-case mode because
# some test cases depend on each other
no_parallel_cases:
  - logalloc_test
  - logalloc_standard_allocator_segment_pool_backend_test

# Enable compaction groups on tests except on a few, listed below
all_can_run_compaction_groups_except:
  - exceptions_optimized_test
  - rate_limiter_test
  - exceptions_fallback_test
  - top_k_test
  - reusable_buffer_test

# Custom command line arguments for some of the tests.
# Each key is a test name; its value is a list of argument strings.
custom_args:
  mutation_reader_test:
    - '-c3 -m2G'
  sstable_test:
    - '-c1 -m2G'
  sstable_datafile_test:
    - '-c1 -m2G'
  sstable_compaction_test:
    - '-c1 -m2G --logger-log-level compaction=debug --logger-log-level compaction_manager=debug'
  sstable_3_x_test:
    - '-c1 -m2G'
  cql_query_test:
    - '-c2 -m2G --fail-on-abandoned-failed-futures=true'
  reader_concurrency_semaphore_test:
    - '-c1 -m256M --logger-log-level testlog=trace:reader_concurrency_semaphore=trace'
  multishard_query_test:
    - '-c2 -m3G'
  cache_algorithm_test:
    - '-c1 -m256M'
  commitlog_cleanup_test:
    - '-c1 -m2G'
  bloom_filter_test:
    - '-c1'
  s3_test:
    - '-c2 -m2G --logger-log-level s3=trace --logger-log-level http=trace --logger-log-level default_http_retry_strategy=trace'
  batchlog_manager_test:
    - '-c2 -m2G --logger-log-level batchlog_manager=trace:debug_error_injection=trace:testlog=trace'

# NOTE(review): presumably the tests selected for the debug build mode -
# confirm the exact semantics against the test runner.
run_in_debug:
  - logalloc_standard_allocator_segment_pool_backend_test