From 488d145dc880eb07cc0508185a889a5e51ea8ffe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Thu, 24 Feb 2022 09:53:18 +0200 Subject: [PATCH] tools/scylla-sstables: add validate-checksums operation Useful for determining whether sstables have been corrupted by factors outside of scylla, e.g. the I/O subsystem. --- tools/scylla-sstable.cc | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/scylla-sstable.cc b/tools/scylla-sstable.cc index c98cfb68b9..945de8b4d3 100644 --- a/tools/scylla-sstable.cc +++ b/tools/scylla-sstable.cc @@ -1416,6 +1416,13 @@ void dump_scylla_metadata_operation(schema_ptr schema, reader_permit permit, con writer.EndStream(); } +void validate_checksums_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, const bpo::variables_map&) { + for (auto& sst : sstables) { + const auto valid = sstables::validate_checksums(sst, permit, default_priority_class()).get(); + sst_log.info("validated the checksums of {}: {}", sst->get_filename(), valid ? "valid" : "invalid"); + } +} + template void sstable_consumer_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, const bpo::variables_map& vm) { const auto merge = vm.count("merge"); @@ -1935,6 +1942,30 @@ well defined internal order. )", {"merge"}, validate_operation}, + {"validate-checksums", + "Validate the checksums of the sstable(s)", +R"( +There are two kinds of checksums for sstable data files: +* The digest (full checksum), stored in the Digest.crc32 file. This is calculated + over the entire content of Data.db. +* The per-chunk checksum. For uncompressed sstables, this is stored in CRC.db, + for compressed sstables it is stored inline after each compressed chunk in + Data.db. + +During normal reads Scylla validates the per-chunk checksum for compressed +sstables. The digest and the per-chunk checksum of uncompressed sstables are not +checked on any code-paths currently. 
+ +This operation reads the entire Data.db and validates both kinds of checksums +against the data. Errors found are logged to stderr. The output just contains a +bool for each sstable that is true if the sstable matches all checksums. + +The content is dumped in JSON, using the following schema: + +$ROOT := { "$sstable_path": Bool, ... } + +)", + validate_checksums_operation}, }; } // anonymous namespace