diff --git a/tests/sstable_test.cc b/tests/sstable_test.cc index 367614fb34..35ad20fe29 100644 --- a/tests/sstable_test.cc +++ b/tests/sstable_test.cc @@ -37,6 +37,7 @@ #include #include "sstable_test.hh" #include "tmpdir.hh" +#include "partition_slice_builder.hh" #include "disk-error-handler.hh" @@ -947,3 +948,221 @@ SEASTAR_TEST_CASE(statistics_rewrite) { }); }, "tests/sstables/generation"); } + +// Tests for reading a large partition for which the index contains a +// "promoted index", i.e., a sample of the column names inside the partition, +// with which we can avoid reading the entire partition when we look only +// for a specific subset of columns. The test sstable for the read test was +// generated in Cassandra. + +static schema_ptr large_partition_schema() { + static thread_local auto s = [] { + schema_builder builder(make_lw_shared(schema( + generate_legacy_id("try1", "data"), "try1", "data", + // partition key + {{"t1", utf8_type}}, + // clustering key + {{"t2", utf8_type}}, + // regular columns + {{"t3", utf8_type}}, + // static columns + {}, + // regular column name type + utf8_type, + // comment + "" + ))); + return builder.build(schema_builder::compact_storage::no); + }(); + return s; +} + +static future> load_large_partition_sst() { + auto sst = make_lw_shared( + "try1", "data", "tests/sstables/large_partition", 3, + sstables::sstable::version_types::ka, big); + auto fut = sst->load(); + return std::move(fut).then([sst = std::move(sst)] { + return std::move(sst); + }); +} + +// This is a rudimentary test that reads an sstable exported from Cassandra +// which contains a promoted index. It just checks that the promoted index +// is read from disk, as an unparsed array, and doesn't actually use it to +// search for anything. +SEASTAR_TEST_CASE(promoted_index_read) { + return load_large_partition_sst().then([] (auto sstp) { + schema_ptr s = large_partition_schema(); + return sstables::test(sstp).read_indexes(0).then([sstp] (index_list vec) { + BOOST_REQUIRE(vec.size() == 1); + index_entry &e = vec[0]; + BOOST_REQUIRE(e.get_promoted_index_bytes().size() == 468); + }); + }); +} + +// Use an empty string for ck1, ck2, or both, for unbounded ranges. +static query::partition_slice make_partition_slice(const schema& s, sstring ck1, sstring ck2) { + std::experimental::optional b1; + if (!ck1.empty()) { + b1.emplace(clustering_key_prefix::from_single_value( + s, utf8_type->decompose(ck1))); + } + std::experimental::optional b2; + if (!ck2.empty()) { + b2.emplace(clustering_key_prefix::from_single_value( + s, utf8_type->decompose(ck2))); + } + return partition_slice_builder(s). + with_range(query::clustering_range(b1, b2)).build(); +} + +// Count the number of CQL rows in one partition between clustering key +// prefix ck1 to ck2. +static future count_rows(sstable_ptr sstp, schema_ptr s, sstring key, sstring ck1, sstring ck2) { + return seastar::async([sstp, s, key, ck1, ck2] () mutable { + auto ps = make_partition_slice(*s, ck1, ck2); + auto row = sstp->read_row(s, sstables::key(key.c_str()), + query::clustering_key_filtering_context::create(s, ps)).get0(); + if (!row) { + return 0; + } + int nrows = 0; + auto mfopt = (*row)().get0(); + while (mfopt) { + if (mfopt->is_clustering_row()) { + nrows++; + } + mfopt = (*row)().get0(); + } + return nrows; + }); +} + +// Count the number of CQL rows in one partition +static future count_rows(sstable_ptr sstp, schema_ptr s, sstring key) { + return seastar::async([sstp, s, key] () mutable { + auto row = sstp->read_row(s, sstables::key(key.c_str())).get0(); + if (!row) { + return 0; + } + int nrows = 0; + auto mfopt = (*row)().get0(); + while (mfopt) { + if (mfopt->is_clustering_row()) { + nrows++; + } + mfopt = (*row)().get0(); + } + return nrows; + }); +} + +// Count the number of CQL rows between clustering key prefix ck1 to ck2 +// in all partitions in the sstable (using sstable::read_range_rows). +static future count_rows(sstable_ptr sstp, schema_ptr s, sstring ck1, sstring ck2) { + return seastar::async([sstp, s, ck1, ck2] () mutable { + auto ps = make_partition_slice(*s, ck1, ck2); + auto reader = sstp->read_range_rows(s, query::full_partition_range, + query::clustering_key_filtering_context::create(s, ps)); + int nrows = 0; + auto smopt = reader.read().get0(); + while (smopt) { + auto mfopt = (*smopt)().get0(); + while (mfopt) { + if (mfopt->is_clustering_row()) { + nrows++; + } + mfopt = (*smopt)().get0(); + } + smopt = reader.read().get0(); + } + return nrows; + }); +} + +// This test reads, using sstable::read_row(), a slice (a range of clustering +// rows) from one large partition in an sstable written in Cassandra. +// This large partition includes 13520 clustering rows, and spans about +// 700 KB on disk. When we ask to read only a part of it, the promoted index +// (included in this sstable) may be used to allow reading only a part of the +// partition from disk. This test doesn't directly verify that the promoted +// index is actually used - and can work even without a promoted index +// support - but can be used to check that adding promoted index read supports +// did not break anything. +// To verify that the promoted index was actually used to reduce the size +// of read from disk, add printouts to the row reading code. +SEASTAR_TEST_CASE(sub_partition_read) { + schema_ptr s = large_partition_schema(); + return load_large_partition_sst().then([s] (auto sstp) { + return count_rows(sstp, s, "v1", "18wX", "18xB").then([] (int nrows) { + // there should be 5 rows (out of 13520 = 20*26*26) in this range: + // 18wX, 18wY, 18wZ, 18xA, 18xB. + BOOST_REQUIRE(nrows == 5); + }).then([sstp, s] () { + return count_rows(sstp, s, "v1", "13aB", "15aA").then([] (int nrows) { + // There should be 26*26*2 rows in this range. It spans two + // promoted-index blocks, so we get to test that case. + BOOST_REQUIRE(nrows == 2*26*26); + }); + }).then([sstp, s] () { + return count_rows(sstp, s, "v1", "10aB", "19aA").then([] (int nrows) { + // There should be 26*26*9 rows in this range. It spans many + // promoted-index blocks. + BOOST_REQUIRE(nrows == 9*26*26); + }); + }).then([sstp, s] () { + return count_rows(sstp, s, "v1", "0", "z").then([] (int nrows) { + // All rows, 20*26*26 of them, are in this range. It spans all + // the promoted-index blocks, but the range is still bounded + // on both sides + BOOST_REQUIRE(nrows == 20*26*26); + }); + }).then([sstp, s] () { + // range that is outside (after) the actual range of the data. + // No rows should match. + return count_rows(sstp, s, "v1", "y", "z").then([] (int nrows) { + BOOST_REQUIRE(nrows == 0); + }); + }).then([sstp, s] () { + // range that is outside (before) the actual range of the data. + // No rows should match. + return count_rows(sstp, s, "v1", "_a", "_b").then([] (int nrows) { + BOOST_REQUIRE(nrows == 0); + }); + }).then([sstp, s] () { + // half-infinite range + return count_rows(sstp, s, "v1", "", "10aA").then([] (int nrows) { + BOOST_REQUIRE(nrows == (1*26*26 + 1)); + }); + }).then([sstp, s] () { + // half-infinite range + return count_rows(sstp, s, "v1", "10aA", "").then([] (int nrows) { + BOOST_REQUIRE(nrows == 19*26*26); + }); + }).then([sstp, s] () { + // count all rows, but giving an explicit all-encompasing filter + return count_rows(sstp, s, "v1", "", "").then([] (int nrows) { + BOOST_REQUIRE(nrows == 20*26*26); + }); + }).then([sstp, s] () { + // count all rows, without a filter + return count_rows(sstp, s, "v1").then([] (int nrows) { + BOOST_REQUIRE(nrows == 20*26*26); + }); + }); + }); +} + +// Same as previous test, just using read_range_rows instead of read_row +// to read parts of potentially more than one partition (in this particular +// sstable, there is actually just one partition). +SEASTAR_TEST_CASE(sub_partitions_read) { + schema_ptr s = large_partition_schema(); + return load_large_partition_sst().then([s] (auto sstp) { + return count_rows(sstp, s, "18wX", "18xB").then([] (int nrows) { + BOOST_REQUIRE(nrows == 5); + }); + }); +} diff --git a/tests/sstables/large_partition/try1-data-ka-3-CompressionInfo.db b/tests/sstables/large_partition/try1-data-ka-3-CompressionInfo.db new file mode 100644 index 0000000000..8cb58ac8e5 Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-CompressionInfo.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-Data.db b/tests/sstables/large_partition/try1-data-ka-3-Data.db new file mode 100644 index 0000000000..34297464d7 Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-Data.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-Digest.sha1 b/tests/sstables/large_partition/try1-data-ka-3-Digest.sha1 new file mode 100644 index 0000000000..94824cffc9 --- /dev/null +++ b/tests/sstables/large_partition/try1-data-ka-3-Digest.sha1 @@ -0,0 +1 @@ +2833048369 \ No newline at end of file diff --git a/tests/sstables/large_partition/try1-data-ka-3-Filter.db b/tests/sstables/large_partition/try1-data-ka-3-Filter.db new file mode 100644 index 0000000000..208dd70cff Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-Filter.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-Index.db b/tests/sstables/large_partition/try1-data-ka-3-Index.db new file mode 100644 index 0000000000..9e915de05b Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-Index.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-Statistics.db b/tests/sstables/large_partition/try1-data-ka-3-Statistics.db new file mode 100644 index 0000000000..53c7af5283 Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-Statistics.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-Summary.db b/tests/sstables/large_partition/try1-data-ka-3-Summary.db new file mode 100644 index 0000000000..56c5b4b115 Binary files /dev/null and b/tests/sstables/large_partition/try1-data-ka-3-Summary.db differ diff --git a/tests/sstables/large_partition/try1-data-ka-3-TOC.txt b/tests/sstables/large_partition/try1-data-ka-3-TOC.txt new file mode 100644 index 0000000000..4d010745d4 --- /dev/null +++ b/tests/sstables/large_partition/try1-data-ka-3-TOC.txt @@ -0,0 +1,8 @@ +Data.db +CompressionInfo.db +Index.db +Summary.db +Statistics.db +Digest.sha1 +TOC.txt +Filter.db