Files
scylladb/tests/sstable_assertions.hh
Vladimir Krivopalov 3a9cb54c76 Merge the pair of index_readers into just one tracking a range.
Historically, we had two index_readers per a sstable_mutation_reader,
one for the lower bound and one for the upper bound. Most of public
members of the index_reader class were only called on either of those.
With the changes introduced in #2981, two readers are even more tied
together as they now have a shared-per-pair list of index pages that
needs proper cleanup and was protruding woefully into the caller code.

This fix re-structures index_reader so that it now keeps track of both
lower and upper bounds. The shared_index_lists structure is encapsulated
within index_reader and becomes an internal detail rather than a
liability.

Fixes #3220.

Tests: unit (debug, release)
+
Tested using cassandra-stress commands from #3189.

perf_fast_forward results indicate there is no performance degradation
caused by thix fix.

=========================== Baseline ===================================
running: large-partition-skips
Testing scanning large partition with skips.
Reads whole range interleaving reads with skips according to read-skip pattern:
read    skip      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
1       0         0.494458   1000000    2022418   1018     126960      27       0        0        0        0        0        0        0  97.6%
1       1         1.754717    500000     284946    997     127064       6       0        0        3        3        0        0        0  99.9%
1       8         0.551664    111112     201413    997     127064       6       0        0        3        3        0        0        0  99.7%
1       16        0.383888     58824     153232   1001     127080      10       0        0        5        5        0        0        0  99.5%
1       32        0.289073     30304     104832    997     127064      28       0        0        3        3        0        0        0  99.3%
1       64        0.236963     15385      64926    997     127064     122       0        0        3        3        0        0        0  99.2%
1       256       0.172901      3892      22510    997     127064     217       0        0        3        3        0        0        0  95.5%
1       1024      0.117570       976       8301    997     127064     235       0        0        3        3        0        0        0  49.0%
1       4096      0.085811       245       2855    664      27172     375     274        0        3        3        0        0        0  21.4%
64      1         0.512781    984616    1920149   1142     127064     139       0        0        3        3        0        0        0  98.7%
64      8         0.479232    888896    1854833   1001     127080      10       0        0        5        5        0        0        0  99.6%
64      16        0.451193    800000    1773078    997     127064       6       0        0        3        3        0        0        0  99.6%
64      32        0.408684    666688    1631305    997     127064       6       0        0        3        3        0        0        0  99.5%
64      64        0.351906    500032    1420924    997     127064      14       0        0        3        3        0        0        0  99.5%
64      256       0.227008    200000     881026    997     127064     211       0        0        3        3        0        0        0  99.1%
64      1024      0.125803     58880     468032    997     127064     290       0        0        3        3        0        0        0  65.1%
64      4096      0.098155     15424     157139    703      27856     401     267        0        3        3        0        0        0  25.8%

running: large-partition-slicing
Testing slicing of large partition:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000701         1       1427      9        296       6       4        0        3        3        0        0        0  12.4%
0       32        0.000698        32      45827      9        296       6       3        0        3        3        0        0        0  13.9%
0       256       0.000808       256     316920     10        328       6       3        0        3        3        0        0        0  24.9%
0       4096      0.004368      4096     937697     25        808      14       3        0        3        3        0        0        0  45.9%
500000  1         0.001196         1        836     13        412       9       4        0        3        3        0        0        0  22.7%
500000  32        0.001200        32      26664     13        412       9       4        0        3        3        0        0        0  22.2%
500000  256       0.001503       256     170338     14        444      10       4        0        3        3        0        0        0  25.3%
500000  4096      0.004351      4096     941465     30        956      20       4        0        3        3        0        0        0  50.7%

running: large-partition-slicing-clustering-keys
Testing slicing of large partition using clustering keys:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000625         1       1601      7        176       6       0        0        3        3        0        0        0  23.2%
0       32        0.000604        32      53016      7        176       6       0        0        3        3        0        0        0  24.7%
0       256       0.000695       256     368498      8        180       6       0        0        3        3        0        0        0  36.4%
0       4096      0.004083      4096    1003106     20        692      12       1        0        3        3        0        0        0  47.0%
500000  1         0.001198         1        835     12        516       9       3        0        3        3        0        0        0  22.8%
500000  32        0.000981        32      32631     12        388       9       3        0        3        3        0        0        0  29.2%
500000  256       0.001320       256     194011     13        384      10       3        0        3        3        0        0        0  29.0%
500000  4096      0.003944      4096    1038567     25        840      17       2        0        3        3        0        0        0  52.2%

running: large-partition-slicing-single-key-reader
Testing slicing of large partition, single-partition reader:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000849         1       1178      9        488       6       0        0        3        3        0        0        0  16.5%
0       32        0.000661        32      48415      9        296       6       0        0        3        3        0        0        0  22.2%
0       256       0.000756       256     338648     10        328       6       0        0        3        3        0        0        0  33.3%
0       4096      0.004147      4096     987610     22        840      12       1        0        3        3        0        0        0  47.9%
500000  1         0.001041         1        960     13        476       9       3        0        3        3        0        0        0  25.9%
500000  32        0.001020        32      31375     13        412       9       3        0        3        3        0        0        0  29.1%
500000  256       0.001265       256     202373     14        444      10       3        0        3        3        0        0        0  32.0%
500000  4096      0.004121      4096     994014     30        988      18       3        0        3        3        0        0        0  52.7%

running: large-partition-select-few-rows
Testing selecting few rows from a large partition:
stride  rows      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
1000000 1         0.000668         1       1498      9        296       6       4        0        3        3        0        0        0  19.8%
500000  2         0.000976         2       2048     13        412       9       4        0        3        3        0        0        0  29.0%
250000  4         0.001408         4       2842     18        572      12       6        0        3        3        0        0        0  28.8%
125000  8         0.002004         8       3993     29        912      19      10        0        3        3        0        0        0  34.0%
62500   16        0.002883        16       5551     50       1584      32      18        0        3        3        0        0        0  41.9%
2       500000    1.053215    500000     474737   1138     127080     120       0        0        5        5        0        0        0  99.7%

running: large-partition-forwarding
Testing forwarding with clustering restriction in a large partition:
pk-scan   time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
yes       0.002717         2        736     24       2684       8      16        0        3        3        0        0        0  19.7%
no        0.001004         2       1992     13        412       8       2        0        3        3        0        0        0  30.2%

running: small-partition-skips
Testing scanning small partitions with skips.
Reads whole range interleaving reads with skips according to read-skip pattern:
   read    skip      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
-> 1       0         1.466523   1000000     681885   1369     139732      33       1        0        0        0        0        0        0  99.7%
-> 1       1        12.792183    500000      39086   6235     177736    5155       0        0     5123     7663        0        0        0  96.4%
-> 1       8         3.451431    111112      32193   6235     177736    5155       0        0     5123     9673        0        0        0  84.8%
-> 1       16        2.223815     58824      26452   6234     177704    5154       0        0     5122     9965        0        0        0  75.0%
-> 1       32        1.512511     30304      20036   6233     177680    5155       1        0     5123    10090        0        0        0  61.8%
-> 1       64        1.129465     15385      13621   6227     177464    5154       0        0     5122    10159        0        0        0  49.5%
-> 1       256       0.733282      3892       5308   6211     175464    5178      24        0     5122    10220        0        0        0  33.8%
-> 1       1024      0.397302       976       2457   5946     142152    5369     217        0     5120    10235        0        0        0  32.1%
-> 1       4096      0.187746       245       1305   5499      81992    5296     142        0     5122    10240        0        0        0  46.8%
-> 64      1         2.428488    984616     405444   7332     177736    5155      25        0     5123     5208        0        0        0  79.9%
-> 64      8         2.262876    888896     392817   6235     177736    5155       0        0     5123     5654        0        0        0  78.1%
-> 64      16        2.137544    800000     374261   6234     177732    5154       0        0     5122     6110        0        0        0  77.1%
-> 64      32        1.862466    666688     357960   6235     177736    5155       0        0     5123     6844        0        0        0  73.7%
-> 64      64        1.547757    500032     323069   6234     177728    5155       0        0     5123     7651        0        0        0  68.7%
-> 64      256       0.914612    200000     218672   6233     177704    5154       0        0     5122     9202        0        0        0  55.5%
-> 64      1024      0.475472     58880     123835   6229     177492    5154       5        0     5122     9930        0        0        0  45.4%
-> 64      4096      0.271239     15424      56865   6158     169480    5257     114        0     5115    10142        0        0        0  44.1%

running: small-partition-slicing
Testing slicing small partitions:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.003209         1        312      3        260       2       7        0        1        1        0        0        0  15.5%
0       32        0.004205        32       7610     16       1428      10       0        0        5        5        0        0        0  15.7%
0       256       0.009830       256      26042     97       8572      62       0        0       31       31        0        0        0  18.7%
0       4096      0.015471      4096     264748    100       8704      64       0        0       32       32        0        0        0  48.4%
500000  1         0.003654         1        274     34        492      33       0        0       32       64        0        0        0  28.7%
500000  32        0.004287        32       7464     40       1260      36       0        0       32       64        0        0        0  26.0%
500000  256       0.009598       256      26673    100       8748      64       4        0       32       64        0        0        0  20.6%
500000  4096      0.014151      4096     289449    119       7892      85       0        0       53       64        0        0        0  54.1%

========================  With the patch ================================
running: large-partition-skips
Testing scanning large partition with skips.
Reads whole range interleaving reads with skips according to read-skip pattern:
read    skip      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
1       0         0.468887   1000000    2132711   1018     126960      29       0        0        0        0        0        0        0  98.4%
1       1         1.735113    500000     288166   1001     127080      10       0        0        5        5        0        0        0  99.9%
1       8         0.535616    111112     207447    997     127064       6       0        0        3        3        0        0        0  99.6%
1       16        0.365487     58824     160947   1001     127080      15       0        0        5        5        0        0        0  99.5%
1       32        0.272208     30304     111326    997     127064      21       0        0        3        3        0        0        0  99.3%
1       64        0.224049     15385      68668    997     127064     208       0        0        3        3        0        0        0  99.1%
1       256       0.159247      3892      24440    997     127064     250       0        0        3        3        0        0        0  94.7%
1       1024      0.102107       976       9559    997     127064     292       0        0        3        3        0        0        0  53.6%
1       4096      0.084310       245       2906    664      27172     371     273        0        3        3        0        0        0  20.2%
64      1         0.508340    984616    1936923   1142     127064     129       0        0        3        3        0        0        0  98.1%
64      8         0.470369    888896    1889786    997     127064       6       0        0        3        3        0        0        0  99.6%
64      16        0.439917    800000    1818526   1001     127080      10       0        0        5        5        0        0        0  99.6%
64      32        0.397938    666688    1675358    997     127064       6       0        0        3        3        0        0        0  99.5%
64      64        0.344144    500032    1452972    997     127064      18       0        0        3        3        0        0        0  99.4%
64      256       0.219996    200000     909107    997     127064     251       0        0        3        3        0        0        0  99.1%
64      1024      0.124294     58880     473715    997     127064     284       1        0        3        3        0        0        0  62.2%
64      4096      0.097580     15424     158065    703      27856     400     267        0        3        3        0        0        0  25.3%

running: large-partition-slicing
Testing slicing of large partition:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000733         1       1365      9        296       6       4        0        3        3        0        0        0  19.3%
0       32        0.000705        32      45417      9        296       6       3        0        3        3        0        0        0  15.3%
0       256       0.000830       256     308364     10        328       6       3        0        3        3        0        0        0  26.7%
0       4096      0.004631      4096     884529     25        808      14       3        0        3        3        0        0        0  48.1%
500000  1         0.001184         1        845     13        412       9       4        0        3        3        0        0        0  23.7%
500000  32        0.001199        32      26690     13        412       9       4        0        3        3        0        0        0  21.9%
500000  256       0.001530       256     167296     14        444      10       4        0        3        3        0        0        0  26.8%
500000  4096      0.004379      4096     935474     30        956      19       4        0        3        3        0        0        0  51.5%

running: large-partition-slicing-clustering-keys
Testing slicing of large partition using clustering keys:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000620         1       1614      7        176       6       0        0        3        3        0        0        0  27.4%
0       32        0.000625        32      51218      7        176       6       0        0        3        3        0        0        0  27.0%
0       256       0.000701       256     365148      8        180       6       0        0        3        3        0        0        0  35.2%
0       4096      0.004063      4096    1008130     20        692      12       1        0        3        3        0        0        0  47.6%
500000  1         0.001208         1        827     12        516       9       3        0        3        3        0        0        0  24.3%
500000  32        0.000973        32      32876     12        388       9       3        0        3        3        0        0        0  28.7%
500000  256       0.001315       256     194612     13        384      10       3        0        3        3        0        0        0  29.0%
500000  4096      0.003950      4096    1037068     25        840      17       2        0        3        3        0        0        0  52.7%

running: large-partition-slicing-single-key-reader
Testing slicing of large partition, single-partition reader:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.000844         1       1185      9        488       6       0        0        3        3        0        0        0  16.5%
0       32        0.000656        32      48753      9        296       6       0        0        3        3        0        0        0  23.1%
0       256       0.000751       256     341011     10        328       6       0        0        3        3        0        0        0  34.0%
0       4096      0.004173      4096     981632     22        840      12       1        0        3        3        0        0        0  47.0%
500000  1         0.001036         1        966     13        476       9       3        0        3        3        0        0        0  25.4%
500000  32        0.001014        32      31573     13        412       9       3        0        3        3        0        0        0  27.4%
500000  256       0.001280       256     200044     14        444      10       3        0        3        3        0        0        0  31.8%
500000  4096      0.004081      4096    1003746     30        988      18       3        0        3        3        0        0        0  51.6%

running: large-partition-select-few-rows
Testing selecting few rows from a large partition:
stride  rows      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
1000000 1         0.000668         1       1498      9        296       6       3        0        3        3        0        0        0  21.7%
500000  2         0.000958         2       2088     13        412       9       4        0        3        3        0        0        0  27.7%
250000  4         0.001495         4       2676     18        572      12       6        0        3        3        0        0        0  25.8%
125000  8         0.002069         8       3866     29        912      19      10        0        3        3        0        0        0  30.8%
62500   16        0.002856        16       5603     50       1584      32      18        0        3        3        0        0        0  41.7%
2       500000    1.063129    500000     470310   1138     127080     120       0        0        5        5        0        0        0  99.7%

running: large-partition-forwarding
Testing forwarding with clustering restriction in a large partition:
pk-scan   time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
yes       0.002567         2        779     24       2684       8      16        0        3        3        0        0        0  21.5%
no        0.001013         2       1975     13        412       8       2        0        3        3        0        0        0  28.9%

running: small-partition-skips
Testing scanning small partitions with skips.
Reads whole range interleaving reads with skips according to read-skip pattern:
   read    skip      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
-> 1       0         1.349959   1000000     740763   1369     139732      33       1        0        0        0        0        0        0  99.7%
-> 1       1        12.640751    500000      39555   8144     191168    7064       0        0     7032    11481        0        0        0  96.2%
-> 1       8         3.404269    111112      32639   6651     180660    5571       0        0     5539    10505        0        0        0  84.5%
-> 1       16        2.175424     58824      27040   6434     179116    5354       0        0     5322    10365        0        0        0  74.3%
-> 1       32        1.493365     30304      20292   6335     178404    5257       0        0     5225    10294        0        0        0  61.1%
-> 1       64        1.112168     15385      13833   6256     177672    5183       0        0     5151    10217        0        0        0  48.7%
-> 1       256       0.719282      3892       5411   6211     175464    5178      24        0     5122    10220        0        0        0  33.3%
-> 1       1024      0.393236       976       2482   5946     142152    5369     217        0     5120    10235        0        0        0  30.7%
-> 1       4096      0.185284       245       1322   5499      81992    5296     142        0     5122    10240        0        0        0  44.7%
-> 64      1         2.356711    984616     417792   7361     177944    5184      21        0     5152     5266        0        0        0  79.1%
-> 64      8         2.192331    888896     405457   6253     177868    5173       0        0     5141     5690        0        0        0  77.2%
-> 64      16        2.029835    800000     394121   6245     177812    5165       0        0     5133     6132        0        0        0  75.7%
-> 64      32        1.806448    666688     369060   6245     177808    5165       0        0     5133     6864        0        0        0  72.6%
-> 64      64        1.508492    500032     331478   6242     177788    5163       0        0     5131     7667        0        0        0  67.7%
-> 64      256       0.892881    200000     223994   6233     177704    5154       0        0     5122     9202        0        0        0  54.2%
-> 64      1024      0.465715     58880     126429   6229     177492    5154       0        0     5122     9930        0        0        0  44.0%
-> 64      4096      0.266582     15424      57858   6158     169480    5257     114        0     5115    10142        0        0        0  42.3%

running: small-partition-slicing
Testing slicing small partitions:
offset  read      time (s)     frags     frag/s    aio      (KiB) blocked dropped  idx hit idx miss  idx blk    c hit   c miss    c blk    cpu
0       1         0.003113         1        321      3        260       2       0        0        1        1        0        0        0  13.4%
0       32        0.004166        32       7682     16       1428      10       0        0        5        5        0        0        0  14.9%
0       256       0.009813       256      26088     97       8572      62       0        0       31       31        0        0        0  18.4%
0       4096      0.014798      4096     276794    100       8704      64       0        0       32       32        0        0        0  46.3%
500000  1         0.003700         1        270     34        492      33       0        0       32       64        0        0        0  28.4%
500000  32        0.004030        32       7940     40       1260      36       0        0       32       64        0        0        0  27.8%
500000  256       0.009514       256      26908    100       8748      64       0        0       32       64        0        0        0  20.2%
500000  4096      0.013368      4096     306413    119       7892      85       0        0       53       64        0        0        0  53.6%

Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Message-Id: <a72818f79ca4081a606424545b0053fa581d49e7.1522173144.git.vladimir@scylladb.com>
2018-03-29 15:23:31 +03:00

91 lines
3.2 KiB
C++

/*
* Copyright (C) 2017 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "dht/i_partitioner.hh"
#include "schema.hh"
#include "sstables/index_reader.hh"
class index_reader_assertions {
std::unique_ptr<sstables::index_reader> _r;
public:
index_reader_assertions(std::unique_ptr<sstables::index_reader> r)
: _r(std::move(r))
{ }
index_reader_assertions& has_monotonic_positions(const schema& s) {
auto pos_cmp = position_in_partition::composite_less_compare(s);
auto rp_cmp = dht::ring_position_comparator(s);
auto prev = dht::ring_position::min();
_r->read_lower_partition_data().get();
while (!_r->eof()) {
auto& e = _r->current_lower_partition_entry();
auto k = e.get_decorated_key();
auto token = dht::token(k.token());
auto rp = dht::ring_position(token, k.key().to_partition_key(s));
if (!rp_cmp(prev, rp)) {
BOOST_FAIL(sprint("Partitions have invalid order: %s >= %s", prev, rp));
}
prev = rp;
while (e.get_read_pi_blocks_count() < e.get_total_pi_blocks_count()) {
e.get_next_pi_blocks().get();
auto* infos = e.get_pi_blocks();
if (infos->empty()) {
continue;
}
auto& prev = (*infos)[0];
for (size_t i = 1; i < infos->size(); ++i) {
auto& cur = (*infos)[i];
if (pos_cmp(cur.start(s), prev.end(s))) {
std::cout << "promoted index:\n";
for (auto& e : *infos) {
std::cout << " " << e.start(s) << "-" << e.end(s)
<< ": +" << e.offset() << " len=" << e.width() << std::endl;
}
BOOST_FAIL(sprint("Index blocks are not monotonic: %s >= %s", prev.end(s), cur.start(s)));
}
cur = prev;
}
}
_r->advance_lower_to_next_partition().get();
}
return *this;
}
index_reader_assertions& is_empty(const schema& s) {
_r->read_lower_partition_data().get();
while (!_r->eof()) {
BOOST_REQUIRE(_r->current_lower_partition_entry().get_total_pi_blocks_count() == 0);
_r->advance_lower_to_next_partition().get();
}
return *this;
}
};
inline
index_reader_assertions assert_that(std::unique_ptr<sstables::index_reader> r) {
return { std::move(r) };
}