Compare commits

..

171 Commits

Author SHA1 Message Date
Hagit Segev
b0f656302c release: prepare for 4.1.11 2021-01-05 10:13:34 +02:00
Benny Halevy
e05e7b2a98 compaction: compaction_writer: destroy shared_sstable after the sstable_writer
sstable_writer may depend on the sstable throughout its whole lifecycle.
If the sstable is freed before the sstable_writer we might hit use-after-free
as in the following case:
```
std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator+=(long) at /usr/include/c++/10/bits/stl_deque.h:240
 (inlined by) std::operator+(std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*> const&, long) at /usr/include/c++/10/bits/stl_deque.h:378
 (inlined by) std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator[](long) const at /usr/include/c++/10/bits/stl_deque.h:252
 (inlined by) std::deque<sstables::compression::segmented_offsets::bucket, std::allocator<sstables::compression::segmented_offsets::bucket> >::operator[](unsigned long) at /usr/include/c++/10/bits/stl_deque.h:1327
 (inlined by) sstables::compression::segmented_offsets::push_back(unsigned long, sstables::compression::segmented_offsets::state&) at ./sstables/compress.cc:214
sstables::compression::segmented_offsets::writer::push_back(unsigned long) at ./sstables/compress.hh:123
 (inlined by) compressed_file_data_sink_impl<crc32_utils, (compressed_checksum_mode)1>::put(seastar::temporary_buffer<char>) at ./sstables/compress.cc:519
seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at table.cc:?
 (inlined by) seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at ././seastar/include/seastar/core/iostream-impl.hh:432
seastar::output_stream<char>::flush() at table.cc:?
seastar::output_stream<char>::close() at table.cc:?
sstables::file_writer::close() at sstables.cc:?
sstables::mc::writer::~writer() at writer.cc:?
 (inlined by) sstables::mc::writer::~writer() at ./sstables/mx/writer.cc:790
sstables::mc::writer::~writer() at writer.cc:?
flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at compaction.cc:?
 (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_destroy() at /usr/include/c++/10/optional:260
 (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_reset() at /usr/include/c++/10/optional:280
 (inlined by) std::_Optional_payload<sstables::compaction_writer, false, false, false>::~_Optional_payload() at /usr/include/c++/10/optional:401
 (inlined by) std::_Optional_base<sstables::compaction_writer, false, false>::~_Optional_base() at /usr/include/c++/10/optional:474
 (inlined by) std::optional<sstables::compaction_writer>::~optional() at /usr/include/c++/10/optional:659
 (inlined by) sstables::compacting_sstable_writer::~compacting_sstable_writer() at ./sstables/compaction.cc:229
 (inlined by) compact_mutation<(emit_only_live_rows)0, (compact_for_sstables)1, sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_mutation() at ././mutation_compactor.hh:468
 (inlined by) compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_for_compaction() at ././mutation_compactor.hh:538
 (inlined by) std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::operator()(compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>*) const at /usr/include/c++/10/bits/unique_ptr.h:85
 (inlined by) std::unique_ptr<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>, std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~unique_ptr() at /usr/include/c++/10/bits/unique_ptr.h:361
 (inlined by) stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::~stable_flattened_mutations_consumer() at ././mutation_reader.hh:342
 (inlined by) flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at ././flat_mutation_reader.hh:201
auto flat_mutation_reader::impl::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:272
 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:383
 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:389
 (inlined by) seastar::future<void> sstables::compaction::setup<noop_compacted_fragments_consumer>(noop_compacted_fragments_consumer)::{lambda(flat_mutation_reader)#1}::operator()(flat_mutation_reader)::{lambda()#1}::operator()() at ./sstables/compaction.cc:612
```

What happens here is that:

    compressed_file_data_sink_impl(output_stream<char> out, sstables::compression* cm, sstables::local_compression lc)
            : _out(std::move(out))
            , _compression_metadata(cm)
            , _offsets(_compression_metadata->offsets.get_writer())
            , _compression(lc)
            , _full_checksum(ChecksumType::init_checksum())

_compression_metadata points to a buffer held by the sstable object.
and _compression_metadata->offsets.get_writer returns a writer that keeps
a reference to the segmented_offsets in the sstables::compression
that is used in the ~writer -> close path.

Fixes #7821

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201227145726.33319-1-bhalevy@scylladb.com>
(cherry picked from commit 8a745a0ee0)
2021-01-04 15:12:33 +02:00
Avi Kivity
ae0f3ef543 Revert "Merge 'Move temporaries to value view' from Piotr S"
This reverts commit b34a1d9576. It causes
regressions in processing of bind variables.

Fixes #7761.
2020-12-24 12:42:42 +02:00
Gleb Natapov
2a6a072857 mutation_writer: pass exceptions through feed_writer
feed_writer() eats exception and transforms it into an end of stream
instead. Downstream validators hate when this happens.

Fixes #7482
Message-Id: <20201216090038.GB3244976@scylladb.com>

(cherry picked from commit 61520a33d6)
2020-12-16 17:20:32 +02:00
Aleksandr Bykov
da1a5b6542 dist: scylla_util: fix aws_instance.ebs_disks method
aws_instance.ebs_disks() method should return ebs disk
instead of ephemeral

Signed-off-by: Aleksandr Bykov <alex.bykov@scylladb.com>

Closes #7780

(cherry picked from commit e74dc311e7)
2020-12-16 11:59:12 +02:00
Avi Kivity
b85aa0e8a6 Update seastar submodule
* seastar 9d8d82a095...6fb1399ba1 (1):
  > sharded: Do not hang on never set freed promise

Fixes #6606.
2020-12-15 16:52:38 +02:00
Calle Wilund
8ccdd5c50f token_metadata: Prune empty racks on endpoint change
Fixes #6459

When moving or removing endpoints, we should ensure
that the set of available racks reflects the nodes
known, i.e. match what would be the result of a
reboot + create sets initially.
Message-Id: <20200519153300.15391-1-calle@scylladb.com>

(cherry picked from commit 7ce4a8b458)
2020-12-15 16:31:46 +02:00
Takuya ASADA
f7ffea4638 node_exporter_install: stop service before force installing
Stop node-exporter.service before re-install it, to avoid 'Text file busy' error.

Fixes #6782

(cherry picked from commit ef05ea8e91)
2020-12-15 16:28:36 +02:00
Avi Kivity
fb40e375bf dist: rpm: uninstall tuned when installing scylla-kernel-conf
tuned 2.11.0-9 and later writes to kernel.sched_wakeup_granularity_ns
and other sysctl tunables that we so laboriously tuned, dropping
performance by a factor of 5 (due to increased latency). Fix by
obsoleting tuned during install (in effect, we are a better tuned,
at least for us).

Not needed for .deb, since debian/ubuntu do not install tuned by
default.

Fixes #7696

Closes #7776

(cherry picked from commit 615b8e8184)
2020-12-12 14:32:59 +02:00
Eliran Sinvani
9ea2a61d63 consistency level: fix wrong quorum calculation when RF = 0
We used to calculate the number of endpoints for quorum and local_quorum
unconditionally as ((rf / 2) + 1). This formula doesn't take into
account the corner case where RF = 0, in this situation quorum should
also be 0.
This commit adds the missing corner case.

Tests: Unit Tests (dev)
Fixes #6905

Closes #7296

(cherry picked from commit 925cdc9ae1)
2020-11-29 16:45:26 +02:00
Avi Kivity
6898fcd40f Update seastar submodule for precalculated TLS DH parameters
* seastar d4df4fa6de...9d8d82a095 (1):
  > TLS: Use "known" (precalculated) DH parameters if available

Fixes #6191.
2020-11-29 14:36:40 +02:00
Asias He
4df08e331b repair: Make repair_writer a shared pointer
The future of the fiber that writes data into sstables inside
the repair_writer is stored in _writer_done like below:

class repair_writer {
   _writer_done[node_idx] =
      mutation_writer::distribute_reader_and_consume_on_shards().then([this] {
         ...
      }).handle_exception([this] {
         ...
      });
}

The fiber accesses the repair_writer object in the error handling path. We
wait for the _writer_done to finish before we destroy repair_meta
object which contains the repair_writer object to avoid the fiber
accessing already freed repair_writer object.

To be safer, we can make repair_writer a shared pointer and take a
reference in the distribute_reader_and_consume_on_shards code path.

Fixes #7406

Closes #7430

(cherry picked from commit 289a08072a)
2020-11-29 13:30:06 +02:00
Pavel Emelyanov
7b1fb86a28 query_pager: Fix continuation handling for noop visitor
Before updating the _last_[cp]key (for subsequent .fetch_page())
the pager checks 'if the pager is not exhausted OR the result
has data'.

The check seems broken: if the pager is not exhausted, but the
result is empty the call for keys will unconditionally try to
reference the last element from empty vector. The not exhausted
condition for empty result can happen if the short_read is set,
which, in turn, unconditionally happens upon meeting partition
end when visiting the partition with result builder.

The correct check should be 'if the pager is not exhausted AND
the result has data': the _last_[pc]key-s should be taken for
continuation (not exhausted), but can only be taken if the result is
not empty (has data).

fixes: #7263
tests: unit(dev), but tests don't trigger this corner case

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20200921124329.21209-1-xemul@scylladb.com>
(cherry picked from commit 550fc734d9)
2020-11-29 12:01:43 +02:00
Takuya ASADA
f7be22ccb2 install.sh: set PATH for relocatable CLI tools in python thunk
We currently set PATH for relocatable CLI tools in scylla_util.run() and
scylla_util.out(), but it doesn't work for perftune.py, since it's not part of
Scylla, does not use scylla_util module.
We can set PATH in python thunk instead, it can set PATH for all python scripts.

Fixes #7350

(cherry picked from commit 5867af4edd)
2020-11-29 11:54:53 +02:00
Bentsi Magidovich
26b5a34f96 scylla_util.py: fix exception handling in curl
Retry mechanism didn't work when URLError happened. For example:

  urllib.error.URLError: <urlopen error [Errno 101] Network is unreachable>

Let's catch URLError instead of HTTPError since URLError is a base exception
for all exceptions in the urllib module.

Fixes: #7569

Closes #7567

(cherry picked from commit 956b97b2a8)
2020-11-29 11:48:42 +02:00
Takuya ASADA
10a65ba2fb dist/redhat: packaging dependencies.conf as normal file, not ghost
When we introduced dependencies.conf, we mistakenly added it to the rpm as %ghost,
but it should be a normal file, installed normally on package installation.

Fixes #7703

Closes #7704

(cherry picked from commit ba4d54efa3)
2020-11-29 11:40:27 +02:00
Takuya ASADA
be60e3ca52 install.sh: apply sysctl.d files on non-packaging installation
We don't apply sysctl.d files on non-packaging installation, apply them
just like rpm/deb taking care of that.

Fixes #7702

Closes #7705

(cherry picked from commit 5f81f97773)
2020-11-29 11:35:51 +02:00
Avi Kivity
5485c902fe dist: sysctl: configure more inotify instances
Since f3bcd4d205 ("Merge 'Support SSL Certificate Hot
Reloading' from Calle"), we reload certificates as they are
modified on disk. This uses inotify, which is limited by a
sysctl fs.inotify.max_user_instances, with a default of 128.

This is enough for 64 shards only, if both rpc and cql are
encrypted; above that startup fails.

Increase to 1200, which is enough for 6 instances * 200 shards.

Fixes #7700.

Closes #7701

(cherry picked from commit 390e07d591)
2020-11-29 11:04:57 +02:00
Hagit Segev
01c822301f release: prepare for 4.1.10 2020-11-19 18:07:49 +02:00
Raphael S. Carvalho
415b271a39 compaction: Make sure a partition is filtered out only by producer
If interposer consumer is enabled, partition filtering will be done by the
consumer instead, but that's not possible because only the producer is able
to skip to the next partition if the current one is filtered out, so scylla
crashes when that happens with a bad function call in queue_reader.
This is a regression which started here: 55a8b6e3c9

To fix this problem, let's make sure that partition filtering will only
happen on the producer side.

Fixes #7590.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20201111221513.312283-1-raphaelsc@scylladb.com>
(cherry picked from commit 13fa2bec4c)
2020-11-19 14:08:47 +02:00
Piotr Dulikowski
b7274ab44a hints: don't read hint files when it's not allowed to send
When there are hint files to be sent and the target endpoint is DOWN,
end_point_hints_manager works in the following loop:

- It reads the first hint file in the queue,
- For each hint in the file it decides that it won't be sent because the
  target endpoint is DOWN,
- After realizing that there are some unsent hints, it decides to retry
  this operation after sleeping 1 second.

This causes the first segment to be wholly read over and over again,
with 1 second pauses, until the target endpoint becomes UP or leaves the
cluster. This causes unnecessary I/O load in the streaming scheduling
group.

This patch adds a check which prevents end_point_hints_manager from
reading the first hint file at all when it is not allowed to send hints.

First observed in #6964

Tests:
- unit(dev)
- hinted handoff dtests

Closes #7407

(cherry picked from commit 77a0f1a153)
2020-11-16 14:30:26 +02:00
Botond Dénes
b144b93cd8 mutation_reader: queue_reader: don't set EOS flag on abort
If the consumer happens to check the EOS flag before it hits the
exception injected by the abort (by calling fill_buffer()), they can
think the stream ended normally and expect it to be valid. However this
is not guaranteed when the reader is aborted. To avoid consumers falsely
thinking the stream ended normally, don't set the EOS flag on abort at
all.

Additionally make sure the producer is aborted too on abort. In theory
this is not needed as they are the one initiating the abort, but better
to be safe than sorry.

Fixes: #7411
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201102100732.35132-1-bdenes@scylladb.com>
(cherry picked from commit f5323b29d9)
2020-11-15 11:08:07 +02:00
Botond Dénes
7325996510 types: validate(): linearize values lazily
Instead of eagerly linearizing all values as they are passed to
validate(), defer linearization to those validators that actually need
linearized values. Linearizing large values puts pressure on the memory
allocator with large contiguous allocation requests. This is something
we are trying to actively avoid, especially if it is not really needed.
Turns out the types, whose validators really want linearized values are
a minority, as most validators just look at the size of the value, and
some like bytes don't need validation at all, while usually having large
values.

This is achieved by templating the validator struct on the view and
using the FragmentedRange concept to treat all passed in views
(`bytes_view` and `fragmented_temporary_buffer_view`) uniformly.
This patch makes no attempt at converting existing validators to work
with fragmented buffers, only trivial cases are converted. The major
offenders still left are ascii/utf8 and collections.

Fixes: #7318

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201007054524.909420-1-bdenes@scylladb.com>
(cherry picked from commit db56ae695c)

[avi: squashed ed6775c585 ("types: adjust
      validation_visitor construction for clang") as gcc 9 in scylla 4.1
      suffers from the same problem as clang 11]
2020-11-11 12:31:36 +02:00
Piotr Sarna
fb14fae79b Merge 'Backport PR #7469 to 4.2' from Eliran Sinvani
This is a backport of PR #7469 that did not apply cleanly to 4.2 with a trivial conflict, another commit that touched one of the files but in a completely different region.

Closes #7480

* github.com:scylladb/scylla:
  materialized views: add a base table reference if missing
  view info: support partial match between base and view for only reading from view.
  view info: guard against null dereference of the base info

(cherry picked from commit c74ba1bc36)
2020-11-09 15:22:11 +02:00
Avi Kivity
bb49a5ac06 Merge 'storage_proxy: add a separate smp_group for hints' from Eliran
Hints writes are handled by storage_proxy in the exact same way
regular writes are, which in turn means that the same smp service
group is used for both. The problem is that it can lead to a priority
inversion where writes of the lower priority kind occupy a lot of
the semaphores units making the higher priority writes wait for an
empty slot.
This series adds a separate smp group for hints as well as a field
to pass the correct smp group to mutate_locally functions, and
then uses this field to properly classify the writes.

Fixes #7177

* eliransin-hint_priority_inversion:
  Storage proxy: use hints smp group in mutate locally
  Storage proxy: add a dedicated smp group for hints

(cherry picked from commit c075539fea)

[avi: replace std::bind_front() which is not available with this
      compiler with a lambda that does the same]
2020-11-08 20:46:45 +02:00
Pavel Solodovnikov
947d3a13a3 storage_proxy: un-hardcode force sync flag for mutate_locally(mutation) overload
Corresponding overload of `storage_proxy::mutate_locally`
was hardcoded to pass `db::commitlog::force_sync::no` to the
`database::apply`. Unhardcode it and substitute `force_sync::no`
to all existing call sites (as it were before).

`force_sync::yes` will be used later for paxos learn writes
when trying to apply mutations upgraded from an obsolete
schema version (similar to the current case when applying
locally a `frozen_mutation` stored in accepted proposal).

Tests: unit(dev)

Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20200716124915.464789-1-pa.solodovnikov@scylladb.com>
(cherry picked from commit 5ff5df1afd)

Prerequisite for #7177.
2020-11-08 19:47:11 +02:00
Amnon Heiman
b096d64aa7 scyllatop/livedata.py: Safe iteration over metrics
This patch changes the code that iterates over the metrics to use a copy
of the metrics names to make it safe to remove the metrics from the
metrics object.

Fixes #7488

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 52db99f25f)
2020-11-08 19:16:25 +02:00
Calle Wilund
ce8a0f3886 partition_version: Change range_tombstones() to return chunked_vector
Refs #7364

The number of tombstones can be large. As a stopgap measure to
just returning a source range (with keepalive), we can at least
alleviate the problem by using a chunked vector.

Closes #7433

(cherry picked from commit 4b65d67a1a)
2020-11-08 14:38:45 +02:00
Tomasz Grabiec
41344d8ee6 sstables: ka/la: Fix abort when next_partition() is called with certain reader state
Cleanup compaction is using consume_pausable_in_thread() to skip over
disowned partitions, which uses flat_mutation_reader::next_partition().

The implementation of next_partition() for the sstable reader has a
bug which may cause the following assertion failure:

  scylla: sstables/mp_row_consumer.hh:422: row_consumer::proceed sstables::mp_row_consumer_k_l::flush(): Assertion `!_ready' failed.

This happens when the sstable reader's buffer gets full when we reach
the partition end. The last fragment of the partition won't be pushed
into the buffer but will stay in the _ready variable. When
next_partition() is called in this state, _ready will not be cleared
and the fragment will be carried over to the next partition. This will
cause assertion failure when the reader attempts to emit the first
fragment of the next partition.

The fix is to clear _ready when entering a partition, just like we
clear _range_tombstones there.

Fixes #7553.
Message-Id: <1604534702-12777-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit fb9b5cae05)
2020-11-08 14:32:58 +02:00
Avi Kivity
db6303dba0 Merge "Fix TWCS compaction aggressiveness due to data segregation" from Raphael
"
After data segregation feature, anything that causes out-of-order writes,
like read repair, can result in small updates to past time windows.
This causes compaction to be very aggressive because whenever a past time
window is updated like that, that time window is recompacted into a
single SSTable.
Users expect that once a window is closed, it will no longer be written
to, but that has changed since the introduction of the data segregation
feature. We didn't anticipate the write amplification issues that the
feature would cause. To fix this problem, let's perform size-tiered
compaction on the windows that are no longer active and were updated
because data was segregated. The current behavior where the last active
window is merged into one file is kept. But thereafter, that same
window will only be compacted using STCS.

Fixes #6928.
"

* 'fix_twcs_agressiveness_after_data_segregation_v2' of github.com:raphaelsc/scylla:
  compaction/twcs: improve further debug messages
  compaction/twcs: Improve debug log which shows all windows
  test: Check that TWCS properly performs size-tiered compaction on past windows
  compaction/twcs: Make task estimation take into account the size-tiered behavior
  compaction/stcs: Export static function that estimates pending tasks
  compaction/stcs: Make get_buckets() static
  compact/twcs: Perform size-tiered compaction on past time windows
  compaction/twcs: Make strategy easier to extend by removing duplicated knowledge
  compaction/twcs: Make newest_bucket() non-static
  compaction/twcs: Move TWCS implementation into source file

(cherry picked from commit 6f986df458)
2020-11-05 20:32:42 +02:00
Glauber Costa
964cbb95a7 twcs: move implementations to its own file
LCS and STCS already have their own files, reducing the clutter in
compaction_strategy.cc. Do the same for TWCS. I am doing this in
preparation to add more functions.

Signed-off-by: Glauber Costa <glauber@scylladb.com>
Message-Id: <20200611230906.409023-6-glauber@scylladb.com>
(cherry picked from commit b0a0c207c3)

Prerequisite for #6928.
2020-11-05 20:20:30 +02:00
Avi Kivity
b34a1d9576 Merge 'Move temporaries to value view' from Piotr S
"
Issue https://github.com/scylladb/scylla/issues/7019 describes a problem of an ever-growing map of temporary values stored in query_options. In order to mitigate this kind of problems, the storage for temporary values is moved from an external data structure to the value views itself. This way, the temporary lives only as long as it's accessible and is automatically destroyed once a request finishes. The downside is that each temporary is now allocated separately, while previously they were bundled in a single byte stream.

Tests: unit(dev)
Fixes https://github.com/scylladb/scylla/issues/7019
"

7055297649 ("cql3: remove query_options::linearize and _temporaries")
is reverted from this backport since linearize() is still used in
this branch.

* psarna-move_temporaries_to_value_view:
  cql3: remove query_options::linearize and _temporaries
  cql3: remove make_temporary helper function
  cql3: store temporaries in-place instead of in query_options
  cql3: add temporary_value to value view
  cql3: allow moving data out of raw_value
  cql3: split values.hh into a .cc file

(cherry picked from commit 2b308a973f)
2020-11-05 19:48:01 +02:00
Piotr Sarna
15ef930268 schema_tables: fix fixing old secondary index schemas
Old secondary index schemas did not have their idx_token column
marked as computed, and there already exists code which updates
them. Unfortunately, the fix itself contains an error and doesn't
fire if computed columns are not yet supported by the whole cluster,
which is a very common situation during upgrades.

Fixes #7515

Closes #7516

(cherry picked from commit b66c285f94)
2020-11-05 17:53:28 +02:00
Avi Kivity
fe57128fe0 Merge 'Fix ignoring cells after null in appending hash' from Piotr Sarna
"
This series fixes a bug in `appending_hash<row>` that caused it to ignore any cells after the first NULL. It also adds a cluster feature which starts using the new hashing only after the whole cluster is aware of it. The series comes with tests, which reproduce the issue.

Fixes #4567
Based on #4574
"

* psarna-fix_ignoring_cells_after_null_in_appending_hash:
  test: extend mutation_test for NULL values
  tests/mutation: add reproducer for #4567
  gms: add a cluster feature for fixed hashing
  digest: add null values to row digest
  mutation_partition: fix formatting
  appending_hash<row>: make publicly visible

(cherry picked from commit 0e03c979d2)
2020-11-04 20:45:06 +02:00
Yaron Kaikov
b80dab6d58 release: prepare for 4.1.9 2020-10-26 18:13:22 +02:00
Botond Dénes
04d52631b2 reader_permit: reader_resources: make true RAII class
Currently in all cases we first deduct the to-be-consumed resources,
then construct the `reader_resources` class to protect it (release it on
destruction). This is error prone as it relies on no exception being
thrown while constructing the `reader_resources`. Albeit the
`reader_resources` constructor is `noexcept` right now this might change
in the future and as the call sites relying on this are disconnected
from the declaration, the one modifying them might not notice.
To make this safe going forward, make the `reader_resources` a true RAII
class, consuming the units in its constructor and releasing them in its
destructor.

Refs: #7256

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200922150625.1253798-1-bdenes@scylladb.com>
(cherry picked from commit a0107ba1c6)
Message-Id: <20200924081408.236353-1-bdenes@scylladb.com>
2020-10-19 15:04:53 +03:00
Takuya ASADA
dfc9f789cf install.sh: set LC_ALL=en_US.UTF-8 on python3 thunk
scylla-python3 causes segfault when non-default locale specified.
As workaround for this, we need to set LC_ALL=en_US.UTF-8 on python3 thunk.

Fixes #7408

Closes #7414

(cherry picked from commit ff129ee030)
2020-10-18 15:02:46 +03:00
Avi Kivity
c1236c02df Update seastar submodule
* seastar 88b6f0172c...d4df4fa6de (1):
  > append_challenged_posix_file_impl: allow destructing file with no queued work

Fixes #7285.
2020-10-12 15:13:17 +03:00
Gleb Natapov
0eb2f5c378 lwt: do not return unavailable exception from the 'learn' stage
Unavailable exception means that operation was not started and it can be
retried safely. If lwt fails in the learn stage though it most
certainly means that its effect will be observable already. The patch
returns timeout exception instead which means uncertainty.

Fixes #7258

Message-Id: <20201001130724.GA2283830@scylladb.com>
(cherry picked from commit 3e8dbb3c09)
2020-10-07 11:00:08 +02:00
Avi Kivity
0cc6d41ee6 Merge "materialized views: Fix undefined behavior on base table schema changes" from Tomasz
"
The view_info object, which is attached to the schema object of the
view, contains a data structure called
"base_non_pk_columns_in_view_pk". This data structure contains column
ids of the base table so is valid only for a particular version of the
base table schema. This data structure is used by materialized view
code to interpret mutations of the base table, those coming from base
table writes, or reads of the base table done as part of view updates
or view building.

The base table schema version of that data structure must match the
schema version of the mutation fragments, otherwise we hit undefined
behavior. This may include aborts, exceptions, segfaults, or data
corruption (e.g. writes landing in the wrong column in the view).

Before this patch, we could get schema version mismatch here after the
base table was altered. That's because the view schema did not change
when the base table was altered.

Another problem was that view building was using the current table's schema
to interpret the fragments and invoke view building. That's incorrect for two
reasons. First, fragments generated by a reader must be accessed only using
the reader's schema. Second, base_non_pk_columns_in_view_pk of the recorded
view ptrs may no longer match the current base table schema, which is used
to generate the view updates.

Part of the fix is to extract base_non_pk_columns_in_view_pk into a
third entity called base_dependent_view_info, which changes both on
base table schema changes and view schema changes.

It is managed by a shared pointer so that we can take immutable
snapshots of it, just like with schema_ptr. When starting the view
update, the base table schema_ptr and the corresponding
base_dependent_view_info have to match. So we must obtain them
atomically, and base_dependent_view_info cannot change during update.

Also, whenever the base table schema changes, we must update
base_dependent_view_infos of all attached views (atomically) so that
it matches the base table schema.

Fixes #7061.

Tests:

  - unit (dev)
  - [v1] manual (reproduced using scylla binary and cqlsh)
"

* tag 'mv-schema-mismatch-fix-v2' of github.com:tgrabiec/scylla:
  db: view: Refactor view_info::initialize_base_dependent_fields()
  tests: mv: Test dropping columns from base table
  db: view: Fix incorrect schema access during view building after base table schema changes
  schema: Call on_internal_error() when out of range id is passed to column_at()
  db: views: Fix undefined behavior on base table schema changes
  db: views: Introduce has_base_non_pk_columns_in_view_pk()

(cherry picked from commit 3daa49f098)
2020-10-06 16:49:08 +03:00
Juliusz Stasiewicz
1ecc447f42 tracing: Fix error on slow batches
`trace_keyspace_helper::make_slow_query_mutation_data` expected a
"query" key in its parameters, which does not appear in case of
e.g. batches of prepared statements. This is example of failing
`record.parameters`:
```
...{"query[0]" : "INSERT INTO ks.tbl (pk, i) values (?, ?);"},
{"query[1]" : "INSERT INTO ks.tbl (pk, i) values (?, ?);"}...
```

In such case Scylla recorded no trace and said:
```
ERROR 2020-09-28 10:09:36,696 [shard 3] trace_keyspace_helper - No
"query" parameter set for a session requesting a slow_query_log record
```

Fix here is to leave query empty if not found. The users can still
retrieve the query contents from existing info.

Fixes #5843

Closes #7293

(cherry picked from commit 0afa738a8f)
2020-10-04 18:04:42 +03:00
Tomasz Grabiec
7f3ffbc1c8 Merge "evictable_reader: validate buffer on reader recreation" from Botond
This series backports the evictable reader validation patchset (merged
as 97c99ea9f to master) to 4.1.

I only had to do changes to the tests.

Tests: unit(dev), some exception safety tests are failing with or
without my patchset

* https://github.com/denesb/scylla.git denesb/evictable-reader-validate-buffer/backport-4.1:
  mutation_reader_test: add unit test for evictable reader self-validation
  evictable_reader: validate buffer after recreation the underlying
  evictable_reader: update_next_position(): only use peek'd position on partition boundary
  mutation_reader_test: add unit test for evictable reader range tombstone trimming
  evictable_reader: trim range tombstones to the read clustering range
  position_in_partition_view: add position_in_partition_view before_key() overload
  flat_mutation_reader: add buffer() accessor
2020-10-02 11:50:29 +02:00
Botond Dénes
6a02d120ec mutation_reader_test: add unit test for evictable reader self-validation
Add both positive (where the validation should succeed) and negative
(where the validation should fail) tests, covering all validation cases.

(cherry picked from commit 076c27318b)
2020-10-02 09:45:20 +03:00
Botond Dénes
d820997452 evictable_reader: validate buffer after recreation the underlying
The reader recreation mechanism is a very delicate and error-prone one,
as proven by the countless bugs it had. Most of these bugs were related
to the recreated reader not continuing the read from the expected
position, inserting out-of-order fragments into the stream.
This patch adds a defense mechanism against such bugs by validating the
start position of the recreated reader. Several things are checked:
* The partition is the expected one -- the one we were in the middle of
  or the next if we stopped at partition boundaries.
* The partition is in the read range.
* The first fragment in the partition is the expected one -- has
  an equal or larger position than the next expected fragment.
* The fragment is in the clustering range as defined by the slice.

As these validations are only done on the slow-path of recreating an
evicted reader, no performance impact is expected.

(cherry picked from commit 0b0ae18a14)
2020-10-02 09:38:04 +03:00
Botond Dénes
e1e57d224b evictable_reader: update_next_position(): only use peek'd position on partition boundary
`evictable_reader::update_next_position()` is used to record the position the
reader will continue from, in the next buffer fill. This position is used to
create the partition slice when the underlying reader is evicted and has
to be recreated. There is an optimization in this method -- if the
underlying's buffer is not empty we peek at the first fragment in it and
use it as the next position. This is however problematic for buffer
validation on reader recreation (introduced in the next patch), because
using the next row's position as the next pos will allow for range
tombstones to be emitted with before_key(next_pos.key()), which will
trigger the validation. Instead of working around this, just drop this
optimization for mid-partition positions, it is inconsequential anyway.
We keep it for where it is important, when we detect that we are at a
partition boundary. In this case we can avoid reading the current
partition altogether when recreating the reader.

(cherry picked from commit 91020eef73)
2020-10-02 09:38:04 +03:00
Botond Dénes
763e063356 mutation_reader_test: add unit test for evictable reader range tombstone trimming
(cherry picked from commit d1b0573e1c)
2020-10-02 09:37:57 +03:00
Botond Dénes
a8f966aafa evictable_reader: trim range tombstones to the read clustering range
Currently mutation sources are allowed to emit range tombstones that are
out-of the clustering read range if they are relevant to it. For example
a read of a clustering range [ck100, +inf), might start with:

    range_tombstone{start={ck1, -1}, end={ck200, 1}},
    clustering_row{ck100}

The range tombstone is relevant to the range and the first row of the
range so it is emitted as first, but its position (start) is outside the
read range. This is normally fine, but it poses a problem for evictable
reader. When the underlying reader is evicted and has to be recreated
from a certain clustering position, this results in out-of-order
mutation fragments being inserted into the middle of the stream. This is
not fine anymore as the monotonicity guarantee of the stream is
violated. The real solution would be to require all mutation sources to
trim range tombstones to their read range, but this is a lot of work.
Until that is done, as a workaround we do this trimming in the evictable
reader itself.

(cherry picked from commit 4f2e7a18e2)
2020-10-02 08:59:55 +03:00
Botond Dénes
1a3c8a0ec5 position_in_partition_view: add position_in_partition_view before_key() overload
(cherry picked from commit d7d93aef49)
2020-10-02 08:59:55 +03:00
Botond Dénes
268821223c flat_mutation_reader: add buffer() accessor
To allow outsiders to inspect the contents of the reader's buffer.

(cherry picked from commit ab59e7c725)
2020-10-02 08:59:55 +03:00
Tomasz Grabiec
6c43a0dc29 schema: Fix race in schema version recalculation leading to stale schema version in gossip
Migration manager installs several feature change listeners:

    if (this_shard_id() == 0) {
        _feature_listeners.push_back(_feat.cluster_supports_view_virtual_columns().when_enabled(update_schema));
        _feature_listeners.push_back(_feat.cluster_supports_digest_insensitive_to_expiry().when_enabled(update_schema));
        _feature_listeners.push_back(_feat.cluster_supports_cdc().when_enabled(update_schema));
        _feature_listeners.push_back(_feat.cluster_supports_per_table_partitioners().when_enabled(update_schema));
    }

They will call update_schema_version_and_announce() when features are enabled, which does this:

    return update_schema_version(proxy, features).then([] (utils::UUID uuid) {
        return announce_schema_version(uuid);
    });

So it first updates the schema version and then publishes it via
gossip in announce_schema_version(). It is possible that the
announce_schema_version() part of the first schema change will be
deferred and will execute after the other four calls to
update_schema_version_and_announce(). It will install the old schema
version in gossip instead of the more recent one.

The fix is to serialize schema digest calculation and publishing.

Fixes #7200

(cherry picked from commit 1a57d641d1)
2020-10-01 18:18:21 +02:00
Yaron Kaikov
8399aac6bc release: prepare for 4.1.8 2020-09-28 20:25:06 +03:00
Avi Kivity
b1a70d0ad4 Update seastar submodule
* seastar 15cd93729f...88b6f0172c (1):
  > lz4_fragmented_compressor: Fix buffer requirements

Fixes #6925.
2020-09-23 11:55:54 +03:00
Yaron Kaikov
2251a1c577 release: prepare for 4.1.7 2020-09-17 21:30:34 +03:00
Nadav Har'El
f8c7c485d2 alternator: fix corruption of PutItem operation in case of contention
This patch fixes a bug noted in issue #7218 - where PutItem operations
sometimes lose part of the item's data - some attributes were lost,
and the name of other attributes replaced by empty strings. The problem
happened when the write-isolation policy was LWT and there was contention
of writes to the same partition (not necessarily the same item).

To use CAS (a.k.a. LWT), Alternator builds an alternator::rmw_operation
object with an apply() function which takes the old contents of the item
(if needed) and a timestamp, and builds a mutation that the CAS should
apply. In the case of the PutItem operation, we wrongly assumed that apply()
will be called only once - so as an optimization the strings saved in the
put_item_operation were moved into the returned mutation. But this
optimization is wrong - when there is contention, apply() may be called
again when the change proposed by the previous one was not accepted by
the Paxos protocol.

The fix is to change the one place where put_item_operation *moved* strings
out of the saved operations into the mutations, to be a copy. But to prevent
this sort of bug from recurring in future code, this patch enlists the
compiler to help us verify that it can't happen: The apply() function is
marked "const" - it can use the information in the operation to build the
mutation, but it can never modify this information or move things out of it,
so it will be fine to call this function twice.

The single output field that apply() does write (_return_attributes) is
marked "mutable" to allow the const apply() to write to it anyway. Because
apply() might be called twice, it is important that if some apply()
implementation sometimes sets _return_attributes, then it must always
set it (even if to the default, empty, value) on every call to apply().

The const apply() means that the compiler verifies for us that I didn't
forget to fix additional wrong std::move()s. Additionally, a test I wrote
to easily reproduce issue #7218 (which I will submit as a dtest later)
passes after this fix.

Fixes #7218.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200916064906.333420-1-nyh@scylladb.com>
(cherry picked from commit 5e8bdf6877)
2020-09-16 21:26:59 +03:00
Benny Halevy
d60bed1953 test: cql_query_test: test_cache_bypass: use table stats
test is currently flaky since system reads can happen
in the background and disturb the global row cache stats.

Use the table's row_cache stats instead.

Fixes #6773

Test: cql_query_test.test_cache_bypass(dev, debug)

Credit-to: Botond Dénes <bdenes@scylladb.com>
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20200811140521.421813-1-bhalevy@scylladb.com>
(cherry picked from commit 6deba1d0b4)
2020-09-16 18:19:30 +03:00
Dejan Mircevski
259203a394 cql3: Fix NULL reference in get_column_defs_for_filtering
There was a typo in get_column_defs_for_filtering(): it checked the
wrong pointer before dereferencing.  Add a test exposing the NULL
dereference and fix the typo.

Tests: unit (dev)

Fixes #7198.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
(cherry picked from commit 9d02f10c71)
2020-09-16 15:47:04 +03:00
Avi Kivity
5f284633d4 reconcilable_result_builder: don't aggravate out-of-memory condition during recovery
Consider an unpaged query that consumes all of available memory, despite
fea5067dfa which limits them (perhaps the
user raised the limit, or this is a system query). Eventually we will see a
bad_alloc which will abort the query and destroy this reconcilable_result_builder.

During destruction, we first destroy _memory_accounter, and then _result.
Destroying _memory_accounter resumes some continuations which can then
allocate memory synchronously when increasing the task queue to accommodate
them. We will then crash. Had we not crashed, we would immediately afterwards
release _result, freeing all the memory that we would ever need.

Fix by making _result the last member, so it is freed first.

Fixes #7240.

(cherry picked from commit 9421cfded4)
2020-09-16 15:40:58 +03:00
Asias He
66cc4be8f6 storage_service: Fix a TOKENS update race for replace operation
In commit 7d86a3b208 (storage_service:
Make replacing node take writes), application state of TOKENS of the
replacing node is added into gossip and propagated to the cluster after
the initial start of gossip service. This can cause a race below

1. The replacing node replaces the old dead node with the same ip address
2. The replacing node starts gossip without application state of the TOKENS
3. Other nodes in the cluster replace the application states of old dead node's
   version with the new replacing node's version
4. replacing node dies
5. replace operation is performed again, the TOKENS application state is
   not present and replace operation fails.

To fix, we can always add TOKENS application state when the
gossip service starts.

Fixes: #7166
Backports: 4.1 and 4.2
(cherry picked from commit 3ba6e3d264)
2020-09-10 13:13:58 +03:00
Avi Kivity
9ca6aa5535 Merge "Fix repair stalls in get_sync_boundary and apply_rows_on_master_in_thread" from Asias
"
This patch set fixes stalls in repair that are caused by std::list merge and clear operations during test_latency_read_with_nemesis test.

Fixes #6940
Fixes #6975
Fixes #6976
"

* 'fix_repair_list_stall_merge_clear_v2' of github.com:asias/scylla:
  repair: Fix stall in apply_rows_on_master_in_thread and apply_rows_on_follower
  repair: Use clear_gently in get_sync_boundary to avoid stall
  utils: Add clear_gently
  repair: Use merge_to_gently to merge two lists
  utils: Add merge_to_gently

(cherry picked from commit 4547949420)
2020-09-10 13:13:54 +03:00
Avi Kivity
6e63db8c72 repair: apply_rows_on_follower(): remove copy of repair_rows list
We copy a list, which was reported to generate a 15ms stall.

This is easily fixed by moving it instead, which is safe since this is
the last use of the variable.

Fixes #7115.

(cherry picked from commit 6ff12b7f79)
2020-09-10 11:53:29 +03:00
Avi Kivity
803da18727 Update seastar submodule
* seastar 18275cbc0e...15cd93729f (1):
  > core/reactor: complete_timers(): restore previous scheduling group

Fixes #7184.
2020-09-07 11:33:06 +03:00
Raphael S. Carvalho
165d89860e compaction: Prevent non-regular compaction from picking compacting SSTables
After 8014c7124, cleanup can potentially pick a compacting SSTable.
Upgrade and scrub can also pick a compacting SSTable.
The problem is that table::candidates_for_compaction() was badly named.
It misleads the user into thinking that the SSTables returned are perfect
candidates for compaction, but the manager still needs to filter out the
compacting SSTables from the returned set. So it's being renamed.

When the same SSTable is compacted in parallel, the strategy invariant
can be broken like overlapping being introduced in LCS, and also
some deletion failures as more than one compaction process would try
to delete the same files.

Let's fix scrub, cleanup and upgrade by calling the manager function
which gets the correct candidates for compaction.

Fixes #6938.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200811200135.25421-1-raphaelsc@scylladb.com>
(cherry picked from commit 11df96718a)
2020-09-06 18:40:56 +03:00
Takuya ASADA
4a5116a0ae aws: update enhanced networking supported instance list
Sync enhanced networking supported instance list to latest one.

Reference: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html

Fixes #6991

(cherry picked from commit 7cccb018b8)
2020-09-06 18:21:28 +03:00
Yaron Kaikov
6d9ff622df release: prepare for 4.1.6 2020-08-30 21:34:46 +03:00
Nadav Har'El
65bc33c921 redis: fix another use-after-free crash in "exists" command
Never trust Occam's Razor - it turns out that the use-after-free bug in the
"exists" command was caused by two separate bugs. We fixed one in commit
9636a33993, but there is a second one fixed in
this patch.

The problem fixed here was that a "service_permit" object, which is designed to
be copied around from place to place (it contains a shared pointer, so is cheap
to copy), was saved by reference, and the reference was to a function argument
and was destroyed prematurely.

This time I tested *many times* that test_strings.py passes on both dev and
debug builds.

Note that test/run/redis still fails in a debug build, but due to a different
problem.

Fixes #6469

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Reviewed-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20200825183313.120331-1-nyh@scylladb.com>
(cherry picked from commit 868194cd17)
2020-08-27 12:25:03 +03:00
Nadav Har'El
5e90f06ca2 redis: fix use-after-free crash in "exists" command
A missing "&" caused the key stored in a long-living command to be copied
and the copy quickly freed - and then used after freed.
This caused the test test_strings.py::test_exists_multiple_existent_key for
this feature to frequently crash.

Fixes #6469

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200823190141.88816-1-nyh@scylladb.com>
(cherry picked from commit 9636a33993)
2020-08-27 12:25:03 +03:00
Asias He
2036de3245 compaction_manager: Avoid stall in perform_cleanup
The following stall was seen during a cleanup operation:

scylla: Reactor stalled for 16262 ms on shard 4.

| std::_MakeUniq<locator::tokens_iterator_impl>::__single_object std::make_unique<locator::tokens_iterator_impl, locator::tokens_iterator_impl&>(locator::tokens_iterator_impl&) at /usr/include/fmt/format.h:1158
|  (inlined by) locator::token_metadata::tokens_iterator::tokens_iterator(locator::token_metadata::tokens_iterator const&) at ./locator/token_metadata.cc:1602
| locator::simple_strategy::calculate_natural_endpoints(dht::token const&, locator::token_metadata&) const at simple_strategy.cc:?
|  (inlined by) locator::simple_strategy::calculate_natural_endpoints(dht::token const&, locator::token_metadata&) const at ./locator/simple_strategy.cc:56
| locator::abstract_replication_strategy::get_ranges(gms::inet_address, locator::token_metadata&) const at /usr/include/fmt/format.h:1158
| locator::abstract_replication_strategy::get_ranges(gms::inet_address) const at /usr/include/fmt/format.h:1158
| service::storage_service::get_ranges_for_endpoint(seastar::basic_sstring<char, unsigned int, 15u, true> const&, gms::inet_address const&) const at /usr/include/fmt/format.h:1158
| service::storage_service::get_local_ranges(seastar::basic_sstring<char, unsigned int, 15u, true> const&) const at /usr/include/fmt/format.h:1158
|  (inlined by) operator() at ./sstables/compaction_manager.cc:691
|  (inlined by) _M_invoke at /usr/include/c++/9/bits/std_function.h:286
| std::function<std::vector<seastar::lw_shared_ptr<sstables::sstable>, std::allocator<seastar::lw_shared_ptr<sstables::sstable> > > (table const&)>::operator()(table const&) const at /usr/include/fmt/format.h:1158
|  (inlined by) compaction_manager::rewrite_sstables(table*, sstables::compaction_options, std::function<std::vector<seastar::lw_shared_ptr<sstables::sstable>, std::allocator<seastar::lw_shared_ptr<sstables::sstable> > > (table const&)>) at ./sstables/compaction_manager.cc:604
| compaction_manager::perform_cleanup(table*) at /usr/include/fmt/format.h:1158

To fix, we futurize the function to get local ranges and sstables.

In addition, this patch removes the dependency to global storage_service object.

Fixes #6662

(cherry picked from commit 07e253542d)
2020-08-27 12:25:03 +03:00
Raphael S. Carvalho
0924e4d92f sstables: optimize procedure that checks if a sstable needs cleanup
needs_cleanup() returns true if a sstable needs cleanup.

Turns out it's very slow because it iterates through all the local
ranges for all sstables in the set, making its complexity:
	O(num_sstables * local_ranges)

We can optimize it by taking into account that abstract_replication_strategy
documents that get_ranges() will return a list of ranges that is sorted
and non-overlapping. Compaction for cleanup already takes advantage of that
when checking if a given partition can be actually purged.

So needs_cleanup() can be optimized into O(num_sstables * log(local_ranges)).

With num_sstables=1000, RF=3, then local_ranges=256(num_tokens)*3, it means
the max # of checks performed will go from 768000 to ~9584.

Fixes #6730.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200629171355.45118-2-raphaelsc@scylladb.com>
(cherry picked from commit cf352e7c14)
2020-08-27 12:25:03 +03:00
Raphael S. Carvalho
b8313775c5 sstables: export needs_cleanup()
May be needed elsewhere, like in a unit test.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200629171355.45118-1-raphaelsc@scylladb.com>
(cherry picked from commit a9eebdc778)
2020-08-27 12:25:02 +03:00
Asias He
ec0002a67f abstract_replication_strategy: Add get_ranges_in_thread
Add a version that runs inside a seastar thread. The benefit is that
get_ranges can yield to avoid stalls.

Refs #6662

(cherry picked from commit 94995acedb)
2020-08-27 12:24:55 +03:00
Asias He
ebdf5f9e55 gossip: Fix race between shutdown message handler and apply_state_locally
1. The node1 is shutdown
2. The node1 sends shutdown message to node2
3. The node2 receives gossip shutdown message but the handler yields
4. The node1 is restarted
5. The node1 sends new gossip endpoint_state to node2, node2 applies the state
   in apply_state_locally and calls gossiper::handle_major_state_change
   and then calls gossiper::mark_alive
6. The shutdown message handler in step 3 resumes and sets status of node1 to SHUTDOWN
7. The gossiper::mark_alive fiber in step 5 resumes and calls gossiper::real_mark_alive,
   node2 will skip to mark node1 as alive because the status of node1 is
   SHUTDOWN. As a result, node1 is alive but it is not marked as UP by node2.

To fix, we serialize the two operations.

Fixes #7032

(cherry picked from commit e6ceec1685)
2020-08-27 11:15:59 +03:00
Nadav Har'El
32c0e4f110 alternator test: configurable temporary directory
The test/alternator/run script creates a temporary directory for the Scylla
database in /tmp. The assumption was that this is the fastest disk (usually
even a ramdisk) on the test machine, and we didn't need anything else from
it.

But it turns out that on some systems, /tmp is actually a slow disk, so
this patch adds a way to configure the temporary directory - if the TMPDIR
environment variable exists, it is used instead of /tmp. As before this
patch, a temporary subdirectory is created in $TMPDIR, and this subdirectory
is automatically deleted when the test ends.

The test.py script already passes an appropriate TMPDIR (testlog/$mode),
which after this patch the Alternator test will use instead of /tmp.

Fixes #6750

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200713193023.788634-1-nyh@scylladb.com>
(cherry picked from commit 8e3be5e7d6)
2020-08-26 19:37:38 +03:00
Nadav Har'El
5f48444a98 alternator: fix order conditions on binary attributes
We implemented the order operators (LT, GT, LE, GE, BETWEEN) incorrectly
for binary attributes: DynamoDB requires that the bytes be treated as
unsigned for the purpose of order (so byte 128 is higher than 127), but
our implementation uses Scylla's "bytes" type which has signed bytes.

The solution is simple - we can continue to use the "bytes" type, but
we need to use its compare_unsigned() function, not its "<" operator.

This bug affected conditional operations ("Expected" and
"ConditionExpression") and also filters ("QueryFilter", "ScanFilter",
"FilterExpression"). The bug did *not* affect Query's key conditions
("KeyConditions", "KeyConditionExpression") because those already
used Scylla's key comparison functions - which correctly compare binary
blobs as unsigned bytes (in fact, this is why we have the
compare_unsigned() function).

The patch also adds tests that reproduce the bugs in conditional
operations, and show that the bug did not exist in key conditions.

Fixes #6573

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200603084257.394136-1-nyh@scylladb.com>
(cherry picked from commit f6b1f45d69)
Manually removed tests in test_key_conditions.py that did not exist in this branch
2020-08-26 18:47:28 +03:00
Avi Kivity
8930ea5407 Merge "Unregister RPC verbs on stop" from Pavel E
"
There are 5 services, that register their RPC handlers in messaging
service, but only a few of them unregister them on stop.

Unregistering is somewhat critical, not just because it makes the
code look clean, but also because unregistration does wait for the
message processing to complete, thus avoiding use-after-free's in
the handlers.

In particular, several handlers call service::get_schema_for_write()
which, in turn, may end up in service::maybe_sync() calling for
the local migration manager instance. All those handlers' processing
must be waited for before stopping the migration manager.

The set brings the RPC handlers unregistration in sync with the
registration part.

tests: unit (dev)
       dtest (dev: simple_boot_shutdown, repair)
       start-stop by hands (dev)
fixes: #6904
"

* 'br-rpc-unregister-verbs' of https://github.com/xemul/scylla:
  main: Add missing calls to unregister RPC handlers
  messaging: Add missing per-service unregistering methods
  messaging: Add missing handlers unregistration helpers
  streaming: Do not use db->invoke_on_all in vain
  storage_proxy: Detach rpc unregistration from stop
  main: Shorten call to storage_proxy::init_messaging_service

(cherry picked from commit 01b838e291)
2020-08-26 14:42:17 +03:00
Raphael S. Carvalho
311cd6403c cql3/statements: verify that counter column cannot be added into non-counter table
A check, to validate that counter column cannot be added into non-counter table,
is missing for alter table statement. Validation is performed when building new
schema, but it's limited to checking that a schema will not contain both counter
and non-counter columns.

Due to lack of validation, the added counter column could be incorrectly
persisted to the schema, but this results in a crash when setting the new
schema to its table. On restart, it can be confirmed that the schema change
was indeed persisted when describing the table.
This problem is fixed by doing proper validation for the alter table statement,
which consists of making sure a new counter column cannot be added to a
non-counter table.

The test cdc_disallow_cdc_for_counters_test is adjusted because one of its tests
was built on the assumption that counter column can be added into a non-counter
table.

Fixes #7065.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200824155709.34743-1-raphaelsc@scylladb.com>
(cherry picked from commit 1c29f0a43d)
2020-08-25 18:45:30 +03:00
Takuya ASADA
b71821435a dist/debian: disable debuginfo compression on .deb
Since older binutils on some distributions is not able to handle
compressed debuginfo generated on Fedora, we need to disable it.
However, since the debian packager forces debuginfo compression as of debian/compat = 9,
we have to uncompress the files after they are compressed automatically.

Fixes #6982

(cherry picked from commit 75c2362c95)
2020-08-23 19:02:57 +03:00
Botond Dénes
cd29e2643c scylla-gdb.py: find_db(): don't return current shard's database for shard=0
The `shard` parameter of `find_db()` is optional and is defaulted to
`None`. When missing, the current shard's database instance is returned.
The problem is that the if condition checking this uses `not shard`,
which also evaluates to `True` if `shard == 0`, resulting in returning
the current shard's database instance for shard 0. Change the condition
to `shard is None` to avoid this.

Fixes: #7016
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200812091546.1704016-1-bdenes@scylladb.com>
(cherry picked from commit 4cfab59eb1)
2020-08-23 18:56:26 +03:00
Avi Kivity
59aa1834a7 Merge "repair: row_level: prevent deadlocks when repairing homogenous nodes" from Botond
"
This series backports the series "repair: row_level: prevent deadlocks
when repairing homogenous nodes" (merged as a9c7a1a86) to branch-4.1.
"

Fixes #6272

* 'repair-row-level-evictable-local-reader/branch-4.1' of https://github.com/denesb/scylla:
  repair: row_level: destroy reader on EOS or error
  repair: row_level: use evictable_reader for local reads
  mutation_reader: expose evictable_reader
  mutation_reader: evictable_reader: add auto_pause flag
  mutation_reader: make evictable_reader a flat_mutation_reader
  mutation_reader: s/inactive_shard_read/inactive_evictable_reader/
  mutation_reader: move inactive_shard_reader code up
  mutation_reader: fix indentation
  mutation_reader: shard_reader: extract remote_reader as evictable_reader
  mutation_reader: reader_lifecycle_policy: make semaphore() available early
2020-08-23 18:06:12 +03:00
Botond Dénes
436b305286 view_update_generator: fix race between registering and processing sstables
fea83f6 introduced a race between processing (and hence removing)
sstables from `_sstables_with_tables` and registering new ones. This
manifested in sstables that were added concurrently with processing a
batch for the same sstables being dropped and the semaphore units
associated with them not returned. This resulted in repairs being
blocked indefinitely as the units of the semaphore were effectively
leaked.

This patch fixes this by moving the contents of `_sstables_with_tables`
to a local variable before starting the processing. A unit test
reproducing the problem is also added.

Fixes: #6892

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200817160913.2296444-1-bdenes@scylladb.com>
(cherry picked from commit 22a6493716)
2020-08-23 18:04:29 +03:00
Botond Dénes
1d85051e8d repair: row_level: destroy reader on EOS or error
To avoid having to make it an optional with all the additional checks,
we just replace it with an empty reader instead, this also achieves
the desired effect of releasing the read permit and all the associated
resources early.

(cherry picked from commit fbbc86e18c)
2020-08-20 16:10:16 +03:00
Botond Dénes
3f52d8733b repair: row_level: use evictable_reader for local reads
Row level repair, when using a local reader, is prone to deadlocking on
the streaming reader concurrency semaphore. This has been observed to
happen with at least two participating nodes, running more concurrent
repairs than the maximum allowed amount of reads by the concurrency
semaphore. In this situation, it is possible that two repair instances,
competing for the last available permits on both nodes, get a permit on
one of the nodes and get queued on the other one respectively. As
neither will let go of the permit it already acquired, nor give up
waiting on the failed-to-acquire permit, a deadlock happens.

To prevent this, we make the local repair reader evictable. For this we
reuse the newly exposed evictable reader.
The repair reader is paused after the repair buffer is filled, which is
currently 32MB, so the cost of a possible reader recreation is amortized
over 32MB read.

The repair reader is said to be local, when it can use the shard-local
partitioner. This is the case if the participating nodes are homogenous
(their shard configuration is identical), that is the repair instance
has to read just from one shard. A non-local reader uses the multishard
reader, which already makes its shard readers evictable and hence is not
prone to the deadlock described here.

(cherry picked from commit 080f00b99a)
2020-08-20 16:10:16 +03:00
Botond Dénes
eece444547 mutation_reader: expose evictable_reader
Expose functions for the outside world to create evictable readers. We
expose two functions, which create an evictable reader with
`auto_pause::yes` and `auto_pause::no` respectively. The function
creating the latter also returns a handle in addition to the reader,
which can be used to pause the reader.

(cherry picked from commit 542d9c3711)
2020-08-20 16:10:16 +03:00
Botond Dénes
2ab51c4055 mutation_reader: evictable_reader: add auto_pause flag
Currently the evictable reader unconditionally pauses the underlying
reader after each use (`fill_buffer()` or `fast_forward_to()` call).
This is fine for current users (the multishard reader), but the future
user we are doing all this refactoring for -- repair -- will want to
control when the underlying reader is paused "manually". Both these
behaviours can easily be supported in a single implementation, so we
add an `auto_pause` flag to allow the creator of the evictable reader
to control this.

(cherry picked from commit 1cc31deff9)
2020-08-20 16:10:16 +03:00
Botond Dénes
4a1a1feb55 mutation_reader: make evictable_reader a flat_mutation_reader
The `evictable_reader` class is almost a proper flat mutation reader
already, it roughly offers the same interface. This patch makes this
formal: changing the class to inherit from `flat_mutation_reader::impl`,
and implement all virtual methods. This also entails a departure from
using the lifecycle policy to pause/resume and create readers, instead
using more general building blocks like the reader concurrency semaphore
and a mutation source.

(cherry picked from commit af9e1c23e1)
2020-08-20 16:10:16 +03:00
Botond Dénes
76995933e0 mutation_reader: s/inactive_shard_read/inactive_evictable_reader/
Rename `inactive_shard_read` to `inactive_evictable_reader` to reflect
the fact that the evictable reader is going to be of general use,
not specific to the multishard reader.

(cherry picked from commit 4485864ada)
2020-08-20 16:10:16 +03:00
Botond Dénes
f840263fdd mutation_reader: move inactive_shard_reader code up
It will be used by the `evictable_reader` code too in the next patches.

(cherry picked from commit b6ed054c08)
2020-08-20 16:10:16 +03:00
Botond Dénes
b4887ce4a5 mutation_reader: fix indentation
Deferred from the previous patch.

(cherry picked from commit e3ea1c9080)
2020-08-20 16:10:16 +03:00
Botond Dénes
849e12bf2e mutation_reader: shard_reader: extract remote_reader as evictable_reader
We want to make the evictable reader mechanism used in the multishard
reader pipeline available for general (re)use, as a standalone
flat mutation reader implementation. The first step is extracting
`shard_reader::remote_reader` the class implementing this logic into a
top-level class, also renamed to `evictable_reader`.

(cherry picked from commit f9d1916499)
2020-08-20 16:10:16 +03:00
Botond Dénes
f124f97f99 mutation_reader: reader_lifecycle_policy: make semaphore() available early
Currently all reader lifecycle policy implementations assume that
`semaphore()` will only be called after at least one call to
`make_reader()`. This assumption will soon not hold, so make sure
`semaphore()` can be called at any time, including before any calls are
made to `make_reader()`.

(cherry picked from commit 63309f925c)
2020-08-20 16:10:16 +03:00
Botond Dénes
4ee0b489cf table: get_sstables_by_partition_key(): don't make a copy of selected sstables
Currently we assign the reference to the vector of selected sstables to
`auto sst`. This makes a copy and we pass this local variable to
`do_for_each()`, which will result in a use-after-free if the latter
defers.
Fix by not making a copy and instead just keep the reference.

Fixes: #7060

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200818091241.2341332-1-bdenes@scylladb.com>
(cherry picked from commit 78f94ba36a)
2020-08-19 00:02:01 +03:00
Yaron Kaikov
382dcb9d34 release: prepare for 4.1.5 2020-08-17 21:55:06 +03:00
Calle Wilund
07b7df9171 cdc::log: Missing "preimage" check in row deletion pre-image
Fixes #6561

Pre-image generation in row deletion case only checked if we had a pre-image
result set row. But that can be from post-image. Also check actual existence
of the pre-image CK.
Message-Id: <20200608132804.23541-1-calle@scylladb.com>

(cherry picked from commit 5105e9f5e1)
2020-08-12 13:52:45 +03:00
Nadav Har'El
7fa3a988e3 Update Seastar submodule
> http: add "Expect: 100-continue" handling

Fixes #6844
2020-08-11 13:16:16 +03:00
Asias He
7b23574224 repair: Switch to btree_set for repair_hash.
In one of the longevity tests, we observed 1.3s reactor stall which came from
repair_meta::get_full_row_hashes_source_op. It traced back to a call to
std::unordered_set::insert() which triggered big memory allocation and
reclaim.

I measured std::unordered_set, absl::flat_hash_set, absl::node_hash_set
and absl::btree_set. The absl::btree_set was the only one that the seastar
oversized allocation checker did not warn about in my tests where around 300K
repair hashes were inserted into the container.

- unordered_set:
hash_sets=295634, time=333029199 ns

- flat_hash_set:
hash_sets=295634, time=312484711 ns

- node_hash_set:
hash_sets=295634, time=346195835 ns

- btree_set:
hash_sets=295634, time=341379801 ns

The btree_set is a bit slower than unordered_set but it does not have
huge memory allocation. I did not measure any real difference in the total time
to finish repair of the same dataset with unordered_set and btree_set.

To fix, switch to absl btree_set container.

Fixes #6190

(cherry picked from commit 67f6da6466)
(cherry picked from commit a27188886a)
2020-08-11 12:34:26 +03:00
Rafael Ávila de Espíndola
ac207c892b build: Link with abseil
It is a pity we have to list so many libraries, but abseil doesn't
provide a .pc file.

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
(cherry picked from commit 7d1f6725dd)

Ref #6190.
2020-08-11 12:34:26 +03:00
Rafael Ávila de Espíndola
a023b3bb7a Add abseil as a submodule
This adds the https://abseil.io library as a submodule. The patch
series that follows needs a hash table that supports heterogeneous
lookup, and abseil has a really good hash table that supports that
(https://abseil.io/blog/20180927-swisstables).

The library is still not available in Fedora, but it is fairly easy to
use it directly from a submodule.

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
(cherry picked from commit 383a9c6da9)

Ref #6190
2020-08-11 12:34:26 +03:00
Rafael Ávila de Espíndola
0b9db42d9c configure: Don't overwrite seastar_cflags
The variable seastar_cflags was being used for flags passed to seastar
and for flags extracted from the seastar.pc file.

This introduces a new variable for the flags extracted from the
seastar.pc file.

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
(cherry picked from commit 2ad09aefb6)

Ref #6190.
2020-08-11 12:34:26 +03:00
Calle Wilund
df8d4482c5 database: Do not assert on replay positions if truncate does not flush
Fixes #6995

In c2c6c71 the assert on replay positions in flushed sstables discarded by
truncate was broken, by the fact that we no longer flush all sstables
unless auto snapshot is enabled.

This means the low_mark assertion does not hold, because we maybe/probably
never got around to creating the sstables that would hold said mark.

Note that the (old) change to not create sstables and then just delete
them is in itself good. But in that case we should not try to verify
the rp mark.

(cherry picked from commit 9620755c7f)
2020-08-10 23:33:39 +03:00
Avi Kivity
442d7bf9ff Update seastar submodule
* seastar c9c1dc5fa7...1337f1158b (1):
  > memory: fix small aligned free memory corruption

Fixes #6831
2020-08-09 18:37:32 +03:00
Avi Kivity
bc6422d16d Merge 'hinted handoff: fix commitlog memory leak' from Piotr D
"
When commitlog is recreated in hints manager, only shutdown() method is
called, but not release(). Because of that, some internal commitlog
objects (`segment_manager` and `segment`s) may be left pointing to each
other through shared_ptr reference cycles, which may result in memory
leak when the parent commitlog object is destroyed.

This PR prevents memory leaks that may happen this way by calling
release() after shutdown() from the hints manager.

Fixes: #6409, Fixes #6776
"

* piodul-fix-commitlog-memory-leak-in-hinted-handoff:
  hinted handoff: disable warnings about segments left on disk
  hinted handoff: release memory on commitlog termination

(cherry picked from commit 4c221855a1)
2020-08-09 17:25:57 +03:00
Yaron Kaikov
76f4bc4c6f release: prepare for 4.1.4 2020-08-09 08:49:19 +03:00
Tomasz Grabiec
dc4efb0a1e thrift: Fix crash on unsorted column names in SlicePredicate
The column names in SlicePredicate can be passed in arbitrary order.
We converted them to clustering ranges in read_command preserving the
original order. As a result, the clustering ranges in read command may
appear out of order. This violates the storage engine's assumptions and
leads to undefined behavior.

It was seen manifesting as a SIGSEGV or an abort in sstable reader
when executing a get_slice() thrift verb:

scylla: sstables/consumer.hh:476: seastar::future<> data_consumer::continuous_data_consumer<StateProcessor>::fast_forward_to(size_t, size_t) [with StateProcessor = sstables::data_consume_rows_context_m; size_t = long unsigned int]: Assertion `end >= _stream_position.position' failed.

Fixes #6486.

Tests:

   - added a new dtest to thrift_tests.py which reproduces the problem

Message-Id: <1596725657-15802-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit bfd129cffe)
2020-08-08 19:48:25 +03:00
Rafael Ávila de Espíndola
f699d23f0b alternator: Fix use after return
Avoid a copy of timeout so that we don't end up with a reference to a
stack allocated variable.

Fixes #6897

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
Message-Id: <20200721184939.111665-1-espindola@scylladb.com>
(cherry picked from commit e83e91e352)
2020-08-03 22:36:37 +03:00
Nadav Har'El
d5e5a6fe48 alternator: fix Expected's "NULL" operator with missing AttributeValueList
The "NULL" operator in Expected (old-style conditional operations) doesn't
have any parameters, so we insisted that the AttributeValueList be empty.
However, we forgot to allow it to also be missing - a possibility which
DynamoDB allows.

This patch adds a test to reproduce this case (the test passes on DynamoDB,
fails on Alternator before this patch, and succeeds after this patch), and
a fix.

Fixes #6816.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200709161254.618755-1-nyh@scylladb.com>
(cherry picked from commit f549d147ea)
2020-08-03 20:42:15 +03:00
Takuya ASADA
5a43c6ec81 scylla_util.py: always use relocatable CLI tools
For some CLI tools, command options may differ between the latest version
and older versions.
To maximize compatibility of setup scripts, we should always use
relocatable CLI tools instead of distribution version of the tool.

Related #6954

(cherry picked from commit a19a62e6f6)
2020-08-03 10:41:57 +03:00
Takuya ASADA
2aae8bb206 create-relocatable-package.py: add lsblk for relocatable CLI tools
We need latest version of lsblk that supported partition type UUID.

Fixes #6954

(cherry picked from commit 6ba2a6c42e)
2020-08-03 10:41:52 +03:00
Juliusz Stasiewicz
c206399379 aggregate_fcts: Use per-type comparators for dynamic types
For collections and UDTs the `MIN()` and `MAX()` functions are
generated on the fly. Until now they worked by comparing just the
byte representations of arguments.

This patch uses specific per-type comparators to provide semantically
sensible, dynamically created aggregates.

Fixes #6768

(cherry picked from commit 5b438e79be)
2020-08-03 10:26:15 +03:00
Calle Wilund
787b324916 cql3::lists: Fix setter_by_uuid not handling null value
Fixes #6828

When using the scylla list index from UUID extension,
null values were not handled properly causing throws
from underlying layer.

(cherry picked from commit 3b74b9585f)
2020-08-03 10:20:14 +03:00
Takuya ASADA
dfe90a69f5 scylla_post_install.sh: generate memory.conf for CentOS7
On CentOS7, systemd does not support percentage-based parameter.
To apply memory parameter on CentOS7, we need to override the parameter
in bytes, instead of percentage.

Fixes #6783

(cherry picked from commit 3a25e7285b)
2020-07-30 16:41:24 +03:00
Tomasz Grabiec
d03d6f41c2 commitlog: Fix use-after-free on mutation object during replay
The mutation object may be freed prematurely during commitlog replay
in the schema upgrading path. We will hit the problem if the memtable
is full and apply_in_memory() needs to defer.

This will typically manifest as a segfault.

Fixes #6953

Introduced in 79935df

Tests:
  - manual using scylla binary. Reproduced the problem then verified the fix makes it go away

Message-Id: <1596044010-27296-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit 3486eba1ce)
2020-07-30 16:36:55 +03:00
Avi Kivity
0e86f1bf66 dist: debian: do not require root during package build
Debian package builds provide a root environment for the installation
scripts, since that's what typical installation scripts expect. To
avoid providing actual root, a "fakeroot" system is used where syscalls
are intercepted and any effect that requires root (like chown) is emulated.

However, fakeroot sporadically fails for us, aborting the package build.
Since our install scripts don't really require root (when operating in
the --packaging mode), we can just tell dpkg-buildpackage that we don't
need fakeroot. This ought to fix the sporadic failures.

As a side effect, package builds are faster.

Fixes #6655.

(cherry picked from commit b608af870b)
2020-07-29 16:03:33 +03:00
Takuya ASADA
392a007b3a scylla_setup: skip boot partition
On GCE, /dev/sda14 is reported as an unused disk, but it is the BIOS boot partition.
It should not be used as a scylla data partition, and cannot be used as one anyway since it's
too small.

It's better to exclude such partitions from the unused disk list.

Fixes #6636

(cherry picked from commit d7de9518fe)
2020-07-29 09:50:19 +03:00
Asias He
254b898cd8 repair: Fix race between create_writer and wait_for_writer_done
We saw scylla hit use after free in repair with the following procedure during tests:

- n1 and n2 in the cluster

- n2 ran decommission

- n2 sent data to n1 using repair

- n2 was killed forcibly

- n1 tried to remove repair_meta for n1

- n1 hit use after free on repair_meta object

This was what happened on n1:

1) data was received -> do_apply_rows was called -> yield before create_writer() was called

2) repair_meta::stop() was called -> wait_for_writer_done() / do_wait_for_writer_done was called
   with _writer_done[node_idx] not engaged

3) step 1 resumed, create_writer() was called and _repair_writer object was referenced

4) repair_meta::stop() finished, repair_meta object and its member _repair_writer was destroyed

5) The fiber created by create_writer() at step 3 hit use after free on _repair_writer object

To fix, we should call wait_for_writer_done() only after all pending
operations protected by repair_meta::_gate are done. This
prevents wait_for_writer_done() from finishing while the writer is still in the
process of being created.

Fixes: #6853
Fixes: #6868
Backports: 4.0, 4.1, 4.2
(cherry picked from commit e6f640441a)
2020-07-29 09:50:15 +03:00
Raphael S. Carvalho
6fb84ed7e0 sstable: index_reader: Make sure streams are all properly closed on failure
Turns out the fix f591c9c710 wasn't enough to make sure all input streams
are properly closed on failure.
It only closes the main input stream that belongs to context, but it misses
all the input streams that can be opened in the consumer for promoted index
reading. Consumer stores a list of indexes, where each of them has its own
input stream. On failure, we need to make sure that every single one of
them is properly closed before destroying the indexes as that could cause
memory corruption due to read ahead.

Fixes #6924.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200727182214.377140-1-raphaelsc@scylladb.com>
(cherry picked from commit 0d70efa58e)
2020-07-29 09:48:48 +03:00
Yaron Kaikov
9002592ee0 release: prepare for 4.1.3 2020-07-29 08:26:06 +03:00
Botond Dénes
5d6a7272e7 sstables: clamp estimated_partitions to [1, +inf) in writers
In some cases the estimated number of partitions can be 0, which, albeit a
legitimate estimation result, breaks a lot of low-level sstable writer code, so
some of these writers have assertions to ensure estimated partitions is > 0.
To avoid hitting this assert all users of the sstable writers do the
clamping, to ensure estimated partitions is at least 1. However leaving
this to the callers is error prone as #6913 has shown it. As this
clamping is standard practice, it is better to do it in the writers
themselves, avoiding this problem altogether. This is exactly what this
patch does. It also adds two unit tests, one that reproduces the crash
in #6913, and another one that ensures all sstable writers are fine with
estimated partitions being 0 now. Call sites previously doing the
clamping are changed to not do it, it is unnecessary now as the writer
does it itself.

Fixes #6913

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200724120227.267184-1-bdenes@scylladb.com>
[avi: adjust sstable_datafile_test's use of compaction_descriptor and make_permit]
(cherry picked from commit fe127a2155)
2020-07-28 09:55:34 +03:00
Piotr Sarna
96625fa54b Merge 'view_update_generator: use partitioned sstable set'
from Botond.

Recently it was observed (#6603) that since 4e6400293ea, the staging
reader is reading from a lot of sstables (200+). This consumes a lot of
memory, and after this reaches a certain threshold -- the entire memory
amount of the streaming reader concurrency semaphore -- it can cause a
deadlock within the view update generation. To reduce this memory usage,
we exploit the fact that the staging sstables are usually disjoint, and
use the partitioned sstable set to create the staging reader. This
should ensure that only the minimum number of sstable readers will be
opened at any time.

Refs: #6603
Fixes: #6707

Tests: unit(dev)

* 'view-update-generator-use-partitioned-set/v1' of https://github.com/denesb/scylla:
  db/view: view_update_generator: use partitioned sstable set
  sstables: make_partitioned_sstable_set(): return an sstable_set

(cherry picked from commit e4b74356bb)
2020-07-21 15:41:46 +03:00
Raphael S. Carvalho
4f5f404619 table: Fix Staging SSTables being incorrectly added or removed from the backlog tracker
Staging SSTables can be incorrectly added or removed from the backlog tracker,
after an ALTER TABLE or TRUNCATE, because the add and removal don't take
into account if the SSTable requires view building, so a Staging SSTable can
be added to the tracker after an ALTER TABLE, or removed after a TRUNCATE,
even though not added previously, potentially causing the backlog to
become negative.

Fixes #6798.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200716180737.944269-1-raphaelsc@scylladb.com>
(cherry picked from commit b67066cae2)
2020-07-21 12:57:28 +03:00
Asias He
cd4502ee64 repair: Relax size check of get_row_diff and set_diff
In case of a row hash conflict, a hash in set_diff will get more than one
row from get_row_diff.

For example,

Node1 (Repair master):
row1  -> hash1
row2  -> hash2
row3  -> hash3
row3' -> hash3

Node2 (Repair follower):
row1  -> hash1
row2  -> hash2

We will have set_diff = {hash3} between node1 and node2, while
get_row_diff({hash3}) will return two rows: row3 and row3'. And the
error below was observed:

   repair - Got error in row level repair: std::runtime_error
   (row_diff.size() != set_diff.size())

In this case, node1 should send both row3 and row3' to the peer node
instead of failing the whole repair, because node2 has neither row3 nor
row3'; otherwise node1 would not send the rows with hash3 to node2 in the first
place.

Refs: #6252
(cherry picked from commit a00ab8688f)
2020-07-15 14:49:09 +03:00
Hagit Segev
3e6c6d5f58 release: prepare for 4.1.2 2020-07-14 23:56:02 +03:00
Avi Kivity
564b4c32b0 Update seastar submodule
* seastar 78f626af6c...c9c1dc5fa7 (2):
  > futures: Add a test for a broken promise in a parallel_for_each
  > future: Call set_to_broken_promise earlier

Fixes #6749 (probably).
2020-07-13 20:17:54 +03:00
Dmitry Kropachev
dfafc4e1a9 dist/common/scripts/scylla-housekeeping: wrap urllib.request with try ... except
We could hit "cannot serialize '_io.BufferedReader' object" when a request gets a 404 error from the server.
	Now you will get a legitimate error message in that case.

	Fixes #6690

(cherry picked from commit de82b3efae)
2020-07-09 18:25:16 +03:00
Dejan Mircevski
db286c5ca4 cql/restrictions: Handle WHERE a>0 AND a<0
WHERE clauses with start point above the end point were handled
incorrectly.  When the slice bounds are transformed to interval
bounds, the resulting interval is interpreted as wrap-around (because
start > end), so it contains all values above 0 and all values below
0.  This is clearly incorrect, as the user's intent was to filter out
all possible values of a.

Fix it by explicitly short-circuiting to false when start > end.  Add
a test case.

Fixes #5799.

Tests: unit (dev)

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
(cherry picked from commit 921dbd0978)
2020-07-08 13:21:00 +03:00
Botond Dénes
519fcd4729 db/view: view_update_generator: re-balance wait/signal on the register semaphore
The view update generator has a semaphore to limit concurrency. This
semaphore is waited on in `register_staging_sstable()` and later the
unit is returned after the sstable is processed in the loop inside
`start()`.
This was broken by 4e64002, which changed the loop inside `start()` to
process sstables in per table batches, however didn't change the
`signal()` call to return the amount of units according to the number of
sstables processed. This can cause the semaphore units to dry up, as the
loop can process multiple sstables per table but return just a single
unit. This can also block callers of `register_staging_sstable()`
indefinitely as some waiters will never be released as under the right
circumstances the units on the semaphore can permanently go below 0.
In addition to this, 4e64002 introduced another bug: table entries from
the `_sstables_with_tables` are never removed, so they are processed
every turn. If the sstable list is empty, there won't be any update
generated but due to the unconditional `signal()` described above, this
can cause the units on the semaphore to grow to infinity, allowing
future staging sstables producers to register a huge amount of sstables,
causing memory problems due to the amount of sstable readers that have
to be opened (#6603, #6707).
Both outcomes are equally bad. This patch fixes both issues and modifies
the `test_view_update_generator` unit test to reproduce them and hence
to verify that this doesn't happen in the future.

Fixes: #6774
Refs: #6707
Refs: #6603

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20200706135108.116134-1-bdenes@scylladb.com>
(cherry picked from commit 5ebe2c28d1)
2020-07-08 12:00:12 +03:00
Juliusz Stasiewicz
9bcbcbbcf2 counters: Read the state under timeout
Counter update is a RMW operation. Until now the "Read" part was
not guarded by a timeout, which is changed in this patch.

Fixes #5069

(cherry picked from commit e04fd9f774)
2020-07-07 20:45:01 +03:00
Takuya ASADA
c622e5bfab scylla_setup: don't add same disk device twice
We shouldn't accept adding same disk twice for RAID prompt.

Fixes #6711

(cherry picked from commit 835e76fdfc)
2020-07-07 13:08:22 +03:00
Nadav Har'El
905643bbc2 docker: add option to start Alternator with HTTPS
We already have a docker image option to enable alternator on an unencrypted
port, "--alternator-port", but we forgot to also allow the similar option
for enabling alternator on an encrypted (HTTPS) port: "--alternator-https-port"
so this patch adds the missing option, and documents how to use it.

Note that using this option is not enough. When this option is used,
Alternator also requires two files, /etc/scylla/scylla.crt and
/etc/scylla/scylla.key, to be inserted into the image. These files should
contain the SSL certificate, and key, respectively. If these files are
missing, you will get an error in the log about the missing file.

Fixes #6583.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200621125219.12274-1-nyh@scylladb.com>
(cherry picked from commit e4eca5211a)
2020-07-06 08:22:22 +02:00
Juliusz Stasiewicz
d396a298d6 cdc: Fix segfault when stream ID key is too short
When a token is calculated for stream_id, we check that the key is
exactly 16 bytes long. If it's not - `minimum_token` is returned
and client receives empty result.

This used to be the expected behavior for empty keys; now it's
extended to keys of any incorrect length.

Fixes #6570

(cherry picked from commit 8628ede009)
2020-07-05 15:09:44 +03:00
Asias He
1d9bbbc957 boot_strapper: Ignore node to be replaced explicitly as stream source
After commit 7d86a3b208 (storage_service:
Make replacing node take writes), during replace operation, tokens in
_token_metadata for node being replaced are updated only after the replace
operation is finished. As a result, in range_streamer::add_ranges, the
node being replaced will be considered as a source to stream data from.

Before commit 7d86a3b208, the node being
replaced will not be considered as a source node because it is already
replaced by the replacing node before the replace operation is finished.
This is the reason why it worked in the past.

To fix, filter out the node being replaced as a source node explicitly.

Tests: replace_first_boot_test and replace_stopped_node_test
Backports: 4.1
Fixes: #6728
(cherry picked from commit e338028b7e22b0a80be7f80c337c52f958bfe1d7)
2020-07-01 14:35:28 +03:00
Raphael S. Carvalho
4f1878803e compaction: Fix the 2x disk space requirement in SSTable upgrade
SSTable upgrade is requiring 2x the space of input SSTables because
we aren't releasing references of the SSTables that were already
upgraded. So if we're upgrading 1TB, it means that up to 2TB may be
required for the upgrade operation to succeed.

That can be fixed by moving all input SSTables when rewrite_sstables()
asks for the set of SSTables to be compacted, so allowing their space
to be released as soon as there is no longer any ref to them.

Spotted while auditing code.

Fixes #6682.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20200619205701.92891-1-raphaelsc@scylladb.com>
(cherry picked from commit 52180f91d4)
2020-07-01 12:36:52 +03:00
Avi Kivity
c5e2fad1c8 Merge "Fix handling of decimals with negative scales" from Rafael
"
Before this series scylla would effectively infinite loop when, for
example, casting a decimal with a negative scale to float.

Fixes #6720
"

* 'espindola/fix-decimal-issue' of https://github.com/espindola/scylla:
  big_decimal: Add a test for a corner case
  big_decimal: Correctly handle negative scales
  big_decimal: Add a as_rational member function
  big_decimal: Move constructors out of line

(cherry picked from commit 3e2eeec83a)
2020-06-29 12:05:39 +03:00
Hagit Segev
abd0fa52c0 release: prepare for 4.1.1 2020-06-25 08:06:32 +03:00
Piotr Sarna
dfa464c35b alternator: fix propagating tags
Updating tags was erroneously done locally, which means that
the schema change was not propagated to other nodes.
The new code announces new schema globally.

Fixes #6513
Branches: 4.0,4.1
Tests: unit(dev)
       dtest(alternator_tests.AlternatorTest.test_update_condition_expression_and_write_isolation)
Message-Id: <3a816c4ecc33c03af4f36e51b11f195c231e7ce1.1592935039.git.sarna@scylladb.com>

(cherry picked from commit f4e8cfe03b)
2020-06-24 13:56:09 +03:00
Avi Kivity
be29b35c4b Merge 'range_streamer: Handle table of RF 1 in get_range_fetch_map' from Asias
"
After "Make replacing node take writes" series, with repair based node
operations disabled, we saw the replace operation fail like:

```
[shard 0] init - Startup failed: std::runtime_error (unable to find
sufficient sources for streaming range (9203926935651910749, +inf) in
keyspace system_auth)
```
The reason is the system_auth keyspace has default RF of 1. It is
impossible to find a source node to stream from for the ranges owned by
the replaced node.

In the past, the replace operation with keyspace of RF 1 passes, because
the replacing node calls token_metadata.update_normal_tokens(tokens,
ip_of_replacing_node) before streaming. We saw:

```
[shard 0] range_streamer - Bootstrap : keyspace system_auth range
(-9021954492552185543, -9016289150131785593] exists on {127.0.0.6}
```

Node 127.0.0.6 is the replacing node 127.0.0.5. The source node check in
range_streamer::get_range_fetch_map will pass if the source is the node
itself. However, it will not stream from the node itself. As a result,
the system_auth keyspace will not get any data.

After the "Make replacing node take writes" series, the replacing node
calls token_metadata.update_normal_tokens(tokens, ip_of_replacing_node)
after the streaming finishes. We saw:

```
[shard 0] range_streamer - Bootstrap : keyspace system_auth range
(-9049647518073030406, -9048297455405660225] exists on {127.0.0.5}
```

Since 127.0.0.5 was dead, the source node check failed, and so did the bootstrap
operation.

To fix, we ignore the table of RF 1 when it is unable to find a source
node to stream from.

Fixes #6351
"

* asias-fix_bootstrap_with_rf_one_in_range_streamer:
  range_streamer: Handle table of RF 1 in get_range_fetch_map
  streaming: Use separate streaming reason for replace operation

(cherry picked from commit 9afd599d7c)
2020-06-23 13:53:03 +03:00
Asias He
97b7024c0c streaming: Do not send end of stream in case of error
Current sender sends stream_mutation_fragments_cmd::end_of_stream to
receiver when an error is received from a peer node. To be safe, send
stream_mutation_fragments_cmd::error instead of
stream_mutation_fragments_cmd::end_of_stream to prevent end_of_stream from
being written into the sstable when a partition is not closed yet.

In addition, use mutation_fragment_stream_validator to validate the
mutation fragments emitted from the reader, e.g., check if
partition_start and partition_end are paired when the reader is done. If
not, fail the stream session and send
stream_mutation_fragments_cmd::error instead of
stream_mutation_fragments_cmd::end_of_stream to isolate the problematic
sstables on the sender node.

Refs: #6478
(cherry picked from commit a521c429e1)
2020-06-23 12:47:35 +03:00
Alejo Sanchez
194ff1d226 lwt: validate before constructing metadata
LWT batch conditions can't span multiple tables.
This was detected in batch_statement::validate() called in ::prepare().
But ::cas_result_set_metadata() was built in the constructor,
causing a bitset assert/crash in a reported scenario.
This patch moves validate() to the constructor before building metadata.

Closes #6332

Tested with https://github.com/scylladb/scylla-dtest/pull/1465

[avi: adjust spelling of exception message to 4.1 spelling]

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
(cherry picked from commit d1521e6721)
2020-06-21 18:20:41 +03:00
Gleb Natapov
b8f7fb35e1 cql transport: do not log broken pipe error when a client closes its side of a connection abruptly
Fixes #5661

Message-Id: <20200615075958.GL335449@scylladb.com>
(cherry picked from commit 7ca937778d)
2020-06-21 13:08:58 +03:00
Amnon Heiman
f7d53ff607 api/storage_service.cc: stream result of token_range
The get token range API can become big which can cause large allocation
and stalls.

This patch replaces the implementation so it would stream the results
using the http stream capabilities instead of serialization and sending
one big buffer.

Fixes #6297

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 7c4562d532)
2020-06-21 12:57:15 +03:00
Rafael Ávila de Espíndola
eb190643f8 configure: Reduce the dynamic linker path size
gdb has a SO_NAME_MAX_PATH_SIZE of 512, so we use that as the path
size.

Fixes: #6494

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
Message-Id: <20200528202741.398695-2-espindola@scylladb.com>
(cherry picked from commit aa778ec152)
2020-06-21 12:26:51 +03:00
Piotr Sarna
3f8345f1b8 alternator: fix the return type of PutItem
Even if there are no attributes to return from PutItem requests,
we should return a valid JSON object, not an empty string.

Fixes #6568
Tests: unit(dev)

(cherry picked from commit 8fc3ca855e)
2020-06-21 12:21:19 +03:00
Piotr Sarna
891a3fa243 alternator: fix returning UnprocessedKeys unconditionally
Client libraries (e.g. PynamoDB) expect the UnprocessedKeys
and UnprocessedItems attributes to appear in the response
unconditionally - it's hereby added, along with a simple test case.

Fixes #6569
Tests: unit(dev)

(cherry picked from commit 3aff52f56e)
2020-06-21 12:19:18 +03:00
Tomasz Grabiec
db31542805 row_cache: Fix undefined behavior on key linearization
This is relevant only when using partition or clustering keys which
have a representation in memory which is larger than 12.8 KB (10% of
LSA segment size).

There are several places in code (cache, background garbage
collection) which may need to linearize keys because of performing key
comparison, but it's not done safely:

 1) the code does not run with the LSA region locked, so pointers may
get invalidated on linearization if it needs to reclaim memory. This
is fixed by running the code inside an allocating section.

 2) LSA region is locked, but the scope of
with_linearized_managed_bytes() encloses the allocating section. If
allocating section needs to reclaim, linearization context will
contain invalidated pointers. The fix is to reorder the scopes so
that linearization context lives within an allocating section.

Example of 1 can be found in
range_populating_reader::handle_end_of_stream() where it performs a
lookup:

  auto prev = std::prev(it);
  if (prev->key().equal(*_cache._schema, *_last_key->_key)) {
     it->set_continuous(true);

but handle_end_of_stream() is not invoked under allocating section.

Example of 2 can be found in mutation_cleaner_impl::merge_some() where
it does:

  return with_linearized_managed_bytes([&] {
  ...
    return _worker_state->alloc_section(region, [&] {

Fixes #6637.
Refs #6108.

Tests:

  - unit (all)

Message-Id: <1592218544-9435-1-git-send-email-tgrabiec@scylladb.com>
(cherry picked from commit e81fc1f095)
2020-06-21 11:56:31 +03:00
Yaron Kaikov
b443b2574a release: prepare for 4.1.0 2020-06-18 14:42:57 +03:00
Asias He
2ee321d88e gossip: Do not send shutdown message when a node is in unknown status
When a replacing node is in early boot up and is not in HIBERNATE state
yet, if the node is killed by a user, the node will wrongly send a
shutdown message to other nodes. This is because UNKNOWN is not in
SILENT_SHUTDOWN_STATES, so in gossiper::do_stop_gossiping, the node will
send shutdown message. Other nodes in the cluster will call
storage_service::handle_state_normal for this node, since NORMAL and
SHUTDOWN status share the same status handler. As a result, other nodes
will incorrectly think the node is part of the cluster and the replace
operation is finished.

Such problem was seen in replace_node_no_hibernate_state_test dtest:

   n1, n2 are in the cluster
   n2 is dead
   n3 is started to replace n2, but n3 is killed in the middle
   n3 announces SHUTDOWN status wrongly
   n1 runs storage_service::handle_state_normal for n3
   n1 gets tokens for n3, which are empty, because n3 hasn't gossiped tokens yet
   n1 skips updating normal tokens for n3, but thinks n3 has replaced n2
   n4 starts to replace n2
   n4 checks the tokens for n2 in storage_service::join_token_ring (Cannot
      replace token {} which does not exist!) or
      storage_service::prepare_replacement_info (Cannot replace_address {}
      because it doesn't exist in gossip)

To fix, we add UNKNOWN into SILENT_SHUTDOWN_STATES and avoid sending
shutdown message.

Tests: replace_address_test.py:TestReplaceAddress.replace_node_no_hibernate_state_test
Fixes: #6436
(cherry picked from commit dddde33512)
2020-06-16 15:03:48 +03:00
Avi Kivity
4563f4b992 tools: toolchain: regenerate for gnutls 3.6.14
CVE-2020-13777.

Fixes #6627.

Toolchain source image registry disambiguated due to tighter podman defaults.
2020-06-15 07:49:21 +03:00
Kamil Braun
81dc8eeec7 cdc: rename CDC description tables
Commit 968177da04 has changed the schema
of cdc_topology_description and cdc_description tables in the
system_distributed keyspace.

Unfortunately this was a backwards-incompatible change: these tables
would always be created, irrespective of whether or not "experimental"
was enabled. They just wouldn't be populated with experimental=off.

If the user now tries to upgrade Scylla from a version before this change
to a version after this change, it will work as long as CDC is protected
by the experimental flag and the flag is off.

However, if we drop the flag, or if the user turns experimental on,
weird things will happen, such as nodes refusing to start because they
try to populate cdc_topology_description while assuming a different schema
for this table.

The simplest fix for this problem is to rename the tables. This fix must
get merged in before CDC goes out of experimental.
If the user upgrades his cluster from a pre-rename version, he will simply
have two garbage tables that he is free to delete after upgrading.

sstables and digests need to be regenerated for schema_digest_test since
this commit effectively adds new tables to the system_distributed keyspace.
This doesn't result in schema disagreement because the table is
announced to all nodes through the migration manager.

(cherry picked from commit d89b7a0548)
Fixes #6537.
2020-06-14 09:15:36 +03:00
Raphael S. Carvalho
2d72f7d8e5 compaction: Disable garbage collected writer if interposer consumer is used
GC writer, used for incremental compaction, cannot be currently used if interposer
consumer is used. That's because compaction assumes that GC writer will be operated
only by a single compaction writer at a given point in time.
With interposer consumer, multiple writers will concurrently operate on the same
GC writer, leading to a race condition which can potentially result in use-after-free.

Let's disable GC writer if interposer consumer is enabled. We're not losing anything
because GC writer is currently only needed on strategies which don't implement an
interposer consumer. Resharding will always disable GC writer, which is the expected
behavior because it doesn't support incremental compaction yet.
The proper fix, which allows GC writer and interposer consumer to work together,
will require more time to implement and test, and for that reason, I am postponing
it as #6472 is a showstopper for the current release.

Fixes #6472.

tests: mode(dev).

[Raphael: Fixed compilation failure in unit test test_bug_6472 for backport]

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Reviewed-by: Glauber Costa <glauber@scylladb.com>
(cherry picked from commit 097a5e9e07)
Message-Id: <20200610203928.86717-1-raphaelsc@scylladb.com>
2020-06-11 13:21:56 +03:00
Takuya ASADA
c6ee86b512 aws: update enhanced networking supported instance list
Sync enhanced networking supported instance list to latest one.

Reference: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html

Fixes #6540

(cherry picked from commit 969c4258cf)
2020-06-09 16:02:09 +03:00
Hagit Segev
67348cd6e8 release: prepare for 4.1.rc2 2020-06-08 16:37:36 +03:00
Israel Fruchter
44cc4843f1 fix "scylla_coredump_setup: Remove the coredump create by the check"
In 28c3d4 `out()` was used without `shell=True`, so the splitting of arguments
failed because of the complex commands in the cmd (pipes and such).

Fixes #6159

(cherry picked from commit a2bb48f44b)
2020-06-04 20:54:51 +03:00
Israel Fruchter
f1f5586bf6 scylla_coredump_setup: Remove the coredump create by the check
We generate a coredump as part of "scylla_coredump_setup" to verify that
coredumps are working. However, we need to *remove* that test coredump
to avoid people and test infrastructure reporting those coredumps.

Fixes #6159

(cherry picked from commit 28c3d4f8e8)
2020-06-03 16:52:51 +03:00
Amos Kong
3a447cd755 active the coredump directory mount during coredump setup
Currently we use a systemd mount (var-lib-systemd-coredump.mount) to mount
default coredump directory (/var/lib/systemd/coredump) to
(/var/lib/scylla/coredump). The /var/lib/scylla had been mounted to a big
storage, so we will have enough space for coredump after the mount.

Currently in coredump_setup, we only enable var-lib-systemd-coredump.mount,
but do not start it. The directory won't be mounted after coredump_setup, so the
coredump will still be saved to the default coredump directory.
The mount will only take effect after reboot.

Fixes #6566

(cherry picked from commit abf246f6e5)
2020-06-03 09:25:59 +03:00
Pekka Enberg
176aa91be5 Revert "scylla_coredump_setup: Fix incorrect coredump directory mount"
This reverts commit e77dad3adf because it's
incorrect.

Amos explains:

"Quote from https://www.freedesktop.org/software/systemd/man/systemd.mount.html

 What=

   Takes an absolute path of a device node, file or other resource to
   mount. See mount(8) for details. If this refers to a device node, a
   dependency on the respective device unit is automatically created.

 Where=

   Takes an absolute path of a file or directory for the mount point; in
   particular, the destination cannot be a symbolic link. If the mount
   point does not exist at the time of mounting, it is created as
   directory.

 So the mount point is '/var/lib/systemd/coredump' and
 '/var/lib/scylla/coredump' is the file to mount, because /var/lib/scylla
 had mounted a second big storage, which has enough space for Huge
 coredumps.

 Bentsi or other touched problem with old scylla-master AMI, a coredump
 occurred but not successfully saved to disk for enospc.  The directory
 /var/lib/systemd/coredump wasn't mounted to /var/lib/scylla/coredump.
 They WRONGLY thought the wrong mount was caused by the config problem,
 so he posted a fix.

 Actually scylla-ami-setup / coredump wasn't executed on that AMI, err:
 unit scylla-ami-setup.service not found Because
 'scylla-ami-setup.service' config file doesn't exist or is invalid.

 Details of my testing: https://github.com/scylladb/scylla/issues/6300#issuecomment-637324507

 So we need to revert Bentsi's patch, it changed the right config to wrong."

(cherry picked from commit 9d9d54c804)
2020-06-03 09:25:49 +03:00
Avi Kivity
4a3eff17ff Revert "Revert "config: Do not enable repair based node operations by default""
This reverts commit 71d0d58f8c. Repair-based
node operations are still not ready.
2020-06-02 18:08:03 +03:00
Nadav Har'El
2e00f6d0a1 alternator: fix support for bytes type in Query's KeyConditions
Our parsing of values in a KeyConditions parameter of Query was done naively.
As a result, we got bizarre error messages "condition not met: false" when
these values had incorrect type (this is issue #6490). Worse - the naive
conversion did not decode base64-encoded bytes value as needed, so
KeyConditions on bytes-typed keys did not work at all.

This patch fixes these bugs by using our existing utility function
get_key_from_typed_value(), which takes care of throwing sensible errors
when types don't match, and decoding base64 as needed.

Unfortunately, we didn't have test coverage for many of the KeyConditions
features including bytes keys, which is why this issue escaped detection.
A patch will follow with much more comprehensive tests for KeyConditions,
which also reproduce this issue and verify that it is fixed.

Refs #6490
Fixes #6495

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200524141800.104950-1-nyh@scylladb.com>
(cherry picked from commit 6b38126a8f)
2020-05-31 13:53:45 +03:00
Nadav Har'El
bf509c3b16 alternator: add mandatory configurable write isolation mode
Alternator supports four ways in which write operations can use quorum
writes or LWT or both, which we called "write isolation policies".

Until this patch, Alternator defaulted to the most generally safe policy,
"always_use_lwt". This default could have been overridden for each table
separately, but there was no way to change this default for all tables.
This patch adds a "--alternator-write-isolation" configuration option which
allows changing the default.

Moreover, @dorlaor asked that users must *explicitly* choose this default
mode, and not get "always_use_lwt" without noticing. The previous default,
"always_use_lwt" supports any workload correctly but because it uses LWT
for all writes it may be disappointingly slow for users who run write-only
workloads (including most benchmarks) - such users might find the slow
writes so disappointing that they will drop Scylla. Conversely, a default
of "forbid_rmw" will be faster and still correct, but will fail on workloads
which need read-modify-write operations - and surprise users that need these
operations. So Dor asked that *none* of the write modes be made the
default, and users must make an informed choice between the different write
modes, rather than being disappointed by a default choice they weren't
aware of.

So after this patch, Scylla refuses to boot if Alternator is enabled but
a "--alternator-write-isolation" option is missing.

The patch also modifies the relevant documentation, adds the same option to
our docker image, and modifies the test-running script
test/alternator/run to run Scylla with the old default mode (always_use_lwt),
which we need because we want to test RMW operations as well.

Fixes #6452

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200524160338.108417-1-nyh@scylladb.com>
(cherry picked from commit c3da9f2bd4)
2020-05-31 13:42:11 +03:00
Avi Kivity
84ef30752f Update seastar submodule
* seastar e708d1df3a...78f626af6c (1):
  > reactor: don't mlock all memory at once

Fixes #6460.
2020-05-31 13:34:42 +03:00
Avi Kivity
f1b71ec216 Point seastar submodule at scylla-seastar.git
This allows us to backport seastar patches to the 4.1 branch.
2020-05-31 13:34:42 +03:00
Piotr Sarna
93ed536fba alternator: wait for schema agreement after table creation
In order to be sure that all nodes acknowledged that a table was
created, the CreateTable request will now only return after
seeing that schema agreement was reached.
Rationale: alternator users check if the table was created by issuing
a DescribeTable request, and assume that the table was correctly
created if it returns nonempty results. However, our current
implementation of DescribeTable returns local results, which is
not enough to judge if all the other nodes acknowledge the new table.
CQL drivers are reported to always wait for schema agreement after
issuing DDL-changing requests, so there should be no harm in waiting
a little longer for alternator's CreateTable as well.

Fixes #6361
Tests: alternator(local)

(cherry picked from commit 5f2eadce09)
2020-05-31 13:18:11 +03:00
Nadav Har'El
ab3da4510c docs, alternator: improve description of status of global tables support
The existing text did not explain what happens if additional DCs are added
to the cluster, so this patch improves the explanation of the status of
our support for global tables, including that issue.

Fixes #6353

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200513175908.21642-1-nyh@scylladb.com>
(cherry picked from commit f3fd976120)
2020-05-31 13:13:13 +03:00
Asias He
bb8fcbff68 repair: Abort the queue in write_end_of_stream in case of error
In write_end_of_stream, it does:

1) Write write_partition_end
2) Write empty mutation_fragment_opt

If 1) fails, 2) will be skipped, the consumer of the queue will wait for
the empty mutation_fragment_opt forever.

Found this issue when injecting random exceptions between 1) and 2).

Refs #6272
Refs #6248

(cherry picked from commit b744dba75a)
2020-05-27 20:11:30 +03:00
Hagit Segev
af43d0c62d release: prepare for 4.1.rc1 2020-05-26 18:57:30 +03:00
Amnon Heiman
8c8c266f67 storage_service: get_range_to_address_map prevent use after free
The implementation of get_range_to_address_map has a default behaviour,
when getting an empty keyspace, it uses the first non-system keyspace
(first here is basically, just a keyspace).

The current implementation has two issues, first, it uses a reference to
a string that is held on a stack of another function. In other words,
there's a use after free that is not clear why we never hit.

The second, it calls get_non_system_keyspaces twice. Though this is not
a bug, it's redundant (get_non_system_keyspaces uses a loop, so calling
that function does have a cost).

This patch solves both issues, by changing the implementation to hold a
string instead of a reference to a string.

Second, it stores the results from get_non_system_keyspaces and reuses
them; this is more efficient and holds the returned values on the local
stack.

Fixes #6465

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 69a46d4179)
2020-05-25 12:48:11 +03:00
Nadav Har'El
6d1301d93c alternator: better error messages when 'forbid_rmw' mode is on
When the 'forbid_rmw' write isolation policy is selected, read-modify-write
are intentionally forbidden. The error message in this case used to say:

	"Read-modify-write operations not supported"

Which can lead users to believe that this operation isn't supported by this
version of Alternator - instead of realizing that this is in fact a
configurable choice.

So in this patch we just change the error message to say:

	"Read-modify-write operations are disabled by 'forbid_rmw' write isolation policy. Refer to https://github.com/scylladb/scylla/blob/master/docs/alternator/alternator.md#write-isolation-policies for more information."

Fixes #6421.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200518125538.8347-1-nyh@scylladb.com>
(cherry picked from commit 5ef9854e86)
2020-05-25 08:49:48 +03:00
Tomasz Grabiec
be545d6d5d sstables: index_reader: Fix overflow when calculating promoted index end
When index file is larger than 4GB, offset calculation will overflow
uint32_t and _promoted_index_end will be too small.

As a result, promoted_index_size calculation will underflow and the
rest of the page will be interpreted as a promoted index.

The partitions which are in the remainder of the index page will not
be found by single-partition queries.

Data is not lost.

Introduced in 6c5f8e0eda.

Fixes #6040
Message-Id: <20200521174822.8350-1-tgrabiec@scylladb.com>

(cherry picked from commit a6c87a7b9e)
2020-05-24 09:45:42 +03:00
Rafael Ávila de Espíndola
a1c15f0690 repair: Make sure sinks are always closed
In a recent next failure I got the following backtrace

    function=function@entry=0x270360 "seastar::rpc::sink_impl<Serializer, Out>::~sink_impl() [with Serializer = netw::serializer; Out = {repair_row_on_wire_with_cmd}]") at assert.c:101
    at ./seastar/include/seastar/core/shared_ptr.hh:463
    at repair/row_level.cc:2059

This patch changes a few functions to use finally to make sure the sink
is always closed.

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
Message-Id: <20200515202803.60020-1-espindola@scylladb.com>
(cherry picked from commit 311fbe2f0a)

Ref #6414
2020-05-20 09:00:10 +03:00
Asias He
4d68c53389 repair: Fix race between write_end_of_stream and apply_rows
Consider: n1, n2, n1 is the repair master, n2 is the repair follower.

=== Case 1 ===
1) n1 sends missing rows {r1, r2} to n2
2) n2 runs apply_rows_on_follower to apply rows, e.g., {r1, r2}, r1
   is written to sstable, r2 is not written yet, r1 belongs to
   partition 1, r2 belongs to partition 2. It yields after row r1 is
   written.
   data: partition_start, r1
3) n1 sends repair_row_level_stop to n2 because error has happened on n1
4) n2 calls wait_for_writer_done() which in turn calls write_end_of_stream()
   data: partition_start, r1, partition_end
5) Step 2 resumes to apply the rows.
   data: partition_start, r1, partition_end, partition_end, partition_start, r2

=== Case 2 ===
1) n1 sends missing rows {r1, r2} to n2
2) n2 runs apply_rows_on_follower to apply rows, e.g., {r1, r2}, r1
   is written to sstable, r2 is not written yet, r1 belongs to partition
   1, r2 belongs to partition 2. It yields after partition_start for r2
   is written but before _partition_opened is set to true.
   data: partition_start, r1, partition_end, partition_start
3) n1 sends repair_row_level_stop to n2 because error has happened on n1
4) n2 calls wait_for_writer_done() which in turn calls write_end_of_stream().
   Since _partition_opened[node_idx] is false, partition_end is skipped,
   end_of_stream is written.
   data: partition_start, r1, partition_end, partition_start, end_of_stream

This causes unbalanced partition_start and partition_end in the stream
written to sstables.

To fix, serialize the write_end_of_stream and apply_rows with a semaphore.

Fixes: #6394
Fixes: #6296
Fixes: #6414
(cherry picked from commit b2c4d9fdbc)
2020-05-20 08:07:53 +03:00
Piotr Dulikowski
7d1f352be2 hinted handoff: don't keep positions of old hints in rps_set
When sending hints from one file, rps_set field in send_one_file_ctx
keeps track of commitlog positions of hints that are being currently
sent, or have failed to be sent. At the end of the operation, if sending
of some hints failed, we will choose position of the earliest hint that
failed to be sent, and will retry sending that file later, starting from
that position. This position is stored in _last_not_complete_rp.

Usually, this set has a bounded size, because we impose a limit of at
most 128 hints being sent concurrently. Because we do not attempt to
send any more hints after a failure is detected, rps_set should not have
more than 128 elements at a time.

Due to a bug, commitlog positions of old hints (older than
gc_grace_seconds of the destination table) were inserted into rps_set
but not removed after checking their age. This could cause rps_set to
grow very large when replaying a file with old hints.

Moreover, if the file mixed expired and non-expired hints (which could
happen if it had hints to two tables with different gc_grace_seconds),
and sending of some non-expired hints failed, then positions of expired
hints could influence calculation of _last_not_complete_rp, and more hints
than necessary would be resent on the next retry.

This simple patch removes commitlog position of a hint from rps_set when
it is detected to be too old.

Fixes #6422

(cherry picked from commit 85d5c3d5ee)
2020-05-20 08:05:51 +03:00
Piotr Dulikowski
0fe5335447 hinted handoff: remove discarded hint positions from rps_set
Related commit: 85d5c3d

When attempting to send a hint, an exception might occur that results in
that hint being discarded (e.g. keyspace or table of the hint was
removed).

When such an exception is thrown, position of the hint will already be
stored in rps_set. We are only allowed to retain positions of hints that
failed to be sent and needed to be retried later. Dropping a hint is not
an error, therefore its position should be removed from rps_set - but
current logic does not do that.

Because of that bug, hint files with many discardable hints might cause
rps_set to grow large when the file is replayed. Furthermore, leaving
positions of such hints in rps_set might cause more hints than necessary
to be re-sent if some non-discarded hints fail to be sent.

This commit fixes the problem by removing positions of discarded hints
from rps_set.

Fixes #6433

(cherry picked from commit 0c5ac0da98)
2020-05-20 08:03:20 +03:00
Avi Kivity
8a026b8b14 Revert "compaction_manager: allow early aborts through abort sources."
This reverts commit e8213fb5c3. It results
in an assertion failure in remove_index_file_test.

Fixes #6413.

(cherry picked from commit 5b971397aa)
2020-05-13 18:26:34 +03:00
Yaron Kaikov
0760107b9f release: prepare for 4.1.rc0 2020-05-11 11:32:01 +03:00
463 changed files with 9344 additions and 18238 deletions

9
.gitmodules vendored
View File

@@ -9,12 +9,9 @@
[submodule "libdeflate"]
path = libdeflate
url = ../libdeflate
[submodule "zstd"]
path = zstd
url = ../zstd
[submodule "abseil"]
path = abseil
url = ../abseil-cpp
[submodule "scylla-jmx"]
path = scylla-jmx
url = ../scylla-jmx
[submodule "scylla-tools"]
path = scylla-tools
url = ../scylla-tools-java

View File

@@ -134,11 +134,15 @@ add_executable(scylla
${SEASTAR_SOURCE_FILES}
${SCYLLA_SOURCE_FILES})
# Note that since CLion does not understand GCC6 concepts, we always disable them (even if users configure otherwise).
# CLion seems to have trouble with `-U` (macro undefinition), so we do it this way instead.
list(REMOVE_ITEM SEASTAR_CFLAGS "-DHAVE_GCC6_CONCEPTS")
# If the Seastar pkg-config information is available, append to the default flags.
#
# For ease of browsing the source code, we always pretend that DPDK is enabled.
target_compile_options(scylla PUBLIC
-std=gnu++20
-std=gnu++1z
-DHAVE_DPDK
-DHAVE_HWLOC
"${SEASTAR_CFLAGS}")

View File

@@ -8,4 +8,4 @@ Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to re
# Contributing Code to Scylla
To contribute code to Scylla, you need to sign the [Contributor License Agreement](https://www.scylladb.com/open-source/contributor-agreement/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
To contribute code to Scylla, you need to sign the [Contributor License Agreement](http://www.scylladb.com/opensource/cla/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.

View File

@@ -18,35 +18,23 @@ $ git submodule update --init --recursive
### Dependencies
Scylla is fairly fussy about its build environment, requiring a very recent
version of the C++20 compiler and numerous tools and libraries to build.
Scylla depends on the system package manager for its development dependencies.
Run `./install-dependencies.sh` (as root) to use your Linux distribution's
package manager to install the appropriate packages on your build machine.
However, this will only work on very recent distributions. For example,
currently Fedora users must upgrade to Fedora 32 otherwise the C++ compiler
will be too old, and not support the new C++20 standard that Scylla uses.
Running `./install-dependencies.sh` (as root) installs the appropriate packages based on your Linux distribution.
Alternatively, to avoid having to upgrade your build machine or install
various packages on it, we provide another option - the **frozen toolchain**.
This is a script, `./tools/toolchain/dbuild`, that can execute build or run
commands inside a Docker image that contains exactly the right build tools and
libraries. The `dbuild` technique is useful for beginners, but is also the way
in which ScyllaDB produces official releases, so it is highly recommended.
On Ubuntu and Debian based Linux distributions, some packages
required to build Scylla are missing in the official upstream:
To use `dbuild`, you simply prefix any build or run command with it. Building
and running Scylla becomes as easy as:
- libthrift-dev and libthrift
- antlr3-c++-dev
```bash
$ ./tools/toolchain/dbuild ./configure.py
$ ./tools/toolchain/dbuild ninja build/release/scylla
$ ./tools/toolchain/dbuild ./build/release/scylla --developer-mode 1
```
Try running ```sudo ./scripts/scylla_current_repo``` to add Scylla upstream,
and get the missing packages from it.
### Build system
**Note**: Compiling Scylla requires, conservatively, 2 GB of memory per native
thread, and up to 3 GB per native thread while linking. GCC >= 10 is
thread, and up to 3 GB per native thread while linking. GCC >= 8.1.1 is
required.
Scylla is built with [Ninja](https://ninja-build.org/), a low-level rule-based system. A Python script, `configure.py`, generates a Ninja file (`build.ninja`) based on configuration options.

View File

@@ -2,24 +2,22 @@
## Quick-start
Scylla is fairly fussy about its build environment, requiring very recent
versions of the C++20 compiler and of many libraries to build. The document
[HACKING.md](HACKING.md) includes detailed information on building and
developing Scylla, but to get Scylla building quickly on (almost) any build
machine, Scylla offers a [frozen toolchain](tools/toolchain/README.md).
This is a pre-configured Docker image which includes recent versions of all
the required compilers, libraries and build tools. Using the frozen toolchain
allows you to avoid changing anything in your build machine to meet Scylla's
requirements - you just need to meet the frozen toolchain's prerequisites
(mostly, Docker or Podman being available).
Building and running Scylla with the frozen toolchain is as easy as:
To get the build going quickly, Scylla offers a [frozen toolchain](tools/toolchain/README.md)
which would build and run Scylla using a pre-configured Docker image.
Using the frozen toolchain will also isolate all of the installed
dependencies in a Docker container.
Assuming you have met the toolchain prerequisites, which is running
Docker in user mode, building and running is as easy as:
```bash
$ ./tools/toolchain/dbuild ./configure.py
$ ./tools/toolchain/dbuild ninja build/release/scylla
$ ./tools/toolchain/dbuild ./build/release/scylla --developer-mode 1
```
```
Please see [HACKING.md](HACKING.md) for detailed information on building and developing Scylla.
**Note**: GCC >= 8.1.1 is required to compile Scylla.
## Running Scylla
@@ -69,20 +67,15 @@ The courses are free, self-paced and include hands-on examples. They cover a var
administration, architecture, basic NoSQL concepts, using drivers for application development, Scylla setup, failover, compactions,
multi-datacenters and how Scylla integrates with third-party applications.
## Building a CentOS-based Docker image
## Building Fedora-based Docker image
Build a Docker image with:
```
cd dist/docker/redhat
cd dist/docker
docker build -t <image-name> .
```
This build is based on executables downloaded from downloads.scylladb.com,
**not** on the executables built in this source directory. See further
instructions in dist/docker/redhat/README.md to build a docker image from
your own executables.
Run the image with:
```

View File

@@ -1,7 +1,7 @@
#!/bin/sh
PRODUCT=scylla
VERSION=4.2.4
VERSION=4.1.11
if test -f version
then

View File

@@ -1,26 +0,0 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "absl-flat_hash_map.hh"
// Out-of-line definition of the string hasher: the view's contents are fed
// to Abseil's hasher for std::string_view and the resulting value is returned.
size_t sstring_hash::operator()(std::string_view v) const noexcept {
    const absl::Hash<std::string_view> hasher{};
    return hasher(v);
}

View File

@@ -1,47 +0,0 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <absl/container/flat_hash_map.h>
#include <seastar/core/sstring.hh>
using namespace seastar;
// Hash functor for string keys that takes its argument as a std::string_view.
// It declares the `is_transparent` marker type, the conventional opt-in that
// hash containers look for before allowing lookups with key-like types other
// than the stored key type. operator() is only declared here; its definition
// is not part of this header.
struct sstring_hash {
using is_transparent = void;
size_t operator()(std::string_view v) const noexcept;
};
// Equality functor for string keys, the companion of the string hasher.
// Both operands are taken as std::string_view and compared by content; the
// `is_transparent` marker type is declared so containers can use it for
// lookups with key-like types other than the stored key type.
struct sstring_eq {
    using is_transparent = void;

    bool operator()(std::string_view a, std::string_view b) const noexcept {
        // compare() yields 0 exactly when both views hold the same characters.
        return a.compare(b) == 0;
    }
};
// Primary template: an empty struct that publicly derives from
// absl::flat_hash_map instantiated with exactly the same template arguments.
template <typename K, typename V, typename... Ts>
struct flat_hash_map : public absl::flat_hash_map<K, V, Ts...> {
};
// Partial specialization for sstring keys: derives from absl::flat_hash_map
// with sstring_hash and sstring_eq supplied as the hasher and key-equality
// functors, so callers only need to name the key and value types.
template <typename V>
struct flat_hash_map<sstring, V>
: public absl::flat_hash_map<sstring, V, sstring_hash, sstring_eq> {};

View File

@@ -77,7 +77,7 @@ std::string base64_encode(bytes_view in) {
return ret;
}
static std::string base64_decode_string(std::string_view in) {
bytes base64_decode(std::string_view in) {
int i = 0;
int8_t chunk4[4]; // chunk of input, each byte converted to 0..63;
std::string ret;
@@ -104,42 +104,8 @@ static std::string base64_decode_string(std::string_view in) {
if (i==3)
ret += ((chunk4[1] & 0xf) << 4) + ((chunk4[2] & 0x3c) >> 2);
}
return ret;
}
// Decodes base64 text and returns the result as `bytes`.
// The actual decoding is delegated to base64_decode_string(); this wrapper
// only converts the decoded std::string into `bytes`.
bytes base64_decode(std::string_view in) {
    // FIXME: This copy is sad. The problem is we need back "bytes"
    // but "bytes" doesn't have efficient append and std::string.
    // To fix this we need to use bytes' "uninitialized" feature.
    std::string decoded = base64_decode_string(in);
    return bytes(decoded.begin(), decoded.end());
}
// Counts the '=' padding characters at the end of a base64 string.
// Only the last two characters are inspected, so the result is 0, 1 or 2.
static size_t base64_padding_len(std::string_view str) {
    size_t padding = 0;
    padding += (!str.empty() && str.back() == '=');
    padding += (str.size() > 1 && *(str.end() - 2) == '=');
    return padding;
}

// Returns the number of bytes that decoding the base64 string `str` yields:
// three bytes per complete 4-character group, minus one byte per trailing
// '=' padding character. The string is not decoded or validated.
//
// For malformed input that is shorter than one group but still ends in '='
// (e.g. "=" or "=="), the padding count exceeds the group capacity; the
// result is clamped to 0 instead of letting the unsigned subtraction wrap
// around to a huge value.
size_t base64_decoded_len(std::string_view str) {
    const size_t capacity = str.size() / 4 * 3;
    const size_t padding = base64_padding_len(str);
    return padding > capacity ? 0 : capacity - padding;
}
// Checks, without decoding the whole input, whether the data encoded by the
// base64 string `base` begins with the data encoded by the base64 string
// `operand`.
//
// Returns false when `base` is shorter than `operand` or when either length
// is not a multiple of 4. If `operand` carries no '=' padding, its encoded
// text must simply be a textual prefix of `base`. Otherwise every group
// except the operand's last one is compared textually, and only that last
// 4-character group of each string is decoded and compared.
bool base64_begins_with(std::string_view base, std::string_view operand) {
    if (base.size() < operand.size() || base.size() % 4 != 0 || operand.size() % 4 != 0) {
        return false;
    }
    if (base64_padding_len(operand) == 0) {
        return base.starts_with(operand);
    }
    // The operand is padded, hence non-empty and at least one group long.
    const size_t last_group = operand.size() - 4;
    if (base.substr(0, last_group) != operand.substr(0, last_group)) {
        return false;
    }
    // Decode and compare the last 4-character group of the operand with the
    // group at the same position in base. The second argument of substr() is
    // a character count, so exactly one group (4 characters) is requested.
    const std::string base_remainder = base64_decode_string(base.substr(last_group, 4));
    const std::string operand_remainder = base64_decode_string(operand.substr(last_group));
    return base_remainder.starts_with(operand_remainder);
}

View File

@@ -32,7 +32,3 @@ bytes base64_decode(std::string_view);
// Convenience overload: views the string held by the JSON value `v`
// (pointer plus length, as reported by the value itself) and forwards it to
// the std::string_view overload of base64_decode().
inline bytes base64_decode(const rjson::value& v) {
    const std::string_view encoded(v.GetString(), v.GetStringLength());
    return base64_decode(encoded);
}
size_t base64_decoded_len(std::string_view str);
bool base64_begins_with(std::string_view base, std::string_view operand);

View File

@@ -34,7 +34,7 @@
#include <boost/algorithm/cxx11/any_of.hpp>
#include "utils/overloaded_functor.hh"
#include "expressions.hh"
#include "expressions_eval.hh"
namespace alternator {
@@ -67,6 +67,49 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
return it->second;
}
// Builds a `contains` restriction on column `cdef` from a (key, value) pair:
// `key` is converted to bytes through utf8_type, `value` is serialized with
// serialize_item(), and both are wrapped as constant terms that are handed
// to the restriction.
static ::shared_ptr<cql3::restrictions::single_column_restriction::contains> make_map_element_restriction(const column_definition& cdef, std::string_view key, const rjson::value& value) {
    using contains_restriction = cql3::restrictions::single_column_restriction::contains;

    // Map key term.
    bytes key_bytes = utf8_type->from_string(sstring_view(key.data(), key.size()));
    auto key_term = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(key_bytes)));

    // Map entry (value) term.
    bytes entry_bytes = serialize_item(value);
    auto entry_term = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(entry_bytes)));

    return make_shared<contains_restriction>(cdef, std::move(key_term), std::move(entry_term));
}
// Builds an EQ restriction on column `cdef`. The JSON `value` is converted
// to the column's raw key bytes by get_key_from_typed_value() and wrapped as
// a constant term for the restriction.
static ::shared_ptr<cql3::restrictions::single_column_restriction::EQ> make_key_eq_restriction(const column_definition& cdef, const rjson::value& value) {
    using eq_restriction = cql3::restrictions::single_column_restriction::EQ;

    bytes key_bytes = get_key_from_typed_value(value, cdef);
    auto key_term = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(key_bytes)));
    return make_shared<eq_restriction>(cdef, std::move(key_term));
}
// Translates a JSON filter object into a statement_restrictions instance.
//
// Each member of `query_filter` maps a column name to a condition holding a
// "ComparisonOperator" and an "AttributeValueList". Only the EQ operator with
// exactly one attribute value is accepted; anything else is rejected with a
// ValidationException api_error. When the schema has a column definition for
// the name, a key EQ restriction is added for it; otherwise a map-element
// restriction on `attrs_col` is added, keyed by the column name.
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter) {
    clogger.trace("Getting filtering restrictions for: {}", rjson::print(query_filter));
    auto restrictions = ::make_shared<cql3::restrictions::statement_restrictions>(schema, true);

    for (auto member = query_filter.MemberBegin(); member != query_filter.MemberEnd(); ++member) {
        const std::string_view column_name(member->name.GetString(), member->name.GetStringLength());
        const rjson::value& condition = member->value;

        // Both members are fetched before the operator is interpreted.
        const rjson::value& operator_json = rjson::get(condition, "ComparisonOperator");
        const rjson::value& operands = rjson::get(condition, "AttributeValueList");

        const comparison_operator_type op = get_comparison_operator(operator_json);
        if (op != comparison_operator_type::EQ) {
            throw api_error("ValidationException", "Filtering is currently implemented for EQ operator only");
        }
        if (operands.Size() != 1) {
            throw api_error("ValidationException", format("EQ restriction needs exactly 1 attribute value: {}", rjson::print(operands)));
        }

        const column_definition* key_column = schema->get_column_definition(to_bytes(column_name.data()));
        if (key_column) {
            // Primary key restriction
            restrictions->add_restriction(make_key_eq_restriction(*key_column, operands[0]), false, true);
            continue;
        }
        // Regular column restriction
        restrictions->add_restriction(make_map_element_restriction(attrs_col, column_name, operands[0]), false, true);
    }
    return restrictions;
}
namespace {
struct size_check {
@@ -159,47 +202,36 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
}
// Check if two JSON-encoded values match with the BEGINS_WITH relation
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
if (v1_from_query) {
throw api_error("ValidationException", "begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
if (v1_from_query) {
throw api_error("ValidationException", format("begins_with supports only string or binary type, got: {}", *v1));
} else {
bad = true;
}
}
static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
// BEGINS_WITH requires that its single operand (v2) be a string or
// binary - otherwise it's a validation error. However, problems with
// the stored attribute (v1) will just return false (no match).
if (!v2.IsObject() || v2.MemberCount() != 1) {
if (v2_from_query) {
throw api_error("ValidationException", "begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
if (v2_from_query) {
throw api_error("ValidationException", format("begins_with() supports only string or binary type, got: {}", v2));
} else {
bad = true;
}
throw api_error("ValidationException", format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
}
if (bad) {
auto it2 = v2.MemberBegin();
if (it2->name != "S" && it2->name != "B") {
throw api_error("ValidationException", format("BEGINS_WITH operator requires String or Binary in AttributeValue, got {}", it2->name));
}
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
return false;
}
auto it1 = v1->MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name != it2->name) {
return false;
}
if (it2->name == "S") {
return rjson::to_string_view(it1->value).starts_with(rjson::to_string_view(it2->value));
std::string_view val1(it1->value.GetString(), it1->value.GetStringLength());
std::string_view val2(it2->value.GetString(), it2->value.GetStringLength());
return val1.substr(0, val2.size()) == val2;
} else /* it2->name == "B" */ {
return base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
// TODO (optimization): Check the begins_with condition directly on
// the base64-encoded string, without making a decoded copy.
bytes val1 = base64_decode(it1->value);
bytes val2 = base64_decode(it2->value);
return val1.substr(0, val2.size()) == val2;
}
}
@@ -214,6 +246,11 @@ bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
}
const auto& kv1 = *v1->MemberBegin();
const auto& kv2 = *v2.MemberBegin();
if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
throw api_error("ValidationException",
format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
"got {} instead", kv2.name));
}
if (kv1.name == "S" && kv2.name == "S") {
return rjson::to_string_view(kv1.value).find(rjson::to_string_view(kv2.value)) != std::string_view::npos;
} else if (kv1.name == "B" && kv2.name == "B") {
@@ -296,38 +333,24 @@ static bool check_NOT_NULL(const rjson::value* val) {
return val != nullptr;
}
// Only values of type S, N or B (string, number or bytes) may be compared
// by the various comparison operators - lt, le, gt, ge, and between.
// Returns true only when v is a single-member object whose member name is
// one of those three type tags.
static bool check_comparable_type(const rjson::value& v) {
    const bool single_member_object = v.IsObject() && v.MemberCount() == 1;
    if (!single_member_object) {
        return false;
    }
    const rjson::value& tag = v.MemberBegin()->name;
    if (tag == "S") {
        return true;
    }
    if (tag == "N") {
        return true;
    }
    return tag == "B";
}
// Check if two JSON-encoded values match with cmp.
template <typename Comparator>
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !check_comparable_type(*v1)) {
if (v1_from_query) {
throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
if (!v2.IsObject() || v2.MemberCount() != 1) {
throw api_error("ValidationException",
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
}
if (!check_comparable_type(v2)) {
if (v2_from_query) {
throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
const auto& kv2 = *v2.MemberBegin();
if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
throw api_error("ValidationException",
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
}
if (bad) {
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
return false;
}
const auto& kv1 = *v1->MemberBegin();
const auto& kv2 = *v2.MemberBegin();
if (kv1.name != kv2.name) {
return false;
}
@@ -341,8 +364,7 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
if (kv1.name == "B") {
return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
}
// cannot reach here, as check_comparable_type() verifies the type is one
// of the above options.
clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
return false;
}
@@ -373,71 +395,57 @@ struct cmp_gt {
static constexpr const char* diagnostic = "GT operator";
};
// True if v is between lb and ub, inclusive. Throws or returns false
// (depending on bounds_from_query parameter) if lb > ub.
// True if v is between lb and ub, inclusive. Throws if lb > ub.
template <typename T>
static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
if (cmp_lt()(ub, lb)) {
if (bounds_from_query) {
throw api_error("ValidationException",
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
} else {
return false;
}
throw api_error("ValidationException",
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
}
return cmp_ge()(v, lb) && cmp_le()(v, ub);
}
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
bool v_from_query, bool lb_from_query, bool ub_from_query) {
if ((v && v_from_query && !check_comparable_type(*v)) ||
(lb_from_query && !check_comparable_type(lb)) ||
(ub_from_query && !check_comparable_type(ub))) {
throw api_error("ValidationException", "between allow only the types String, Number, or Binary");
}
if (!v || !v->IsObject() || v->MemberCount() != 1 ||
!lb.IsObject() || lb.MemberCount() != 1 ||
!ub.IsObject() || ub.MemberCount() != 1) {
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
if (!v) {
return false;
}
if (!v->IsObject() || v->MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
}
if (!lb.IsObject() || lb.MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
}
if (!ub.IsObject() || ub.MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
}
const auto& kv_v = *v->MemberBegin();
const auto& kv_lb = *lb.MemberBegin();
const auto& kv_ub = *ub.MemberBegin();
bool bounds_from_query = lb_from_query && ub_from_query;
if (kv_lb.name != kv_ub.name) {
if (bounds_from_query) {
throw api_error("ValidationException",
throw api_error(
"ValidationException",
format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
kv_lb.name, kv_ub.name));
} else {
return false;
}
}
if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
return false;
}
if (kv_v.name == "N") {
const char* diag = "BETWEEN operator";
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
}
if (kv_v.name == "S") {
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
}
if (kv_v.name == "B") {
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
}
if (v_from_query) {
throw api_error("ValidationException",
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
throw api_error("ValidationException",
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
kv_lb.name));
} else {
return false;
}
}
// Verify one Expect condition on one attribute (whose content is "got")
@@ -484,19 +492,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NE(got, (*attribute_value_list)[0]);
case comparison_operator_type::LT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
case comparison_operator_type::LE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_le{});
case comparison_operator_type::GT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
case comparison_operator_type::GE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
case comparison_operator_type::BEGINS_WITH:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
case comparison_operator_type::IN:
verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
return check_IN(got, *attribute_value_list);
@@ -508,87 +516,56 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NOT_NULL(got);
case comparison_operator_type::BETWEEN:
verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
false, true, true);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
case comparison_operator_type::CONTAINS:
{
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
// Expected's "CONTAINS" has this artificial limitation.
// ConditionExpression's "contains()" does not...
const rjson::value& arg = (*attribute_value_list)[0];
const auto& argtype = (*arg.MemberBegin()).name;
if (argtype != "S" && argtype != "N" && argtype != "B") {
throw api_error("ValidationException",
format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
"got {} instead", argtype));
}
return check_CONTAINS(got, arg);
}
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_CONTAINS(got, (*attribute_value_list)[0]);
case comparison_operator_type::NOT_CONTAINS:
{
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
// Expected's "NOT_CONTAINS" has this artificial limitation.
// ConditionExpression's "contains()" does not...
const rjson::value& arg = (*attribute_value_list)[0];
const auto& argtype = (*arg.MemberBegin()).name;
if (argtype != "S" && argtype != "N" && argtype != "B") {
throw api_error("ValidationException",
format("CONTAINS operator requires a single AttributeValue of type String, Number, or Binary, "
"got {} instead", argtype));
}
return check_NOT_CONTAINS(got, arg);
}
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_NOT_CONTAINS(got, (*attribute_value_list)[0]);
}
throw std::logic_error(format("Internal error: corrupted operator enum: {}", int(op)));
}
}
// Reads the optional "ConditionalOperator" member of the request.
// Returns MISSING when the member is absent, AND or OR when it is the
// corresponding string, and throws a ValidationException api_error when it
// is not a string or holds any other text.
conditional_operator_type get_conditional_operator(const rjson::value& req) {
    const rjson::value* op = rjson::find(req, "ConditionalOperator");
    if (op == nullptr) {
        return conditional_operator_type::MISSING;
    }
    if (!op->IsString()) {
        throw api_error("ValidationException", "'ConditionalOperator' parameter, if given, must be a string");
    }
    const auto text = rjson::to_string_view(*op);
    if (text == "AND") {
        return conditional_operator_type::AND;
    }
    if (text == "OR") {
        return conditional_operator_type::OR;
    }
    throw api_error("ValidationException",
            format("'ConditionalOperator' parameter must be AND, OR or missing. Found {}.", text));
}
// Check if the existing values of the item (previous_item) match the
// conditions given by the Expected and ConditionalOperator parameters
// (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
// This function can throw a ValidationException API error if there
// are errors in the format of the condition itself.
bool verify_expected(const rjson::value& req, const rjson::value* previous_item) {
bool verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item) {
const rjson::value* expected = rjson::find(req, "Expected");
auto conditional_operator = get_conditional_operator(req);
if (conditional_operator != conditional_operator_type::MISSING &&
(!expected || (expected->IsObject() && expected->GetObject().ObjectEmpty()))) {
throw api_error("ValidationException", "'ConditionalOperator' parameter cannot be specified for missing or empty Expression");
}
if (!expected) {
return true;
}
if (!expected->IsObject()) {
throw api_error("ValidationException", "'Expected' parameter, if given, must be an object");
}
bool require_all = conditional_operator != conditional_operator_type::OR;
return verify_condition(*expected, require_all, previous_item);
}
// ConditionalOperator can be "AND" for requiring all conditions, or
// "OR" for requiring one condition, and defaults to "AND" if missing.
const rjson::value* conditional_operator = rjson::find(req, "ConditionalOperator");
bool require_all = true;
if (conditional_operator) {
if (!conditional_operator->IsString()) {
throw api_error("ValidationException", "'ConditionalOperator' parameter, if given, must be a string");
}
std::string_view s(conditional_operator->GetString(), conditional_operator->GetStringLength());
if (s == "AND") {
// require_all is already true
} else if (s == "OR") {
require_all = false;
} else {
throw api_error("ValidationException", "'ConditionalOperator' parameter must be AND, OR or missing");
}
if (expected->GetObject().ObjectEmpty()) {
throw api_error("ValidationException", "'ConditionalOperator' parameter cannot be specified for empty Expression");
}
}
bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item) {
for (auto it = condition.MemberBegin(); it != condition.MemberEnd(); ++it) {
for (auto it = expected->MemberBegin(); it != expected->MemberEnd(); ++it) {
const rjson::value* got = nullptr;
if (previous_item) {
got = rjson::find(*previous_item, rjson::to_string_view(it->name));
if (previous_item && previous_item->IsObject() && previous_item->HasMember("Item")) {
got = rjson::find((*previous_item)["Item"], rjson::to_string_view(it->name));
}
bool success = verify_expected_one(it->value, got);
if (success && !require_all) {
@@ -604,8 +581,12 @@ bool verify_condition(const rjson::value& condition, bool require_all, const rjs
return require_all;
}
static bool calculate_primitive_condition(const parsed::primitive_condition& cond,
const rjson::value* previous_item) {
bool calculate_primitive_condition(const parsed::primitive_condition& cond,
std::unordered_set<std::string>& used_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
const rjson::value& req,
schema_ptr schema,
const std::unique_ptr<rjson::value>& previous_item) {
std::vector<rjson::value> calculated_values;
calculated_values.reserve(cond._values.size());
for (const parsed::value& v : cond._values) {
@@ -613,7 +594,9 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
cond._op == parsed::primitive_condition::type::VALUE ?
calculate_value_caller::ConditionExpressionAlone :
calculate_value_caller::ConditionExpression,
previous_item));
rjson::find(req, "ExpressionAttributeValues"),
used_attribute_names, used_attribute_values,
req, schema, previous_item));
}
switch (cond._op) {
case parsed::primitive_condition::type::BETWEEN:
@@ -621,8 +604,7 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
}
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
case parsed::primitive_condition::type::IN:
return check_IN(calculated_values);
case parsed::primitive_condition::type::VALUE:
@@ -653,17 +635,13 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
case parsed::primitive_condition::type::NE:
return check_NE(&calculated_values[0], calculated_values[1]);
case parsed::primitive_condition::type::GT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
case parsed::primitive_condition::type::GE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
case parsed::primitive_condition::type::LT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
case parsed::primitive_condition::type::LE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
cond._values[0].is_constant(), cond._values[1].is_constant());
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
default:
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
@@ -674,17 +652,23 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
// conditions given by the given parsed ConditionExpression.
bool verify_condition_expression(
const parsed::condition_expression& condition_expression,
const rjson::value* previous_item) {
std::unordered_set<std::string>& used_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
const rjson::value& req,
schema_ptr schema,
const std::unique_ptr<rjson::value>& previous_item) {
if (condition_expression.empty()) {
return true;
}
bool ret = std::visit(overloaded_functor {
[&] (const parsed::primitive_condition& cond) -> bool {
return calculate_primitive_condition(cond, previous_item);
return calculate_primitive_condition(cond, used_attribute_values,
used_attribute_names, req, schema, previous_item);
},
[&] (const parsed::condition_expression::condition_list& list) -> bool {
auto verify_condition = [&] (const parsed::condition_expression& e) {
return verify_condition_expression(e, previous_item);
return verify_condition_expression(e, used_attribute_values,
used_attribute_names, req, schema, previous_item);
};
switch (list.op) {
case '&':

View File

@@ -33,7 +33,6 @@
#include "cql3/restrictions/statement_restrictions.hh"
#include "serialization.hh"
#include "expressions_types.hh"
namespace alternator {
@@ -43,19 +42,8 @@ enum class comparison_operator_type {
comparison_operator_type get_comparison_operator(const rjson::value& comparison_operator);
enum class conditional_operator_type {
AND, OR, MISSING
};
conditional_operator_type get_conditional_operator(const rjson::value& req);
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);
bool verify_expected(const rjson::value& req, const rjson::value* previous_item);
bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);
bool verify_condition_expression(
const parsed::condition_expression& condition_expression,
const rjson::value* previous_item);
bool verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item);
}

File diff suppressed because it is too large Load Diff

View File

@@ -20,24 +20,16 @@
*/
#include "expressions.hh"
#include "serialization.hh"
#include "base64.hh"
#include "conditions.hh"
#include "alternator/expressionsLexer.hpp"
#include "alternator/expressionsParser.hpp"
#include "utils/overloaded_functor.hh"
#include "error.hh"
#include "seastarx.hh"
#include <seastarx.hh>
#include <seastar/core/print.hh>
#include <seastar/util/log.hh>
#include <boost/algorithm/cxx11/any_of.hpp>
#include <boost/algorithm/cxx11/all_of.hpp>
#include <functional>
#include <unordered_map>
namespace alternator {
@@ -130,555 +122,6 @@ void condition_expression::append(condition_expression&& a, char op) {
}, _expression);
}
} // namespace parsed
// The following resolve_*() functions resolve references in parsed
// expressions of different types. Resolving a parsed expression means
// replacing:
// 1. In parsed::path objects, replace references like "#name" with the
// attribute name from ExpressionAttributeNames,
// 2. In parsed::constant objects, replace references like ":value" with
// the value from ExpressionAttributeValues.
// These functions also track which name and value references were used, to
// allow complaining if some remain unused.
// Note that the resolve_*() functions modify the expressions in-place,
// so if we ever intend to cache parsed expressions, we need to pass a copy
// into these functions.
//
// Doing the "resolving" stage before the evaluation stage has two benefits.
// First, it allows us to be compatible with DynamoDB in catching unused
// names and values (see issue #6572). Second, in the FilterExpression case,
// we need to resolve the expression just once but then use it many times
// (once for each item to be filtered).
// Resolves a "#name" reference at the root of path p, in place.
// A root that does not start with '#' is left untouched. Otherwise the
// root is looked up in expression_attribute_names; a missing table, a
// missing entry or a non-string entry raises a ValidationException
// api_error. On success the reference is recorded in used_attribute_names
// and the root is replaced by the looked-up attribute name.
static void resolve_path(parsed::path& p,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
    const std::string& root = p.root();
    const bool is_reference = !root.empty() && root.front() == '#';
    if (!is_reference) {
        return;
    }
    if (expression_attribute_names == nullptr) {
        throw api_error("ValidationException",
                format("ExpressionAttributeNames missing, entry '{}' required by expression", root));
    }
    const rjson::value* replacement = rjson::find(*expression_attribute_names, root);
    if (replacement == nullptr || !replacement->IsString()) {
        throw api_error("ValidationException",
                format("ExpressionAttributeNames missing entry '{}' required by expression", root));
    }
    // Record the reference before overwriting the root it aliases.
    used_attribute_names.emplace(root);
    p.set_root(std::string(rjson::to_string_view(*replacement)));
}
// Resolves a ":value" reference held by constant c, in place.
// When c holds a reference string, it is looked up in
// expression_attribute_values; a missing table, a missing entry or a null
// entry raises a ValidationException api_error. The found value is passed
// through validate_value(), the reference is recorded in
// used_attribute_values, and c is set to the value. A constant that
// already holds a literal is left as is.
static void resolve_constant(parsed::constant& c,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_values) {
    auto resolve_reference = [&] (const std::string& valref) {
        if (expression_attribute_values == nullptr) {
            throw api_error("ValidationException",
                    format("ExpressionAttributeValues missing, entry '{}' required by expression", valref));
        }
        const rjson::value* found = rjson::find(*expression_attribute_values, valref);
        if (found == nullptr) {
            throw api_error("ValidationException",
                    format("ExpressionAttributeValues missing entry '{}' required by expression", valref));
        }
        if (found->IsNull()) {
            throw api_error("ValidationException",
                    format("ExpressionAttributeValues null value for entry '{}' required by expression", valref));
        }
        validate_value(*found, "ExpressionAttributeValues");
        // Record the reference before c.set() replaces the string it names.
        used_attribute_values.emplace(valref);
        c.set(*found);
    };
    auto keep_literal = [] (const parsed::constant::literal&) {
        // Nothing to do, already resolved
    };
    std::visit(overloaded_functor { resolve_reference, keep_literal }, c._value);
}
// Resolves all name and value references inside a parsed value, in place.
// Constants go through resolve_constant(), paths through resolve_path(),
// and a function call has each of its parameters resolved recursively.
void resolve_value(parsed::value& rhs,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    auto on_constant = [&] (parsed::constant& constant) {
        resolve_constant(constant, expression_attribute_values, used_attribute_values);
    };
    auto on_function_call = [&] (parsed::value::function_call& call) {
        for (parsed::value& parameter : call._parameters) {
            resolve_value(parameter, expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
        }
    };
    auto on_path = [&] (parsed::path& path) {
        resolve_path(path, expression_attribute_names, used_attribute_names);
    };
    std::visit(overloaded_functor { on_constant, on_function_call, on_path }, rhs._value);
}
// Resolves the operand(s) of the right-hand side of a SET action, in place.
// The first operand is always resolved; the second one only when the
// operator is not 'v' (presumably the "single value" form - confirm
// against the parser).
void resolve_set_rhs(parsed::set_rhs& rhs,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    auto resolve_operand = [&] (parsed::value& operand) {
        resolve_value(operand, expression_attribute_names, expression_attribute_values,
                used_attribute_names, used_attribute_values);
    };
    resolve_operand(rhs._v1);
    const bool has_second_operand = rhs._op != 'v';
    if (has_second_operand) {
        resolve_operand(rhs._v2);
    }
}
// Resolves every reference in a parsed UpdateExpression, in place.
// For each action the target path is resolved first, then the action's
// payload: SET resolves its right-hand side, ADD and DELETE resolve their
// value reference, and REMOVE has nothing further to resolve.
void resolve_update_expression(parsed::update_expression& ue,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    // Shared by the ADD and DELETE cases, which both carry one constant.
    auto resolve_valref = [&] (parsed::constant& valref) {
        resolve_constant(valref, expression_attribute_values, used_attribute_values);
    };
    for (parsed::update_expression::action& current : ue.actions()) {
        resolve_path(current._path, expression_attribute_names, used_attribute_names);
        std::visit(overloaded_functor {
            [&] (parsed::update_expression::action::set& set_action) {
                resolve_set_rhs(set_action._rhs, expression_attribute_names, expression_attribute_values,
                        used_attribute_names, used_attribute_values);
            },
            [] (parsed::update_expression::action::remove&) {
                // nothing to do
            },
            [&] (parsed::update_expression::action::add& add_action) {
                resolve_valref(add_action._valref);
            },
            [&] (parsed::update_expression::action::del& del_action) {
                resolve_valref(del_action._valref);
            }
        }, current._action);
    }
}
// Resolves, in place, every value that takes part in one primitive
// condition by forwarding each of them to resolve_value().
static void resolve_primitive_condition(parsed::primitive_condition& pc,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    for (parsed::value& operand : pc._values) {
        resolve_value(operand, expression_attribute_names, expression_attribute_values,
                used_attribute_names, used_attribute_values);
    }
}
// Resolves every reference in a parsed condition expression, in place.
// A primitive condition is handed to resolve_primitive_condition(); a
// condition list is walked recursively, one sub-expression at a time.
void resolve_condition_expression(parsed::condition_expression& ce,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    auto on_primitive = [&] (parsed::primitive_condition& primitive) {
        resolve_primitive_condition(primitive,
                expression_attribute_names, expression_attribute_values,
                used_attribute_names, used_attribute_values);
    };
    auto on_list = [&] (parsed::condition_expression::condition_list& sub_conditions) {
        for (parsed::condition_expression& sub : sub_conditions.conditions) {
            resolve_condition_expression(sub,
                    expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
        }
    };
    std::visit(overloaded_functor { on_primitive, on_list }, ce._expression);
}
// Resolves, in place, the "#name" reference (if any) at the root of each
// path of a parsed ProjectionExpression.
void resolve_projection_expression(std::vector<parsed::path>& pe,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
    for (parsed::path& projected : pe) {
        resolve_path(projected, expression_attribute_names, used_attribute_names);
    }
}
// condition_expression_on() checks whether a condition_expression places any
// condition on the given attribute. It can be useful, for example, for
// checking whether the condition tries to restrict a key column.
// Tells whether a parsed value refers to the given attribute: a path
// matches when its root equals the attribute, a function call matches when
// any of its parameters does, and a constant never matches.
static bool value_on(const parsed::value& v, std::string_view attribute) {
    auto on_constant = [] (const parsed::constant&) {
        return false;
    };
    auto on_function_call = [&] (const parsed::value::function_call& call) {
        for (const parsed::value& parameter : call._parameters) {
            if (value_on(parameter, attribute)) {
                return true;
            }
        }
        return false;
    };
    auto on_path = [&] (const parsed::path& path) {
        return path.root() == attribute;
    };
    return std::visit(overloaded_functor { on_constant, on_function_call, on_path }, v._value);
}
// Tells whether any value taking part in the primitive condition refers
// to the given attribute.
static bool primitive_condition_on(const parsed::primitive_condition& pc, std::string_view attribute) {
    bool mentioned = false;
    for (const parsed::value& operand : pc._values) {
        if (value_on(operand, attribute)) {
            mentioned = true;
            break;
        }
    }
    return mentioned;
}
// Tells whether the condition expression places any condition on the
// given attribute, descending recursively through condition lists.
bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute) {
    auto on_primitive = [&] (const parsed::primitive_condition& primitive) {
        return primitive_condition_on(primitive, attribute);
    };
    auto on_list = [&] (const parsed::condition_expression::condition_list& sub_conditions) {
        for (const parsed::condition_expression& sub : sub_conditions.conditions) {
            if (condition_expression_on(sub, attribute)) {
                return true;
            }
        }
        return false;
    };
    return std::visit(overloaded_functor { on_primitive, on_list }, ce._expression);
}
// for_condition_expression_on() runs a given function over all the attributes
// mentioned in the expression. If the same attribute is mentioned more than
// once, the function will be called more than once for the same attribute.
// Calls func with the root of every path reachable from the parsed value:
// the path itself, or the paths nested in a function call's parameters.
// Constants are skipped.
static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
    auto skip_constant = [] (const parsed::constant&) { };
    auto on_function_call = [&] (const parsed::value::function_call& call) {
        for (const parsed::value& parameter : call._parameters) {
            for_value_on(parameter, func);
        }
    };
    auto on_path = [&] (const parsed::path& path) {
        func(path.root());
    };
    std::visit(overloaded_functor { skip_constant, on_function_call, on_path }, v._value);
}
// Runs func over every attribute mentioned in the condition expression,
// recursing through condition lists. An attribute mentioned several times
// is reported several times.
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
    auto on_primitive = [&] (const parsed::primitive_condition& primitive) {
        for (const parsed::value& operand : primitive._values) {
            for_value_on(operand, func);
        }
    };
    auto on_list = [&] (const parsed::condition_expression::condition_list& sub_conditions) {
        for (const parsed::condition_expression& sub : sub_conditions.conditions) {
            for_condition_expression_on(sub, func);
        }
    };
    std::visit(overloaded_functor { on_primitive, on_list }, ce._expression);
}
// The following calculate_value() functions calculate, or evaluate, a parsed
// expression. The parsed expression is assumed to have been "resolved", with
// the matching resolve_* function.
// Concatenates two JSON-encoded list values. A list value is encoded as
// {"L": [...the actual list]}; the result uses the same encoding and holds
// copies of v1's elements followed by copies of v2's elements.
// Throws a ValidationException api_error if either argument does not
// unwrap to a list.
static rjson::value list_concatenate(const rjson::value& v1, const rjson::value& v2) {
    const rjson::value* head = unwrap_list(v1);
    const rjson::value* tail = unwrap_list(v2);
    if (head == nullptr || tail == nullptr) {
        throw api_error("ValidationException", "UpdateExpression: list_append() given a non-list");
    }
    rjson::value joined = rjson::copy(*head);
    for (const auto& element : tail->GetArray()) {
        rjson::push_back(joined, rjson::copy(element));
    }
    rjson::value wrapped = rjson::empty_object();
    rjson::set(wrapped, "L", std::move(joined));
    return wrapped;
}
// calculate_size() implements ConditionExpression's size() function: given a
// JSON-encoded value it returns that value's "size", itself encoded as a
// JSON number ({"N": "..."}). What "size" means depends on the type:
//   S              - the string's length
//   B              - the length of the base64-decoded byte string
//   NS, SS, BS, L  - the number of elements
//   M              - the number of members
// For any other type a JSON-encoded "null" value is returned instead;
// comparisons against this non-numeric value will later fail.
static rjson::value calculate_size(const rjson::value& v) {
    // NOTE: If v is improperly formatted for our JSON value encoding, it
    // must come from the request itself, not from the database, so it makes
    // sense to throw a ValidationException if we see such a problem.
    if (!v.IsObject() || v.MemberCount() != 1) {
        throw api_error("ValidationException", format("invalid object: {}", v));
    }
    const auto member = v.MemberBegin();
    const auto& type = member->name;
    const auto& payload = member->value;

    // Wraps a computed size as a JSON-encoded number.
    const auto as_number = [] (int size) {
        rjson::value wrapped = rjson::empty_object();
        rjson::set(wrapped, "N", rjson::from_string(std::to_string(size)));
        return wrapped;
    };

    if (type == "S") {
        if (!payload.IsString()) {
            throw api_error("ValidationException", format("invalid string: {}", v));
        }
        return as_number(payload.GetStringLength());
    }
    if (type == "NS" || type == "SS" || type == "BS" || type == "L") {
        if (!payload.IsArray()) {
            throw api_error("ValidationException", format("invalid set: {}", v));
        }
        return as_number(payload.Size());
    }
    if (type == "M") {
        if (!payload.IsObject()) {
            throw api_error("ValidationException", format("invalid map: {}", v));
        }
        return as_number(payload.MemberCount());
    }
    if (type == "B") {
        if (!payload.IsString()) {
            throw api_error("ValidationException", format("invalid byte string: {}", v));
        }
        return as_number(base64_decoded_len(rjson::to_string_view(payload)));
    }
    // This type has no size defined.
    rjson::value no_size = rjson::empty_object();
    rjson::set(no_size, "null", rjson::value(true));
    return no_size;
}
// Returns the JSON value held by a resolved constant. A constant still
// holding a value reference (a string) has not been through resolve_value()
// yet, which is a programming error reported with std::logic_error.
static const rjson::value& calculate_value(const parsed::constant& c) {
    if (const parsed::constant::literal* resolved = std::get_if<parsed::constant::literal>(&c._value)) {
        return **resolved;
    }
    // Shouldn't happen, we should have called resolve_value() earlier
    // and replaced the value reference by the literal constant.
    throw std::logic_error("calculate_value() called before resolve_value()");
}
// Encodes a C++ bool as a JSON-encoded boolean value, {"BOOL": b}.
static rjson::value to_bool_json(bool b) {
    rjson::value wrapped = rjson::empty_object();
    rjson::set(wrapped, "BOOL", rjson::value(b));
    return wrapped;
}
// Returns true if "type" is one of the type names accepted as the type
// argument of attribute_type(). The comparison is case-sensitive.
static bool known_type(std::string_view type) {
    static thread_local const std::unordered_set<std::string_view> types = {
        "N", "S", "B",
        "NS", "SS", "BS",
        "L", "M",
        "NULL", "BOOL"
    };
    return types.find(type) != types.end();
}
// Signature shared by all expression-function handlers:
//   caller        - which kind of expression is evaluating the function; each
//                   handler rejects callers it is not allowed in.
//   previous_item - the existing item (may be null), passed on to
//                   calculate_value() when evaluating the parameters.
//   f             - the parsed function call (name and parameters).
// A handler returns the JSON-encoded result, or throws a ValidationException
// api_error when the function is not allowed for this caller or its
// parameters are invalid.
using function_handler_type = rjson::value(calculate_value_caller, const rjson::value*, const parsed::value::function_call&);
// Table mapping each supported function name to its handler. It is consulted
// by calculate_value() when it encounters a function call.
static const
std::unordered_map<std::string_view, function_handler_type*> function_handlers {
    // list_append(list1, list2): concatenation of two lists.
    // Only allowed in an UpdateExpression.
    {"list_append", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::UpdateExpression) {
                throw api_error("ValidationException",
                        format("{}: list_append() not allowed here", caller));
            }
            if (f._parameters.size() != 2) {
                throw api_error("ValidationException",
                        format("{}: list_append() accepts 2 parameters, got {}", caller, f._parameters.size()));
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
            return list_concatenate(v1, v2);
        }
    },
    // if_not_exists(path, value): the value at "path" if it evaluates to
    // non-null, otherwise "value". Only allowed in an UpdateExpression.
    {"if_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::UpdateExpression) {
                throw api_error("ValidationException",
                        format("{}: if_not_exists() not allowed here", caller));
            }
            if (f._parameters.size() != 2) {
                throw api_error("ValidationException",
                        format("{}: if_not_exists() accepts 2 parameters, got {}", caller, f._parameters.size()));
            }
            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
                throw api_error("ValidationException",
                        format("{}: if_not_exists() must include path as its first argument", caller));
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
            return v1.IsNull() ? std::move(v2) : std::move(v1);
        }
    },
    // size(value): see calculate_size(). Only allowed when the caller is
    // calculate_value_caller::ConditionExpression.
    {"size", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpression) {
                throw api_error("ValidationException",
                        format("{}: size() not allowed here", caller));
            }
            if (f._parameters.size() != 1) {
                throw api_error("ValidationException",
                        format("{}: size() accepts 1 parameter, got {}", caller, f._parameters.size()));
            }
            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
            return calculate_size(v);
        }
    },
    // attribute_exists(path): {"BOOL": true} if the path evaluates to a
    // non-null value. Only allowed when the caller is ConditionExpressionAlone.
    {"attribute_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpressionAlone) {
                throw api_error("ValidationException",
                        format("{}: attribute_exists() not allowed here", caller));
            }
            if (f._parameters.size() != 1) {
                throw api_error("ValidationException",
                        format("{}: attribute_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
            }
            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
                throw api_error("ValidationException",
                        format("{}: attribute_exists()'s parameter must be a path", caller));
            }
            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
            return to_bool_json(!v.IsNull());
        }
    },
    // attribute_not_exists(path): the negation of attribute_exists().
    // Only allowed when the caller is ConditionExpressionAlone.
    {"attribute_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpressionAlone) {
                throw api_error("ValidationException",
                        format("{}: attribute_not_exists() not allowed here", caller));
            }
            if (f._parameters.size() != 1) {
                throw api_error("ValidationException",
                        format("{}: attribute_not_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
            }
            if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
                throw api_error("ValidationException",
                        format("{}: attribute_not_exists()'s parameter must be a path", caller));
            }
            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
            return to_bool_json(v.IsNull());
        }
    },
    // attribute_type(value, type): {"BOOL": true} if the JSON-encoded value's
    // type tag equals the string "type". Only allowed when the caller is
    // ConditionExpressionAlone.
    {"attribute_type", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpressionAlone) {
                throw api_error("ValidationException",
                        format("{}: attribute_type() not allowed here", caller));
            }
            if (f._parameters.size() != 2) {
                throw api_error("ValidationException",
                        format("{}: attribute_type() accepts 2 parameters, got {}", caller, f._parameters.size()));
            }
            // There is no real reason for the following check (not
            // allowing the type to come from a document attribute), but
            // DynamoDB does this check, so we do too...
            // Note that it is the *second* parameter (the type) being checked.
            if (!f._parameters[1].is_constant()) {
                throw api_error("ValidationException",
                        format("{}: attribute_type()'s second parameter must be an expression attribute", caller));
            }
            rjson::value v0 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v1 = calculate_value(f._parameters[1], caller, previous_item);
            if (v1.IsObject() && v1.MemberCount() == 1 && v1.MemberBegin()->name == "S") {
                // If the type parameter is not one of the legal types
                // we should generate an error, not a failed condition:
                if (!known_type(rjson::to_string_view(v1.MemberBegin()->value))) {
                    throw api_error("ValidationException",
                            format("{}: attribute_type()'s second parameter, {}, is not a known type",
                                    caller, v1.MemberBegin()->value));
                }
                if (v0.IsObject() && v0.MemberCount() == 1) {
                    return to_bool_json(v1.MemberBegin()->value == v0.MemberBegin()->name);
                } else {
                    // The first parameter is not a well-formed typed value
                    // (e.g., a missing attribute), so it has no type to match.
                    return to_bool_json(false);
                }
            } else {
                throw api_error("ValidationException",
                        format("{}: attribute_type() second parameter must refer to a string, got {}", caller, v1));
            }
        }
    },
    // begins_with(value, prefix): delegates to check_BEGINS_WITH(), passing
    // a null pointer when the first parameter evaluates to null.
    // Only allowed when the caller is ConditionExpressionAlone.
    {"begins_with", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpressionAlone) {
                throw api_error("ValidationException",
                        format("{}: begins_with() not allowed here", caller));
            }
            if (f._parameters.size() != 2) {
                throw api_error("ValidationException",
                        format("{}: begins_with() accepts 2 parameters, got {}", caller, f._parameters.size()));
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
            return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1, v2,
                    f._parameters[0].is_constant(), f._parameters[1].is_constant()));
        }
    },
    // contains(value, operand): delegates to check_CONTAINS(), passing a
    // null pointer when the first parameter evaluates to null.
    // Only allowed when the caller is ConditionExpressionAlone.
    {"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
            if (caller != calculate_value_caller::ConditionExpressionAlone) {
                throw api_error("ValidationException",
                        format("{}: contains() not allowed here", caller));
            }
            if (f._parameters.size() != 2) {
                throw api_error("ValidationException",
                        format("{}: contains() accepts 2 parameters, got {}", caller, f._parameters.size()));
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
            return to_bool_json(check_CONTAINS(v1.IsNull() ? nullptr : &v1, v2));
        }
    },
};
// Given a parsed::value, which can refer either to a constant value from
// ExpressionAttributeValues, to the value of some attribute, or to a function
// of other values, this function calculates the resulting value.
// "caller" determines which expression - ConditionExpression or
// UpdateExpression - is asking for this value. We need to know this because
// DynamoDB allows a different choice of functions for different expressions.
// "previous_item" is the existing item against which attribute paths are
// evaluated. It may be null, in which case every path evaluates to a JSON
// null, as does a path whose root attribute is missing from the item.
// Throws a ValidationException api_error for an unknown function, for a path
// with operators (not yet supported), or for whatever the function's own
// handler rejects.
rjson::value calculate_value(const parsed::value& v,
        calculate_value_caller caller,
        const rjson::value* previous_item) {
    return std::visit(overloaded_functor {
        [&] (const parsed::constant& c) -> rjson::value {
            return rjson::copy(calculate_value(c));
        },
        [&] (const parsed::value::function_call& f) -> rjson::value {
            auto function_it = function_handlers.find(std::string_view(f._function_name));
            if (function_it == function_handlers.end()) {
                // Report the expression type that is actually being
                // evaluated, as all the function handlers do.
                throw api_error("ValidationException",
                        format("{}: unknown function '{}' called.", caller, f._function_name));
            }
            return function_it->second(caller, previous_item, f);
        },
        [&] (const parsed::path& p) -> rjson::value {
            if (!previous_item) {
                return rjson::null_value();
            }
            std::string update_path = p.root();
            if (p.has_operators()) {
                // FIXME: support this
                throw api_error("ValidationException", "Reading attribute paths not yet implemented");
            }
            const rjson::value* previous_value = rjson::find(*previous_item, update_path);
            return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
        }
    }, v._value);
}
// Evaluates the right-hand side of a SET action of an UpdateExpression.
// rhs._op selects the form: 'v' is a single value (rhs._v1), '+' is
// rhs._v1 + rhs._v2 and '-' is rhs._v1 - rhs._v2. Operands are always
// evaluated as part of an UpdateExpression, against previous_item.
rjson::value calculate_value(const parsed::set_rhs& rhs,
        const rjson::value* previous_item) {
    const auto evaluate = [&] (const auto& operand) {
        return calculate_value(operand, calculate_value_caller::UpdateExpression, previous_item);
    };
    switch (rhs._op) {
    case 'v':
        return evaluate(rhs._v1);
    case '+': {
        rjson::value lhs = evaluate(rhs._v1);
        rjson::value addend = evaluate(rhs._v2);
        return number_add(lhs, addend);
    }
    case '-': {
        rjson::value lhs = evaluate(rhs._v1);
        rjson::value subtrahend = evaluate(rhs._v2);
        return number_subtract(lhs, subtrahend);
    }
    default:
        break;
    }
    // Can't happen
    return rjson::null_value();
}
} // namespace alternator

View File

@@ -24,13 +24,8 @@
#include <string>
#include <stdexcept>
#include <vector>
#include <unordered_set>
#include <string_view>
#include <seastar/util/noncopyable_function.hh>
#include "expressions_types.hh"
#include "rjson.hh"
namespace alternator {
@@ -43,60 +38,4 @@ parsed::update_expression parse_update_expression(std::string query);
std::vector<parsed::path> parse_projection_expression(std::string query);
parsed::condition_expression parse_condition_expression(std::string query);
void resolve_update_expression(parsed::update_expression& ue,
const rjson::value* expression_attribute_names,
const rjson::value* expression_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
std::unordered_set<std::string>& used_attribute_values);
void resolve_projection_expression(std::vector<parsed::path>& pe,
const rjson::value* expression_attribute_names,
std::unordered_set<std::string>& used_attribute_names);
void resolve_condition_expression(parsed::condition_expression& ce,
const rjson::value* expression_attribute_names,
const rjson::value* expression_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
std::unordered_set<std::string>& used_attribute_values);
void validate_value(const rjson::value& v, const char* caller);
bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute);
// for_condition_expression_on() runs the given function on the attributes
// that the expression uses. It may run for the same attribute more than once
// if the same attribute is used more than once in the expression.
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func);
// calculate_value() behaves slightly different (especially, different
// functions supported) when used in different types of expressions, as
// enumerated in this enum:
enum class calculate_value_caller {
    UpdateExpression, ConditionExpression, ConditionExpressionAlone
};

// Prints the name of the expression type, for use in error messages.
// Both ConditionExpression and ConditionExpressionAlone are printed as
// "ConditionExpression".
inline std::ostream& operator<<(std::ostream& out, calculate_value_caller caller) {
    switch (caller) {
    case calculate_value_caller::UpdateExpression:
        return out << "UpdateExpression";
    case calculate_value_caller::ConditionExpression:
    case calculate_value_caller::ConditionExpressionAlone:
        return out << "ConditionExpression";
    }
    // Reached only for a value outside the enumerators above.
    return out << "unknown type of expression";
}
rjson::value calculate_value(const parsed::value& v,
calculate_value_caller caller,
const rjson::value* previous_item);
rjson::value calculate_value(const parsed::set_rhs& rhs,
const rjson::value* previous_item);
} /* namespace alternator */

View File

@@ -0,0 +1,78 @@
/*
* Copyright 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <unordered_set>
#include "rjson.hh"
#include "schema_fwd.hh"
#include "expressions_types.hh"
namespace alternator {
// calculate_value() behaves slightly different (especially, different
// functions supported) when used in different types of expressions, as
// enumerated in this enum:
enum class calculate_value_caller {
    UpdateExpression, ConditionExpression, ConditionExpressionAlone
};

// Prints the name of the expression type, for use in error messages.
// Both ConditionExpression and ConditionExpressionAlone are printed as
// "ConditionExpression".
inline std::ostream& operator<<(std::ostream& out, calculate_value_caller caller) {
    switch (caller) {
    case calculate_value_caller::UpdateExpression:
        return out << "UpdateExpression";
    case calculate_value_caller::ConditionExpression:
    case calculate_value_caller::ConditionExpressionAlone:
        return out << "ConditionExpression";
    }
    // Reached only for a value outside the enumerators above.
    return out << "unknown type of expression";
}
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
rjson::value calculate_value(const parsed::value& v,
calculate_value_caller caller,
const rjson::value* expression_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
std::unordered_set<std::string>& used_attribute_values,
const rjson::value& update_info,
schema_ptr schema,
const std::unique_ptr<rjson::value>& previous_item);
bool verify_condition_expression(
const parsed::condition_expression& condition_expression,
std::unordered_set<std::string>& used_attribute_values,
std::unordered_set<std::string>& used_attribute_names,
const rjson::value& req,
schema_ptr schema,
const std::unique_ptr<rjson::value>& previous_item);
} /* namespace alternator */

View File

@@ -25,10 +25,6 @@
#include <string>
#include <variant>
#include <seastar/core/shared_ptr.hh>
#include "rjson.hh"
/*
* Parsed representation of expressions and their components.
*
@@ -67,27 +63,10 @@ public:
}
};
// A constant appearing in an expression. Right after parsing, every constant
// is a reference such as ":val1" into ExpressionAttributeValues, held in the
// std::string alternative. The resolve_value() function later replaces the
// reference by the JSON item extracted from ExpressionAttributeValues, held
// in the "literal" alternative.
struct constant {
    // We use lw_shared_ptr<rjson::value> just to make rjson::value copyable,
    // to make this entire object copyable as ANTLR needs.
    using literal = lw_shared_ptr<rjson::value>;
    std::variant<std::string, literal> _value;

    // Makes this constant an (unresolved) reference named s.
    void set(std::string& s) {
        _value = s;
    }
    // Makes this constant a resolved literal holding a copy of v.
    void set(const rjson::value& v) {
        literal resolved = make_lw_shared<rjson::value>(rjson::copy(v));
        _value = std::move(resolved);
    }
};
// "value" is a value used in the right hand side of an assignment
// expression, "SET a = ...". It can be a constant (a reference to a value
// included in the request, e.g., ":val"), a path to an attribute from the
// existing item (e.g., "a.b[3].c"), or a function of other such values.
// expression, "SET a = ...". It can be a reference to a value included in
// the request (":val"), a path to an attribute from the existing item
// (e.g., "a.b[3].c"), or a function of other such values.
// Note that the real right-hand-side of an assignment is actually a bit
// more general - it allows either a value, or a value+value or value-value -
// see class set_rhs below.
@@ -96,12 +75,9 @@ struct value {
std::string _function_name;
std::vector<value> _parameters;
};
std::variant<constant, path, function_call> _value;
void set_constant(constant c) {
_value = std::move(c);
}
std::variant<std::string, path, function_call> _value;
void set_valref(std::string s) {
_value = constant { std::move(s) };
_value = std::move(s);
}
void set_path(path p) {
_value = std::move(p);
@@ -112,8 +88,8 @@ struct value {
void add_func_parameter(value v) {
std::get<function_call>(_value)._parameters.emplace_back(std::move(v));
}
bool is_constant() const {
return std::holds_alternative<constant>(_value);
bool is_valref() const {
return std::holds_alternative<std::string>(_value);
}
bool is_path() const {
return std::holds_alternative<path>(_value);
@@ -154,10 +130,10 @@ public:
struct remove {
};
struct add {
constant _valref;
std::string _valref;
};
struct del {
constant _valref;
std::string _valref;
};
std::variant<set, remove, add, del> _action;
@@ -171,11 +147,11 @@ public:
}
void assign_add(path p, std::string v) {
_path = std::move(p);
_action = add { constant { std::move(v) } };
_action = add { std::move(v) };
}
void assign_del(path p, std::string v) {
_path = std::move(p);
_action = del { constant { std::move(v) } };
_action = del { std::move(v) };
}
};
private:
@@ -193,9 +169,6 @@ public:
const std::vector<action>& actions() const {
return _actions;
}
std::vector<action>& actions() {
return _actions;
}
};
// A primitive_condition is a condition expression involving one condition,

View File

@@ -21,9 +21,9 @@
#pragma once
#include "seastarx.hh"
#include "service/storage_proxy.hh"
#include "service/storage_proxy.hh"
#include <seastarx.hh>
#include <service/storage_proxy.hh>
#include <service/storage_proxy.hh>
#include "rjson.hh"
#include "executor.hh"

View File

@@ -31,8 +31,8 @@ static logging::logger slogger("alternator-serialization");
namespace alternator {
type_info type_info_from_string(std::string_view type) {
static thread_local const std::unordered_map<std::string_view, type_info> type_infos = {
type_info type_info_from_string(std::string type) {
static thread_local const std::unordered_map<std::string, type_info> type_infos = {
{"S", {alternator_type::S, utf8_type}},
{"B", {alternator_type::B, bytes_type}},
{"BOOL", {alternator_type::BOOL, boolean_type}},
@@ -87,7 +87,7 @@ bytes serialize_item(const rjson::value& item) {
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
}
auto it = item.MemberBegin();
type_info type_info = type_info_from_string(rjson::to_string_view(it->name)); // JSON keys are guaranteed to be strings
type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings
if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
@@ -186,11 +186,6 @@ bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column
format("Type mismatch: expected type {} for key column {}, got type {}",
type_to_string(column.type), column.name_as_text(), it->name.GetString()));
}
std::string_view value_view = rjson::to_string_view(it->value);
if (value_view.empty()) {
throw api_error("ValidationException",
format("The AttributeValue for a key attribute cannot contain an empty string value. Key: {}", column.name_as_text()));
}
if (column.type == bytes_type) {
return base64_decode(it->value);
} else {
@@ -275,93 +270,4 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
return std::make_pair(it_key, &(it->value));
}
// If v encodes a list, i.e., it is an object with the single member "L",
// returns a pointer to that member's value. Otherwise returns nullptr.
const rjson::value* unwrap_list(const rjson::value& v) {
    const bool single_member_object = v.IsObject() && v.MemberCount() == 1;
    if (!single_member_object) {
        return nullptr;
    }
    const auto member = v.MemberBegin();
    return member->name == "L" ? &(member->value) : nullptr;
}
// Adds two JSON-encoded numbers ({"N": "thenumber"}) and returns the sum
// using the same encoding. The operands are decoded by unwrap_number(),
// with "UpdateExpression" as the diagnostic string.
rjson::value number_add(const rjson::value& v1, const rjson::value& v2) {
    auto lhs = unwrap_number(v1, "UpdateExpression");
    auto rhs = unwrap_number(v2, "UpdateExpression");
    rjson::value wrapped = rjson::empty_object();
    std::string sum_text = std::string((lhs + rhs).to_string());
    rjson::set(wrapped, "N", rjson::from_string(sum_text));
    return wrapped;
}
// Subtracts v2 from v1, both JSON-encoded numbers ({"N": "thenumber"}), and
// returns the difference using the same encoding. The operands are decoded
// by unwrap_number(), with "UpdateExpression" as the diagnostic string.
rjson::value number_subtract(const rjson::value& v1, const rjson::value& v2) {
    auto minuend = unwrap_number(v1, "UpdateExpression");
    auto subtrahend = unwrap_number(v2, "UpdateExpression");
    rjson::value wrapped = rjson::empty_object();
    std::string difference_text = std::string((minuend - subtrahend).to_string());
    rjson::set(wrapped, "N", rjson::from_string(difference_text));
    return wrapped;
}
// Computes the union of two JSON-encoded set values (e.g.
// {"SS": [...the actual set]}) and returns it as a set value of the same
// type: all the elements of v1, followed by the elements of v2 which are
// not found in v1.
// Throws a ValidationException if the two set types differ, or if the
// arguments are not sets.
rjson::value set_sum(const rjson::value& v1, const rjson::value& v2) {
    auto [lhs_type, lhs_set] = unwrap_set(v1);
    auto [rhs_type, rhs_set] = unwrap_set(v2);
    if (lhs_type != rhs_type) {
        throw api_error("ValidationException", format("Mismatched set types: {} and {}", lhs_type, rhs_type));
    }
    if (lhs_set == nullptr || rhs_set == nullptr) {
        throw api_error("ValidationException", "UpdateExpression: ADD operation for sets must be given sets as arguments");
    }
    rjson::value merged = rjson::copy(*lhs_set);
    // Index the elements of the first set for membership checks.
    std::set<rjson::value, rjson::single_value_comp> lhs_members;
    for (auto element = merged.Begin(); element != merged.End(); ++element) {
        lhs_members.insert(rjson::copy(*element));
    }
    for (const auto& candidate : rhs_set->GetArray()) {
        const bool already_present = lhs_members.count(candidate) != 0;
        if (!already_present) {
            rjson::push_back(merged, rjson::copy(candidate));
        }
    }
    rjson::value wrapped = rjson::empty_object();
    rjson::set_with_string_name(wrapped, lhs_type, std::move(merged));
    return wrapped;
}
// Computes the difference v1 - v2 of two JSON-encoded set values (e.g.
// {"SS": [...the actual set]}) and returns it as a set value of the same
// type. DynamoDB does not allow empty sets, so when nothing is left of v1
// an unset optional is returned instead.
// Throws a ValidationException if the two set types differ, or if the
// arguments are not sets.
std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value& v2) {
    auto [lhs_type, lhs_set] = unwrap_set(v1);
    auto [rhs_type, rhs_set] = unwrap_set(v2);
    if (lhs_type != rhs_type) {
        throw api_error("ValidationException", format("Mismatched set types: {} and {}", lhs_type, rhs_type));
    }
    if (lhs_set == nullptr || rhs_set == nullptr) {
        throw api_error("ValidationException", "UpdateExpression: DELETE operation can only be performed on a set");
    }
    // Collect the elements of the first set, then drop those in the second.
    std::set<rjson::value, rjson::single_value_comp> remaining;
    for (const auto& element : lhs_set->GetArray()) {
        remaining.insert(rjson::copy(element));
    }
    for (const auto& removed : rhs_set->GetArray()) {
        remaining.erase(removed);
    }
    if (remaining.empty()) {
        return std::nullopt;
    }
    rjson::value wrapped = rjson::empty_object();
    rjson::set_with_string_name(wrapped, lhs_type, rjson::empty_array());
    rjson::value& survivors = wrapped[lhs_type];
    for (const auto& element : remaining) {
        rjson::push_back(survivors, rjson::copy(element));
    }
    return wrapped;
}
}

View File

@@ -45,7 +45,7 @@ struct type_representation {
data_type dtype;
};
type_info type_info_from_string(std::string_view type);
type_info type_info_from_string(std::string type);
type_representation represent_type(alternator_type atype);
bytes serialize_item(const rjson::value& item);
@@ -69,21 +69,4 @@ big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic);
// returned value is {"", nullptr}
const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value& v);
// Check if a given JSON object encodes a list (i.e., it is a {"L": [...]}
// and returns a pointer to that list.
const rjson::value* unwrap_list(const rjson::value& v);
// Take two JSON-encoded numeric values ({"N": "thenumber"}) and return the
// sum, again as a JSON-encoded number.
rjson::value number_add(const rjson::value& v1, const rjson::value& v2);
rjson::value number_subtract(const rjson::value& v1, const rjson::value& v2);
// Take two JSON-encoded set values (e.g. {"SS": [...the actual set]}) and
// return the sum of both sets, again as a set value.
rjson::value set_sum(const rjson::value& v1, const rjson::value& v2);
// Take two JSON-encoded set values (e.g. {"SS": [...the actual list]}) and
// return the difference of s1 - s2, again as a set value.
// DynamoDB does not allow empty sets, so if resulting set is empty, return
// an unset optional instead.
std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value& v2);
}

View File

@@ -23,7 +23,7 @@
#include "log.hh"
#include <seastar/http/function_handlers.hh>
#include <seastar/json/json_elements.hh>
#include "seastarx.hh"
#include <seastarx.hh>
#include "error.hh"
#include "rjson.hh"
#include "auth.hh"

View File

@@ -26,8 +26,8 @@
#include <seastar/http/httpd.hh>
#include <seastar/net/tls.hh>
#include <optional>
#include "alternator/auth.hh"
#include "utils/small_vector.hh"
#include <alternator/auth.hh>
#include <utils/small_vector.hh>
#include <seastar/core/units.hh>
namespace alternator {

View File

@@ -511,21 +511,6 @@
}
]
},
{
"path":"/storage_service/cdc_streams_check_and_repair",
"operations":[
{
"method":"POST",
"summary":"Checks that CDC streams reflect current cluster topology and regenerates them if not.",
"type":"void",
"nickname":"cdc_streams_check_and_repair",
"produces":[
"application/json"
],
"parameters":[]
}
]
},
{
"path":"/storage_service/snapshots",
"operations":[

View File

@@ -93,22 +93,6 @@ static future<> register_api(http_context& ctx, const sstring& api_name,
});
}
// Asks the HTTP server to update its routes by calling the
// set_transport_controller(ctx, routes, ctl) overload.
future<> set_transport_controller(http_context& ctx, cql_transport::controller& ctl) {
    return ctx.http_server.set_routes(
        [&ctx, &ctl] (routes& r) {
            set_transport_controller(ctx, r, ctl);
        });
}
// Asks the HTTP server to update its routes by calling the
// unset_transport_controller(ctx, routes) overload.
future<> unset_transport_controller(http_context& ctx) {
    return ctx.http_server.set_routes(
        [&ctx] (routes& r) {
            unset_transport_controller(ctx, r);
        });
}
// Asks the HTTP server to update its routes by calling the
// set_rpc_controller(ctx, routes, ctl) overload.
future<> set_rpc_controller(http_context& ctx, thrift_controller& ctl) {
    return ctx.http_server.set_routes(
        [&ctx, &ctl] (routes& r) {
            set_rpc_controller(ctx, r, ctl);
        });
}
// Asks the HTTP server to update its routes by calling the
// unset_rpc_controller(ctx, routes) overload.
future<> unset_rpc_controller(http_context& ctx) {
    return ctx.http_server.set_routes(
        [&ctx] (routes& r) {
            unset_rpc_controller(ctx, r);
        });
}
// Registers the "storage_service" API, whose routes are installed by
// set_storage_service, through register_api().
future<> set_server_storage_service(http_context& ctx) {
    return register_api(
        ctx,
        "storage_service",
        "The storage service API",
        set_storage_service);
}

View File

@@ -25,8 +25,6 @@
namespace service { class load_meter; }
namespace locator { class token_metadata; }
namespace cql_transport { class controller; }
class thrift_controller;
namespace api {
@@ -50,10 +48,6 @@ future<> set_server_init(http_context& ctx);
future<> set_server_config(http_context& ctx);
future<> set_server_snitch(http_context& ctx);
future<> set_server_storage_service(http_context& ctx);
future<> set_transport_controller(http_context& ctx, cql_transport::controller& ctl);
future<> unset_transport_controller(http_context& ctx);
future<> set_rpc_controller(http_context& ctx, thrift_controller& ctl);
future<> unset_rpc_controller(http_context& ctx);
future<> set_server_snapshot(http_context& ctx);
future<> set_server_gossip(http_context& ctx);
future<> set_server_load_sstable(http_context& ctx);

View File

@@ -650,7 +650,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_size();
return sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_size();
return sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -666,7 +666,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_memory_size();
return sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -674,7 +674,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->filter_memory_size();
return sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -682,7 +682,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->get_summary().memory_footprint();
return sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});
@@ -690,7 +690,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return s + sst->get_summary().memory_footprint();
return sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});

View File

@@ -20,7 +20,7 @@
*/
#include "commitlog.hh"
#include "db/commitlog/commitlog.hh"
#include <db/commitlog/commitlog.hh>
#include "api/api-doc/commitlog.json.hh"
#include "database.hh"
#include <vector>

View File

@@ -21,7 +21,7 @@
#include "gossiper.hh"
#include "api/api-doc/gossiper.json.hh"
#include "gms/gossiper.hh"
#include <gms/gossiper.hh>
namespace api {
using namespace json;

View File

@@ -116,23 +116,6 @@ static future<json::json_return_type> sum_timed_rate_as_long(distributed<proxy>
});
}
// Builds the JSON form of a time_estimated_histogram: for every bucket index
// of `val`, appends the bucket value to `buckets` and the bucket's lower limit
// to `bucket_offsets`, in index order.
utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimated_histogram& val) {
    utils_json::estimated_histogram json_hist;
    size_t idx = 0;
    while (idx < val.size()) {
        json_hist.buckets.push(val.get(idx));
        json_hist.bucket_offsets.push(val.get_bucket_lower_limit(idx));
        ++idx;
    }
    return json_hist;
}
// Reduces the time_estimated_histogram stats member selected by `f` via
// two_dimensional_map_reduce over ctx.sp (merging with
// utils::time_estimated_histogram_merge, starting from a default-constructed
// histogram) and resolves to the JSON form of the merged result.
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::time_estimated_histogram service::storage_proxy_stats::stats::*f) {
    auto to_json_reply = [](const utils::time_estimated_histogram& merged) {
        return make_ready_future<json::json_return_type>(time_to_json_histogram(merged));
    };
    return two_dimensional_map_reduce(ctx.sp, f, utils::time_estimated_histogram_merge,
            utils::time_estimated_histogram()).then(std::move(to_json_reply));
}
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
return two_dimensional_map_reduce(ctx.sp, f, utils::estimated_histogram_merge,

View File

@@ -41,8 +41,6 @@
#include "sstables/sstables.hh"
#include "database.hh"
#include "db/extensions.hh"
#include "transport/controller.hh"
#include "thrift/controller.hh"
namespace api {
@@ -87,66 +85,21 @@ static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
};
}
future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
future<> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
if (tables.empty()) {
tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
return service::get_local_storage_service().set_tables_autocompaction(keyspace, tables, enabled).then([]{
return make_ready_future<json::json_return_type>(json_void());
});
}
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
ss::start_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
return ctl.start_server().then([] {
return make_ready_future<json::json_return_type>(json_void());
return ctx.db.invoke_on_all([keyspace, tables, enabled] (database& db) {
return parallel_for_each(tables, [&db, keyspace, enabled](const sstring& table) mutable {
column_family& cf = db.find_column_family(keyspace, table);
if (enabled) {
cf.enable_auto_compaction();
} else {
cf.disable_auto_compaction();
}
return make_ready_future<>();
});
});
ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
return ctl.stop_server().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::is_native_transport_running.set(r, [&ctl] (std::unique_ptr<request> req) {
return ctl.is_server_running().then([] (bool running) {
return make_ready_future<json::json_return_type>(running);
});
});
}
// Calls unset(r) on the three native-transport routes: start, stop and
// is-running. `ctx` is not used by this function.
void unset_transport_controller(http_context& ctx, routes& r) {
ss::start_native_transport.unset(r);
ss::stop_native_transport.unset(r);
ss::is_native_transport_running.unset(r);
}
// Registers the RPC server routes on `r`, in the order stop, start,
// is-running. Each handler forwards to the matching thrift_controller call on
// `ctl` (captured by reference) and wraps the outcome in a json_return_type:
// json_void() for stop/start, the boolean state for is-running.
// `ctx` is not used by this function.
void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
    ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<request>) {
        auto stopped = ctl.stop_server();
        return stopped.then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

    ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<request>) {
        auto started = ctl.start_server();
        return started.then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

    ss::is_rpc_server_running.set(r, [&ctl](std::unique_ptr<request>) {
        auto state = ctl.is_server_running();
        return state.then([](bool is_up) {
            return make_ready_future<json::json_return_type>(is_up);
        });
    });
}
// Calls unset(r) on the three RPC server routes: stop, start and is-running.
// `ctx` is not used by this function.
void unset_rpc_controller(http_context& ctx, routes& r) {
ss::stop_rpc_server.unset(r);
ss::start_rpc_server.unset(r);
ss::is_rpc_server_running.unset(r);
}
void set_storage_service(http_context& ctx, routes& r) {
@@ -279,12 +232,6 @@ void set_storage_service(http_context& ctx, routes& r) {
req.get_query_param("key")));
});
ss::cdc_streams_check_and_repair.set(r, [&ctx] (std::unique_ptr<request> req) {
return service::get_local_storage_service().check_and_repair_cdc_streams().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = split_cf(req->get_query_param("cf"));
@@ -549,6 +496,42 @@ void set_storage_service(http_context& ctx, routes& r) {
});
});
ss::stop_rpc_server.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().stop_rpc_server().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::start_rpc_server.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().start_rpc_server().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::is_rpc_server_running.set(r, [] (std::unique_ptr<request> req) {
return service::get_local_storage_service().is_rpc_server_running().then([] (bool running) {
return make_ready_future<json::json_return_type>(running);
});
});
ss::start_native_transport.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().start_native_transport().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::stop_native_transport.set(r, [](std::unique_ptr<request> req) {
return service::get_local_storage_service().stop_native_transport().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::is_native_transport_running.set(r, [] (std::unique_ptr<request> req) {
return service::get_local_storage_service().is_native_transport_running().then([] (bool running) {
return make_ready_future<json::json_return_type>(running);
});
});
ss::join_ring.set(r, [](std::unique_ptr<request> req) {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -735,15 +718,17 @@ void set_storage_service(http_context& ctx, routes& r) {
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = split_cf(req->get_query_param("cf"));
return set_tables_autocompaction(ctx, keyspace, tables, true);
return set_tables_autocompaction(ctx, keyspace, tables, true).then([]{
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = split_cf(req->get_query_param("cf"));
return set_tables_autocompaction(ctx, keyspace, tables, false);
return set_tables_autocompaction(ctx, keyspace, tables, false).then([]{
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::deliver_hints.set(r, [](std::unique_ptr<request> req) {
@@ -1020,12 +1005,12 @@ void set_snapshot(http_context& ctx, routes& r) {
ss::take_snapshot.set(r, [](std::unique_ptr<request> req) {
auto tag = req->get_query_param("tag");
auto column_families = split(req->get_query_param("cf"), ",");
auto column_family = req->get_query_param("cf");
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
auto resp = make_ready_future<>();
if (column_families.empty()) {
if (column_family.empty()) {
resp = service::get_local_storage_service().take_snapshot(tag, keynames);
} else {
if (keynames.empty()) {
@@ -1034,7 +1019,7 @@ void set_snapshot(http_context& ctx, routes& r) {
if (keynames.size() > 1) {
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
}
resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_families, tag);
resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_family, tag);
}
return resp.then([] {
return make_ready_future<json::json_return_type>(json_void());

View File

@@ -23,16 +23,9 @@
#include "api.hh"
namespace cql_transport { class controller; }
class thrift_controller;
namespace api {
void set_storage_service(http_context& ctx, routes& r);
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl);
void unset_transport_controller(http_context& ctx, routes& r);
void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl);
void unset_rpc_controller(http_context& ctx, routes& r);
void set_snapshot(http_context& ctx, routes& r);
}

View File

@@ -29,6 +29,7 @@
#include <seastar/net//byteorder.hh>
#include <cstdint>
#include <iosfwd>
#include <seastar/util/gcc6-concepts.hh>
#include "data/cell.hh"
#include "data/schema_info.hh"
#include "imr/utils.hh"

View File

@@ -178,7 +178,7 @@ future<> service::start(::service::migration_manager& mm) {
return create_keyspace_if_missing(mm);
}).then([this] {
return _role_manager->start().then([this] {
return when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
return when_all_succeed(_authorizer->start(), _authenticator->start());
});
}).then([this] {
_permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
@@ -199,7 +199,7 @@ future<> service::stop() {
}
return make_ready_future<>();
}).then([this] {
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
});
}
@@ -458,9 +458,7 @@ future<> drop_role(const service& ser, std::string_view name) {
return when_all_succeed(
a.revoke_all(name),
a.revoke_all(r))
.discard_result()
.handle_exception_type([](const unsupported_authorization_operation&) {
a.revoke_all(r)).handle_exception_type([](const unsupported_authorization_operation&) {
// Nothing.
});
}).then([&ser, name] {
@@ -473,7 +471,7 @@ future<> drop_role(const service& ser, std::string_view name) {
future<bool> has_role(const service& ser, std::string_view grantee, std::string_view name) {
return when_all_succeed(
validate_role_exists(ser, name),
ser.get_roles(grantee)).then_unpack([name](role_set all_roles) {
ser.get_roles(grantee)).then([name](role_set all_roles) {
return make_ready_future<bool>(all_roles.count(sstring(name)) != 0);
});
}

View File

@@ -161,7 +161,7 @@ future<> standard_role_manager::create_metadata_tables_if_missing() const {
meta::role_members_table::name,
_qp,
create_role_members_query,
_migration_manager)).discard_result();
_migration_manager));
}
future<> standard_role_manager::create_default_role_if_missing() const {
@@ -367,7 +367,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
{sstring(role_name)}).discard_result();
};
return when_all_succeed(revoke_from_members(), revoke_members_of()).then_unpack([delete_role = std::move(delete_role)] {
return when_all_succeed(revoke_from_members(), revoke_members_of()).then([delete_role = std::move(delete_role)] {
return delete_role();
});
});
@@ -416,7 +416,7 @@ standard_role_manager::modify_membership(
return make_ready_future<>();
};
return when_all_succeed(modify_roles(), modify_role_members()).discard_result();
return when_all_succeed(modify_roles(), modify_role_members());
}
future<>
@@ -445,7 +445,7 @@ standard_role_manager::grant(std::string_view grantee_name, std::string_view rol
});
};
return when_all_succeed(check_redundant(), check_cycle()).then_unpack([this, role_name, grantee_name] {
return when_all_succeed(check_redundant(), check_cycle()).then([this, role_name, grantee_name] {
return this->modify_membership(grantee_name, role_name, membership_change::add);
});
}

View File

@@ -39,10 +39,7 @@ class caching_options {
sstring _key_cache;
sstring _row_cache;
bool _enabled = true;
caching_options(sstring k, sstring r, bool enabled)
: _key_cache(k), _row_cache(r), _enabled(enabled)
{
caching_options(sstring k, sstring r) : _key_cache(k), _row_cache(r) {
if ((k != "ALL") && (k != "NONE")) {
throw exceptions::configuration_exception("Invalid key value: " + k);
}
@@ -62,53 +59,36 @@ class caching_options {
caching_options() : _key_cache(default_key), _row_cache(default_row) {}
public:
bool enabled() const {
return _enabled;
}
std::map<sstring, sstring> to_map() const {
std::map<sstring, sstring> res = {{ "keys", _key_cache },
{ "rows_per_partition", _row_cache }};
if (!_enabled) {
res.insert({"enabled", "false"});
}
return res;
return {{ "keys", _key_cache }, { "rows_per_partition", _row_cache }};
}
// Returns the result of json::to_json applied to the map produced by to_map().
sstring to_sstring() const {
return json::to_json(to_map());
}
static caching_options get_disabled_caching_options() {
return caching_options("NONE", "NONE", false);
}
template<typename Map>
static caching_options from_map(const Map & map) {
sstring k = default_key;
sstring r = default_row;
bool e = true;
for (auto& p : map) {
if (p.first == "keys") {
k = p.second;
} else if (p.first == "rows_per_partition") {
r = p.second;
} else if (p.first == "enabled") {
e = p.second == "true";
} else {
throw exceptions::configuration_exception("Invalid caching option: " + p.first);
}
}
return caching_options(k, r, e);
return caching_options(k, r);
}
// Builds caching_options from `str` by converting it with json::to_map and
// passing the resulting map to from_map(), which throws
// configuration_exception on an unrecognized option key.
static caching_options from_sstring(const sstring& str) {
return from_map(json::to_map(str));
}
bool operator==(const caching_options& other) const {
return _key_cache == other._key_cache && _row_cache == other._row_cache
&& _enabled == other._enabled;
return _key_cache == other._key_cache && _row_cache == other._row_cache;
}
bool operator!=(const caching_options& other) const {
return !(*this == other);

View File

@@ -190,7 +190,12 @@ public:
, _bootstrap_tokens(bootstrap_tokens)
, _token_metadata(token_metadata)
, _gossiper(gossiper)
{}
{
if (_bootstrap_tokens.empty()) {
throw std::runtime_error(
"cdc: bootstrap tokens is empty in generate_topology_description");
}
}
/*
* Generate a set of CDC stream identifiers such that for each shard
@@ -252,6 +257,8 @@ db_clock::time_point make_new_cdc_generation(
db::system_distributed_keyspace& sys_dist_ks,
std::chrono::milliseconds ring_delay,
bool for_testing) {
assert(!bootstrap_tokens.empty());
auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
// Begin the race.

View File

@@ -51,7 +51,6 @@
#include "types/listlike_partial_deserializing_iterator.hh"
#include "tracing/trace_state.hh"
#include "stats.hh"
#include "compaction_strategy.hh"
namespace std {
@@ -174,7 +173,6 @@ public:
auto& db = _ctxt._proxy.get_db().local();
auto logname = log_name(schema.cf_name());
check_that_cdc_log_table_does_not_exist(db, schema, logname);
ensure_that_table_has_no_counter_columns(schema);
// in seastar thread
auto log_schema = create_log_schema(schema);
@@ -201,7 +199,6 @@ public:
}
if (is_cdc) {
check_for_attempt_to_create_nested_cdc_log(new_schema);
ensure_that_table_has_no_counter_columns(new_schema);
}
auto logname = log_name(old_schema.cf_name());
@@ -266,13 +263,6 @@ private:
schema.ks_name(), logname));
}
}
// Rejects counter tables: when schema.is_counter() is true, throws
// exceptions::invalid_request_exception with a message naming the keyspace
// and table; otherwise returns without doing anything.
static void ensure_that_table_has_no_counter_columns(const schema& schema) {
    if (!schema.is_counter()) {
        return;
    }
    auto message = format("Cannot create CDC log for table {}.{}. Counter support not implemented",
            schema.ks_name(), schema.cf_name());
    throw exceptions::invalid_request_exception(std::move(message));
}
};
cdc::cdc_service::cdc_service(service::storage_proxy& proxy)
@@ -286,7 +276,6 @@ cdc::cdc_service::cdc_service(db_context ctxt)
}
// Stops the CDC service: first passes nullptr to set_cdc_service() on the
// proxy held in the implementation's context, then returns the future
// produced by the implementation's stop().
future<> cdc::cdc_service::stop() {
_impl->_ctxt._proxy.set_cdc_service(nullptr);
return _impl->stop();
}
@@ -403,37 +392,12 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
schema_builder b(s.ks_name(), log_name(s.cf_name()));
b.with_partitioner("com.scylladb.dht.CDCPartitioner");
b.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
b.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name()));
auto ttl_seconds = s.cdc_options().ttl();
if (ttl_seconds > 0) {
b.set_gc_grace_seconds(0);
auto ceil = [] (int dividend, int divisor) {
return dividend / divisor + (dividend % divisor == 0 ? 0 : 1);
};
auto seconds_to_minutes = [] (int seconds_value) {
using namespace std::chrono;
return std::chrono::ceil<minutes>(seconds(seconds_value)).count();
};
// What's the minimum window that won't create more than 24 sstables.
auto window_seconds = ceil(ttl_seconds, 24);
auto window_minutes = seconds_to_minutes(window_seconds);
b.set_compaction_strategy_options({
{"compaction_window_unit", "MINUTES"},
{"compaction_window_size", std::to_string(window_minutes)},
// A new SSTable will become fully expired every
// `window_seconds` seconds so we shouldn't check for expired
// sstables too often.
{"expired_sstable_check_frequency_seconds",
std::to_string(std::max(1, window_seconds / 2))},
});
}
b.with_column(log_meta_column_name_bytes("stream_id"), bytes_type, column_kind::partition_key);
b.with_column(log_meta_column_name_bytes("time"), timeuuid_type, column_kind::clustering_key);
b.with_column(log_meta_column_name_bytes("batch_seq_no"), int32_type, column_kind::clustering_key);
b.with_column(log_meta_column_name_bytes("operation"), data_type_for<operation_native_type>());
b.with_column(log_meta_column_name_bytes("ttl"), long_type);
b.set_caching_options(caching_options::get_disabled_caching_options());
auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
for (const auto& column : columns) {
auto type = column.type;
@@ -479,7 +443,7 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
if (uuid) {
b.set_uuid(*uuid);
}
return b.build();
}
@@ -557,12 +521,6 @@ api::timestamp_type find_timestamp(const schema& s, const mutation& m) {
[&] (collection_mutation_view_description mview) {
t = mview.tomb.timestamp;
if (t != api::missing_timestamp) {
// A collection tombstone with timestamp T can be created with:
// UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
// where X is a non-atomic column.
// This is, among others, the reason why we show it in the CDC log
// with cdc$time using timestamp T + 1 instead of T.
t += 1;
return stop_iteration::yes;
}
@@ -758,79 +716,17 @@ private:
const column_definition& _op_col;
const column_definition& _ttl_col;
ttl_opt _cdc_ttl_opt;
/**
* #6070, #6084
* Non-atomic column assignments which use a TTL are broken into two invocations
* of `transform`, such as in the following example:
* CREATE TABLE t (a int PRIMARY KEY, b map<int, int>) WITH cdc = {'enabled':true};
* UPDATE t USING TTL 5 SET b = {0:0} WHERE a = 0;
*
* The above UPDATE creates a tombstone and a (0, 0) cell; because tombstones don't have the notion
* of a TTL, we split the UPDATE into two separate changes (represented as two separate delta rows in the log,
* resulting in two invocations of `transform`): one change for the deletion with no TTL,
* and one change for adding cells with TTL = 5.
*
* In other words, we use the fact that
* UPDATE t USING TTL 5 SET b = {0:0} WHERE a = 0;
* is equivalent to
* BEGIN UNLOGGED BATCH
* UPDATE t SET b = null WHERE a = 0;
* UPDATE t USING TTL 5 SET b = b + {0:0} WHERE a = 0;
* APPLY BATCH;
* (the mutations are the same in both cases),
* and perform a separate `transform` call for each statement in the batch.
*
* An assignment also happens when an INSERT statement is used as follows:
* INSERT INTO t (a, b) VALUES (0, {0:0}) USING TTL 5;
* #6070
* When mutation splitting was added, non-atomic column assignments were broken
* into two invocations of `transform`. This means the second (actual data assignment)
* does not know about the tombstone in first one -> postimage is created as if
* we were _adding_ to the collection, not replacing it.
*
* This will be split into three separate changes (three invocations of `transform`):
* 1. One with TTL = 5 for the row marker (introduced by the INSERT), indicating that a row was inserted.
* 2. One without a TTL for the tombstone, indicating that the collection was cleared.
* 3. One with TTL = 5 for the addition of cell (0, 0), indicating that the collection
* was extended by a new key/value.
*
* Why do we need three changes and not two, like in the UPDATE case?
* The tombstone needs to be a separate change because it doesn't have a TTL,
* so only the row marker change could potentially be merged with the cell change (1 and 3 above).
* However, we cannot do that: the row marker change is of INSERT type (cdc$operation == cdc::operation::insert),
* but there is no way to create a statement that
* - has a row marker,
* - adds cells to a collection,
* - but *doesn't* add a tombstone for this collection.
* INSERT statements that modify collections *always* add tombstones.
*
* Merging the row marker with the cell addition would result in such an impossible statement.
*
* Instead, we observe that
* INSERT INTO t (a, b) VALUES (0, {0:0}) USING TTL 5;
* is equivalent to
* BEGIN UNLOGGED BATCH
* INSERT INTO t (a) VALUES (0) USING TTL 5;
* UPDATE t SET b = null WHERE a = 0;
* UPDATE t USING TTL 5 SET b = b + {0:0} WHERE a = 0;
* APPLY BATCH;
* and perform a separate `transform` call for each statement in the batch.
*
* Unfortunately, due to splitting, the cell addition call (b = b + {0:0}) does not know about the tombstone.
* If it was performed independently from the tombstone call, it would create a wrong post-image:
* the post-image would look as if the previous cells still existed.
* For example, suppose that b was equal to {1:1} before the above statement was performed.
* Then the final post-image for b for above statement/batch would be {0:0, 1:1}, when instead it should be {0:0}.
*
* To handle this we use the fact that
* 1. changes without a TTL are treated as if TTL = 0,
* 2. `transform` is invoked in order of increasing TTLs,
* and we maintain state between `transform` invocations (`_non_atomic_column_deletes`).
*
* Thus, the tombstone call will happen *before* the cell addition call,
* so the cell addition call will know that there previously was a tombstone
* and create a correct post-image.
*
* Furthermore, `transform` calls for INSERT changes (i.e. with a row marker)
* happen before `transform` calls for UPDATE changes, so in the case of an INSERT
* which modifies a collection column as above, the row marker call will happen first;
* its post-image will still show {1:1} for the collection column. Good.
* Not pretty, but to handle this we use the knowledge that we always get
* invoked in timestamp order -> tombstone first, then assign.
* So we simply keep track of non-atomic columns deleted across calls
* and filter out preimage data post this.
*/
std::unordered_set<const column_definition*> _non_atomic_column_deletes;
@@ -1033,9 +929,6 @@ public:
: value.value().first_fragment()
;
value_callback(key, val, live);
if (value.is_live_and_has_ttl()) {
ttl = value.ttl();
}
}
};
@@ -1489,7 +1382,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
tracing::trace(tr_state, "CDC: Preimage not enabled for the table, not querying current value of {}", m.decorated_key());
}
return f.then([trans = std::move(trans), &mutations, idx, tr_state, &details] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
return f.then([trans = std::move(trans), &mutations, idx, tr_state = std::move(tr_state), &details] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
auto& m = mutations[idx];
auto& s = m.schema();
details.had_preimage |= s->cdc_options().preimage();

View File

@@ -75,7 +75,7 @@ class metadata;
/// CDC service will listen for schema changes and iff CDC is enabled/changed
/// create/modify/delete corresponding log tables etc as part of the schema change.
///
class cdc_service final : public async_sharded_service<cdc::cdc_service> {
class cdc_service {
class impl;
std::unique_ptr<impl> _impl;
public:

View File

@@ -30,16 +30,23 @@ struct atomic_column_update {
atomic_cell cell;
};
// see the comment inside `clustered_row_insert` for motivation for separating
// nonatomic deletions from nonatomic updates
//
// A tombstone applied to a non-atomic column: the column's id together with
// the tombstone itself (no cells).
struct nonatomic_column_deletion {
column_id id; // id of the non-atomic column the tombstone applies to
tombstone t; // the tombstone recorded for that column
};
// Cells written to a single non-atomic column, identified by `id`.
struct nonatomic_column_update {
column_id id; // id of the non-atomic column being updated
tombstone t; // optional
// (bytes, atomic_cell) pairs written to the column
// NOTE(review): the bytes element is presumably the serialized collection key / field index — confirm
utils::chunked_vector<std::pair<bytes, atomic_cell>> cells;
};
struct static_row_update {
gc_clock::duration ttl;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_update> nonatomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
struct clustered_row_insert {
@@ -47,14 +54,19 @@ struct clustered_row_insert {
clustering_key key;
row_marker marker;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_update> nonatomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
// INSERTs can't express updates of individual cells inside a non-atomic column
// (without deleting the entire field first), so there is no `nonatomic_updates` field.
// Overwriting a nonatomic column inside an INSERT will be split into two changes:
// one with a nonatomic deletion, and one with a nonatomic update.
};
struct clustered_row_update {
gc_clock::duration ttl;
clustering_key key;
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_update> nonatomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
struct clustered_row_deletion {
@@ -83,7 +95,8 @@ using set_of_changes = std::map<api::timestamp_type, batch>;
struct row_update {
std::vector<atomic_column_update> atomic_entries;
std::vector<nonatomic_column_update> nonatomic_entries;
std::vector<nonatomic_column_deletion> nonatomic_deletions;
std::vector<nonatomic_column_update> nonatomic_updates;
};
static
@@ -109,7 +122,7 @@ extract_row_updates(const row& r, column_kind ckind, const schema& schema) {
v.timestamp(),
v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0)
);
auto& updates = result[timestamp_and_ttl].nonatomic_entries;
auto& updates = result[timestamp_and_ttl].nonatomic_updates;
if (updates.empty() || updates.back().id != id) {
updates.push_back({id, {}});
}
@@ -117,12 +130,8 @@ extract_row_updates(const row& r, column_kind ckind, const schema& schema) {
}
if (desc.tomb) {
auto timestamp_and_ttl = std::pair(desc.tomb.timestamp + 1, gc_clock::duration(0));
auto& updates = result[timestamp_and_ttl].nonatomic_entries;
if (updates.empty() || updates.back().id != id) {
updates.push_back({id, {}});
}
updates.back().t = std::move(desc.tomb);
auto timestamp_and_ttl = std::pair(desc.tomb.timestamp, gc_clock::duration(0));
result[timestamp_and_ttl].nonatomic_deletions.push_back({id, desc.tomb});
}
});
});
@@ -139,7 +148,8 @@ set_of_changes extract_changes(const mutation& base_mutation, const schema& base
res[timestamp].static_updates.push_back({
ttl,
std::move(up.atomic_entries),
std::move(up.nonatomic_entries)
std::move(up.nonatomic_deletions),
std::move(up.nonatomic_updates)
});
}
@@ -163,9 +173,6 @@ set_of_changes extract_changes(const mutation& base_mutation, const schema& base
};
for (auto& [k, up]: cr_updates) {
// It is important that changes in the resulting `set_of_changes` are listed
// in increasing TTL order. The reason is explained in a comment in cdc/log.cc,
// search for "#6070".
auto [timestamp, ttl] = k;
if (is_insert(timestamp, ttl)) {
@@ -174,70 +181,25 @@ set_of_changes extract_changes(const mutation& base_mutation, const schema& base
cr.key(),
marker,
std::move(up.atomic_entries),
{}
std::move(up.nonatomic_deletions)
});
auto& cr_insert = res[timestamp].clustered_inserts.back();
bool clustered_update_exists = false;
for (auto& nonatomic_up: up.nonatomic_entries) {
// Updating a collection column with an INSERT statement implies inserting a tombstone.
//
// For example, suppose that we have:
// CREATE TABLE t (a int primary key, b map<int, int>);
// Then the following statement:
// INSERT INTO t (a, b) VALUES (0, {0:0}) USING TIMESTAMP T;
// creates a tombstone in column b with timestamp T-1.
// It also creates a cell (0, 0) with timestamp T.
//
// There is no way to create just the cell using an INSERT statement.
// This can only be done using an UPDATE, as follows:
// UPDATE t USING TIMESTAMP T SET b = b + {0:0} WHERE a = 0;
// note that this is different than
// UPDATE t USING TIMESTAMP T SET b = {0:0} WHERE a = 0;
// which also creates a tombstone with timestamp T-1.
//
// It follows that:
// - if `nonatomic_up` has a tombstone, it can be merged with our `cr_insert`,
// which represents an INSERT change.
// - but if `nonatomic_up` only has cells, we must create a separate UPDATE change
// for the cells alone.
if (nonatomic_up.t) {
cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
} else {
if (!clustered_update_exists) {
res[timestamp].clustered_updates.push_back({
ttl,
cr.key(),
{},
{}
});
// Multiple iterations of this `for` loop (for different collection columns)
// might want to put their `nonatomic_up`s into an UPDATE change;
// but we don't want to create a separate change for each of them, reusing one instead.
//
// Example:
// CREATE TABLE t (a int primary key, b map<int, int>, c map <int, int>) with cdc = {'enabled':true};
// insert into t (a, b, c) values (0, {1:1}, {2:2}) USING TTL 5;
//
// this should create 3 delta rows:
// 1. one for the row marker (indicating an INSERT), with TTL 5
// 2. one for the b and c tombstones, without TTL (cdc$ttl = null)
// 3. one for the b and c cells, with TTL 5
// This logic takes care that b cells and c cells are put into a single change (3. above).
clustered_update_exists = true;
}
auto& cr_update = res[timestamp].clustered_updates.back();
cr_update.nonatomic_entries.push_back(std::move(nonatomic_up));
}
if (!up.nonatomic_updates.empty()) {
// nonatomic updates cannot be expressed with an INSERT.
res[timestamp].clustered_updates.push_back({
ttl,
cr.key(),
{},
{},
std::move(up.nonatomic_updates)
});
}
} else {
res[timestamp].clustered_updates.push_back({
ttl,
cr.key(),
std::move(up.atomic_entries),
std::move(up.nonatomic_entries)
std::move(up.nonatomic_deletions),
std::move(up.nonatomic_updates)
});
}
}
@@ -309,7 +271,7 @@ bool should_split(const mutation& base_mutation, const schema& base_schema) {
}
if (desc.tomb) {
if (check_or_set(desc.tomb.timestamp + 1, gc_clock::duration(0))) {
if (check_or_set(desc.tomb.timestamp, gc_clock::duration(0))) {
should_split = true;
return;
}
@@ -364,7 +326,7 @@ bool should_split(const mutation& base_mutation, const schema& base_schema) {
}
if (mview.tomb) {
if (check_or_set(mview.tomb.timestamp + 1, gc_clock::duration(0))) {
if (check_or_set(mview.tomb.timestamp, gc_clock::duration(0))) {
should_split = true;
return;
}
@@ -430,9 +392,13 @@ void for_each_change(const mutation& base_mutation, const schema_ptr& base_schem
auto& cdef = base_schema->column_at(column_kind::static_column, atomic_update.id);
m.set_static_cell(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_update : sr_update.nonatomic_entries) {
for (auto& nonatomic_delete : sr_update.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_delete.id);
m.set_static_cell(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
for (auto& nonatomic_update : sr_update.nonatomic_updates) {
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_update.id);
m.set_static_cell(cdef, collection_mutation_description{nonatomic_update.t, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
m.set_static_cell(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
}
f(std::move(m), change_ts, tuuid, batch_no);
}
@@ -445,9 +411,9 @@ void for_each_change(const mutation& base_mutation, const schema_ptr& base_schem
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
row.cells().apply(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_update : cr_insert.nonatomic_entries) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_update.id);
row.cells().apply(cdef, collection_mutation_description{nonatomic_update.t, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
for (auto& nonatomic_delete : cr_insert.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
row.cells().apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
row.apply(cr_insert.marker);
@@ -462,9 +428,13 @@ void for_each_change(const mutation& base_mutation, const schema_ptr& base_schem
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
row.apply(cdef, std::move(atomic_update.cell));
}
for (auto& nonatomic_update : cr_update.nonatomic_entries) {
for (auto& nonatomic_delete : cr_update.nonatomic_deletions) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
row.apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
}
for (auto& nonatomic_update : cr_update.nonatomic_updates) {
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_update.id);
row.apply(cdef, collection_mutation_description{nonatomic_update.t, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
row.apply(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
}
f(std::move(m), change_ts, tuuid, batch_no);

View File

@@ -122,26 +122,26 @@ public:
return {_empty_prefix, bound_kind::incl_end};
}
template<template<typename> typename R>
requires Range<R, clustering_key_prefix_view>
GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
static bound_view from_range_start(const R<clustering_key_prefix>& range) {
return range.start()
? bound_view(range.start()->value(), range.start()->is_inclusive() ? bound_kind::incl_start : bound_kind::excl_start)
: bottom();
}
template<template<typename> typename R>
requires Range<R, clustering_key_prefix>
GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
static bound_view from_range_end(const R<clustering_key_prefix>& range) {
return range.end()
? bound_view(range.end()->value(), range.end()->is_inclusive() ? bound_kind::incl_end : bound_kind::excl_end)
: top();
}
template<template<typename> typename R>
requires Range<R, clustering_key_prefix>
GCC6_CONCEPT( requires Range<R, clustering_key_prefix> )
static std::pair<bound_view, bound_view> from_range(const R<clustering_key_prefix>& range) {
return {from_range_start(range), from_range_end(range)};
}
template<template<typename> typename R>
requires Range<R, clustering_key_prefix_view>
GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
static std::optional<typename R<clustering_key_prefix_view>::bound> to_range_bound(const bound_view& bv) {
if (&bv._prefix.get() == &_empty_prefix) {
return {};

View File

@@ -61,7 +61,7 @@ bool collection_mutation_view::is_empty() const {
}
template <typename F>
requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>
GCC6_CONCEPT(requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>)
static bool is_any_live(const atomic_cell_value_view& data, tombstone tomb, gc_clock::time_point now, F&& read_cell_type_info) {
auto in = collection_mutation_input_stream(data);
auto has_tomb = in.read_trivial<bool>();
@@ -108,7 +108,7 @@ bool collection_mutation_view::is_any_live(const abstract_type& type, tombstone
}
template <typename F>
requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>
GCC6_CONCEPT(requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>)
static api::timestamp_type last_update(const atomic_cell_value_view& data, F&& read_cell_type_info) {
auto in = collection_mutation_input_stream(data);
api::timestamp_type max = api::missing_timestamp;
@@ -313,7 +313,7 @@ collection_mutation collection_mutation_view_description::serialize(const abstra
}
template <typename C>
requires std::is_base_of_v<abstract_type, std::remove_reference_t<C>>
GCC6_CONCEPT(requires std::is_base_of_v<abstract_type, std::remove_reference_t<C>>)
static collection_mutation_view_description
merge(collection_mutation_view_description a, collection_mutation_view_description b, C&& key_type) {
using element_type = std::pair<bytes_view, atomic_cell_view>;
@@ -375,7 +375,7 @@ collection_mutation merge(const abstract_type& type, collection_mutation_view a,
}
template <typename C>
requires std::is_base_of_v<abstract_type, std::remove_reference_t<C>>
GCC6_CONCEPT(requires std::is_base_of_v<abstract_type, std::remove_reference_t<C>>)
static collection_mutation_view_description
difference(collection_mutation_view_description a, collection_mutation_view_description b, C&& key_type)
{
@@ -421,7 +421,7 @@ collection_mutation difference(const abstract_type& type, collection_mutation_vi
}
template <typename F>
requires std::is_invocable_r_v<std::pair<bytes_view, atomic_cell_view>, F, collection_mutation_input_stream&>
GCC6_CONCEPT(requires std::is_invocable_r_v<std::pair<bytes_view, atomic_cell_view>, F, collection_mutation_input_stream&>)
static collection_mutation_view_description
deserialize_collection_mutation(collection_mutation_input_stream& in, F&& read_kv) {
collection_mutation_view_description ret;

View File

@@ -23,13 +23,11 @@
#include <seastar/core/future.hh>
#include <seastar/util/noncopyable_function.hh>
#include <seastar/core/file.hh>
#include "schema_fwd.hh"
#include "sstables/shared_sstable.hh"
#include "exceptions/exceptions.hh"
#include "sstables/compaction_backlog_manager.hh"
#include "compaction_strategy_type.hh"
class table;
using column_family = table;
@@ -39,6 +37,15 @@ struct mutation_source_metadata;
namespace sstables {
// Enumerates the compaction strategy kinds known to this header.
// No enumerator has an explicit value, so declaration order alone fixes the
// underlying integers (null == 0 through time_window == 5); reordering the
// list changes those values.
enum class compaction_strategy_type {
    null,
    major,
    size_tiered,
    leveled,
    date_tiered,
    time_window,
};
class compaction_strategy_impl;
class sstable;
class sstable_set;
@@ -63,6 +70,8 @@ public:
compaction_descriptor get_major_compaction_job(column_family& cf, std::vector<shared_sstable> candidates);
std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<shared_sstable> candidates);
// Some strategies may look at the compacted and resulting sstables to
// get some useful information for subsequent compactions.
void notify_completion(const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added);
@@ -134,20 +143,6 @@ public:
// Returns whether or not interposer consumer is used by a given strategy.
bool use_interposer_consumer() const;
// Informs the caller (usually the compaction manager) about what it would take to bring this set of
// SSTables closer to being in-strategy. If this returns an empty compaction descriptor,
// the sstable set is already in-strategy.
//
// The caller can specify one of two modes: strict or relaxed. In relaxed mode the tolerance for
// what is considered offstrategy is higher. It can be used, for instance, for when the system
// is restarting and previous compactions were likely in-flight. In strict mode, we are less
// tolerant to invariant breakages.
//
// The caller should also pass a maximum number of SSTables which is the maximum amount of
// SSTables that can be added into a single job.
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode);
};
// Creates a compaction_strategy object from one of the strategies available.

View File

@@ -1,36 +0,0 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace sstables {

// Enumerates the compaction strategy kinds.
// The enumerators have no explicit values, so their declaration order fixes
// the underlying integers (null == 0 through time_window == 5).
enum class compaction_strategy_type {
    null,
    major,
    size_tiered,
    leveled,
    date_tiered,
    time_window,
};

// Mode argument accepted by compaction_strategy::get_reshaping_job().
// `strict` is less tolerant of invariant breakage; `relaxed` raises the
// tolerance for what is considered off-strategy.
enum class reshape_mode {
    strict,
    relaxed,
};

}

View File

@@ -29,6 +29,7 @@
#include <boost/range/adaptor/transformed.hpp>
#include "utils/serialization.hh"
#include <seastar/util/backtrace.hh>
#include "unimplemented.hh"
enum class allow_prefixes { no, yes };
@@ -90,7 +91,7 @@ private:
return len;
}
public:
bytes serialize_single(bytes&& v) const {
bytes serialize_single(bytes&& v) {
return serialize_value({std::move(v)});
}
template<typename RangeOfSerializedComponents>
@@ -108,7 +109,7 @@ public:
static bytes serialize_value(std::initializer_list<T> values) {
return serialize_value(boost::make_iterator_range(values.begin(), values.end()));
}
bytes serialize_optionals(const std::vector<bytes_opt>& values) const {
bytes serialize_optionals(const std::vector<bytes_opt>& values) {
return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view {
if (!bo) {
throw std::logic_error("attempted to create key component from empty optional");
@@ -116,7 +117,7 @@ public:
return *bo;
}));
}
bytes serialize_value_deep(const std::vector<data_value>& values) const {
bytes serialize_value_deep(const std::vector<data_value>& values) {
// TODO: Optimize
std::vector<bytes> partial;
partial.reserve(values.size());
@@ -127,7 +128,7 @@ public:
}
return serialize_value(partial);
}
bytes decompose_value(const value_type& values) const {
bytes decompose_value(const value_type& values) {
return serialize_value(values);
}
class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
@@ -179,7 +180,7 @@ public:
static boost::iterator_range<iterator> components(const bytes_view& v) {
return { begin(v), end(v) };
}
value_type deserialize_value(bytes_view v) const {
value_type deserialize_value(bytes_view v) {
std::vector<bytes> result;
result.reserve(_types.size());
std::transform(begin(v), end(v), std::back_inserter(result), [] (auto&& v) {
@@ -187,10 +188,10 @@ public:
});
return result;
}
bool less(bytes_view b1, bytes_view b2) const {
bool less(bytes_view b1, bytes_view b2) {
return compare(b1, b2) < 0;
}
size_t hash(bytes_view v) const {
size_t hash(bytes_view v) {
if (_byte_order_equal) {
return std::hash<bytes_view>()(v);
}
@@ -202,7 +203,7 @@ public:
}
return h;
}
int compare(bytes_view b1, bytes_view b2) const {
int compare(bytes_view b1, bytes_view b2) {
if (_byte_order_comparable) {
if (_is_reversed) {
return compare_unsigned(b2, b1);
@@ -223,21 +224,11 @@ public:
bool is_empty(bytes_view v) const {
return begin(v) == end(v);
}
void validate(bytes_view v) const {
std::vector<bytes_view> values(begin(v), end(v));
if (AllowPrefixes == allow_prefixes::no && values.size() < _types.size()) {
throw marshal_exception(fmt::format("compound::validate(): non-prefixable compound cannot be a prefix"));
}
if (values.size() > _types.size()) {
throw marshal_exception(fmt::format("compound::validate(): cannot have more values than types, have {} values but only {} types",
values.size(), _types.size()));
}
for (size_t i = 0; i != values.size(); ++i) {
//FIXME: is it safe to assume the internal serialization format?
_types[i]->validate(values[i], cql_serialization_format::internal());
}
void validate(bytes_view v) {
// FIXME: implement
warn(unimplemented::cause::VALIDATION);
}
bool equal(bytes_view v1, bytes_view v2) const {
bool equal(bytes_view v1, bytes_view v2) {
if (_byte_order_equal) {
return compare_unsigned(v1, v2) == 0;
}

View File

@@ -213,8 +213,6 @@ public:
, _is_compound(true)
{ }
explicit composite(const composite_view& v);
composite()
: _bytes()
, _is_compound(true)
@@ -505,7 +503,6 @@ public:
};
class composite_view final {
friend class composite;
bytes_view _bytes;
bool _is_compound;
public:
@@ -605,11 +602,6 @@ public:
}
};
// Builds a composite from a composite_view by delegating to the
// (bytes, is_compound) constructor: a `bytes` value is constructed from the
// view's underlying bytes_view and the view's compound flag is carried over.
// Reads the view's private members directly, so it depends on composite
// being declared a friend of composite_view.
inline
composite::composite(const composite_view& v)
: composite(bytes(v._bytes), v._is_compound)
{ }
inline
std::ostream& operator<<(std::ostream& os, const composite& v) {
return os << composite_view(v);

View File

@@ -152,39 +152,41 @@ struct uuid_type_impl final : public concrete_type<utils::UUID> {
template <typename Func> using visit_ret_type = std::invoke_result_t<Func, const ascii_type_impl&>;
template <typename Func> concept CanHandleAllTypes = requires(Func f) {
{ f(*static_cast<const ascii_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const boolean_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const byte_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const bytes_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const counter_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const date_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const decimal_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const double_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const duration_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const empty_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const float_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const inet_addr_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const int32_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const list_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const long_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const map_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const reversed_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const set_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const short_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const simple_date_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const time_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const timestamp_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const timeuuid_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const tuple_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const user_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const utf8_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const uuid_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
{ f(*static_cast<const varint_type_impl*>(nullptr)) } -> std::same_as<visit_ret_type<Func>>;
GCC6_CONCEPT(
template <typename Func> concept bool CanHandleAllTypes = requires(Func f) {
{ f(*static_cast<const ascii_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const boolean_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const byte_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const bytes_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const counter_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const date_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const decimal_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const double_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const duration_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const empty_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const float_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const inet_addr_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const int32_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const list_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const long_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const map_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const reversed_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const set_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const short_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const simple_date_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const time_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const timestamp_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const timeuuid_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const tuple_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const user_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const utf8_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const uuid_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
{ f(*static_cast<const varint_type_impl*>(nullptr)) } -> visit_ret_type<Func>;
};
)
template<typename Func>
requires CanHandleAllTypes<Func>
GCC6_CONCEPT(requires CanHandleAllTypes<Func>)
static inline visit_ret_type<Func> visit(const abstract_type& t, Func&& f) {
switch (t.get_kind()) {
case abstract_type::kind::ascii:

View File

@@ -32,8 +32,6 @@ import tempfile
import textwrap
from distutils.spawn import find_executable
curdir = os.getcwd()
tempfile.tempdir = "./build/tmp"
configure_args = str.join(' ', [shlex.quote(x) for x in sys.argv[1:]])
@@ -168,27 +166,9 @@ def maybe_static(flag, libs):
return libs
class Source(object):
    """Common base for build inputs that expand into generated files.

    The class calls ``self.generated(gen_dir)`` but does not define it, so a
    subclass has to provide that method.
    """

    def __init__(self, source, hh_prefix, cc_prefix):
        # Both "prefix" values are matched with str.endswith() below, i.e. they
        # are used as file-name endings.
        self.source = source
        self.hh_prefix = hh_prefix
        self.cc_prefix = cc_prefix

    def headers(self, gen_dir):
        """Return the generated paths that end with ``hh_prefix``."""
        return list(filter(lambda path: path.endswith(self.hh_prefix),
                           self.generated(gen_dir)))

    def sources(self, gen_dir):
        """Return the generated paths that end with ``cc_prefix``."""
        return list(filter(lambda path: path.endswith(self.cc_prefix),
                           self.generated(gen_dir)))

    def objects(self, gen_dir):
        """Return the generated source paths with ``cc_prefix`` replaced by '.o'."""
        return list(map(lambda path: path.replace(self.cc_prefix, '.o'),
                        self.sources(gen_dir)))

    def endswith(self, end):
        """Tell whether the input file name itself ends with ``end``."""
        return self.source.endswith(end)
class Thrift(Source):
class Thrift(object):
def __init__(self, source, service):
Source.__init__(self, source, '.h', '.cpp')
self.source = source
self.service = service
def generated(self, gen_dir):
@@ -199,6 +179,19 @@ class Thrift(Source):
for ext in ['.cpp', '.h']]
return [os.path.join(gen_dir, file) for file in files]
def headers(self, gen_dir):
return [x for x in self.generated(gen_dir) if x.endswith('.h')]
def sources(self, gen_dir):
return [x for x in self.generated(gen_dir) if x.endswith('.cpp')]
def objects(self, gen_dir):
return [x.replace('.cpp', '.o') for x in self.sources(gen_dir)]
def endswith(self, end):
return self.source.endswith(end)
def default_target_arch():
if platform.machine() in ['i386', 'i686', 'x86_64']:
return 'westmere' # support PCLMUL
@@ -208,9 +201,9 @@ def default_target_arch():
return ''
class Antlr3Grammar(Source):
class Antlr3Grammar(object):
def __init__(self, source):
Source.__init__(self, source, '.hpp', '.cpp')
self.source = source
def generated(self, gen_dir):
basename = os.path.splitext(self.source)[0]
@@ -218,12 +211,18 @@ class Antlr3Grammar(Source):
for ext in ['Lexer.cpp', 'Lexer.hpp', 'Parser.cpp', 'Parser.hpp']]
return [os.path.join(gen_dir, file) for file in files]
class Json2Code(Source):
def __init__(self, source):
Source.__init__(self, source, '.hh', '.cc')
def headers(self, gen_dir):
return [x for x in self.generated(gen_dir) if x.endswith('.hpp')]
def sources(self, gen_dir):
return [x for x in self.generated(gen_dir) if x.endswith('.cpp')]
def objects(self, gen_dir):
return [x.replace('.cpp', '.o') for x in self.sources(gen_dir)]
def endswith(self, end):
return self.source.endswith(end)
def generated(self, gen_dir):
return [os.path.join(gen_dir, self.source + '.hh'), os.path.join(gen_dir, self.source + '.cc')]
def find_headers(repodir, excluded_dirs):
walker = os.walk(repodir)
@@ -249,7 +248,7 @@ def find_headers(repodir, excluded_dirs):
modes = {
'debug': {
'cxxflags': '-DDEBUG -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxxflags': '-DDEBUG -DDEBUG_LSA_SANITIZER -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '-Wstack-usage=%s' % (1024*40),
},
'release': {
@@ -270,7 +269,6 @@ scylla_tests = set([
'test/boost/UUID_test',
'test/boost/aggregate_fcts_test',
'test/boost/allocation_strategy_test',
'test/boost/alternator_base64_test',
'test/boost/anchorless_list_test',
'test/boost/auth_passwords_test',
'test/boost/auth_resource_test',
@@ -280,7 +278,6 @@ scylla_tests = set([
'test/boost/broken_sstable_test',
'test/boost/bytes_ostream_test',
'test/boost/cache_flat_mutation_reader_test',
'test/boost/cached_file_test',
'test/boost/caching_options_test',
'test/boost/canonical_mutation_test',
'test/boost/cartesian_product_test',
@@ -329,7 +326,6 @@ scylla_tests = set([
'test/boost/linearizing_input_stream_test',
'test/boost/loading_cache_test',
'test/boost/log_heap_test',
'test/boost/estimated_histogram_test',
'test/boost/logalloc_test',
'test/boost/managed_vector_test',
'test/boost/map_difference_test',
@@ -369,7 +365,6 @@ scylla_tests = set([
'test/boost/schema_changes_test',
'test/boost/sstable_conforms_to_mutation_source_test',
'test/boost/sstable_resharding_test',
'test/boost/sstable_directory_test',
'test/boost/sstable_test',
'test/boost/storage_proxy_test',
'test/boost/top_k_test',
@@ -419,13 +414,12 @@ perf_tests = set([
'test/perf/perf_mutation_fragment',
'test/perf/perf_idl',
'test/perf/perf_vint',
'test/perf/perf_big_decimal',
])
apps = set([
'scylla',
'test/tools/cql_repl',
'tools/scylla-types',
'tools/scylla_types',
])
tests = scylla_tests | perf_tests
@@ -459,8 +453,8 @@ arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc'
help='C compiler path')
arg_parser.add_argument('--with-osv', action='store', dest='with_osv', default='',
help='Shortcut for compile for OSv')
add_tristate(arg_parser, name='dpdk', dest='dpdk',
help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
arg_parser.add_argument('--enable-dpdk', action='store_true', dest='dpdk', default=False,
help='Enable dpdk (from seastar dpdk sources)')
arg_parser.add_argument('--dpdk-target', action='store', dest='dpdk_target', default='',
help='Path to DPDK SDK target location (e.g. <DPDK SDK dir>/x86_64-native-linuxapp-gcc)')
arg_parser.add_argument('--debuginfo', action='store', dest='debuginfo', type=int, default=1,
@@ -479,6 +473,8 @@ arg_parser.add_argument('--python', action='store', dest='python', default='pyth
help='Python3 path')
arg_parser.add_argument('--split-dwarf', dest='split_dwarf', action='store_true', default=False,
help='use of split dwarf (https://gcc.gnu.org/wiki/DebugFission) to speed up linking')
arg_parser.add_argument('--enable-gcc6-concepts', dest='gcc6_concepts', action='store_true', default=False,
help='enable experimental support for C++ Concepts as implemented in GCC 6')
arg_parser.add_argument('--enable-alloc-failure-injector', dest='alloc_failure_injector', action='store_true', default=False,
help='enable allocation failure injection')
arg_parser.add_argument('--with-antlr3', dest='antlr3_exec', action='store', default=None,
@@ -497,7 +493,6 @@ extra_cxxflags = {}
cassandra_interface = Thrift(source='interface/cassandra.thrift', service='Cassandra')
scylla_core = (['database.cc',
'absl-flat_hash_map.cc',
'table.cc',
'atomic_cell.cc',
'collection_mutation.cc',
@@ -516,13 +511,13 @@ scylla_core = (['database.cc',
'frozen_mutation.cc',
'memtable.cc',
'schema_mutations.cc',
'supervisor.cc',
'utils/logalloc.cc',
'utils/large_bitset.cc',
'utils/buffer_input_stream.cc',
'utils/limiting_data_source.cc',
'utils/updateable_value.cc',
'utils/directories.cc',
'utils/generation-number.cc',
'mutation_partition.cc',
'mutation_partition_view.cc',
'mutation_partition_serializer.cc',
@@ -551,11 +546,9 @@ scylla_core = (['database.cc',
'sstables/integrity_checked_file_impl.cc',
'sstables/prepended_input_stream.cc',
'sstables/m_format_read_helpers.cc',
'sstables/sstable_directory.cc',
'transport/event.cc',
'transport/event_notifier.cc',
'transport/server.cc',
'transport/controller.cc',
'transport/messages/result_message.cc',
'cdc/cdc_partitioner.cc',
'cdc/log.cc',
@@ -578,7 +571,6 @@ scylla_core = (['database.cc',
'cql3/functions/functions.cc',
'cql3/functions/aggregate_fcts.cc',
'cql3/functions/castas_fcts.cc',
'cql3/functions/error_injection_fcts.cc',
'cql3/statements/cf_prop_defs.cc',
'cql3/statements/cf_statement.cc',
'cql3/statements/authentication_statement.cc',
@@ -625,7 +617,6 @@ scylla_core = (['database.cc',
'cql3/role_name.cc',
'thrift/handler.cc',
'thrift/server.cc',
'thrift/controller.cc',
'thrift/thrift_validation.cc',
'utils/runtime.cc',
'utils/murmur_hash.cc',
@@ -683,7 +674,6 @@ scylla_core = (['database.cc',
'db/view/view.cc',
'db/view/view_update_generator.cc',
'db/view/row_locking.cc',
'db/sstables-format-selector.cc',
'index/secondary_index_manager.cc',
'index/secondary_index.cc',
'utils/UUID_gen.cc',
@@ -805,41 +795,41 @@ scylla_core = (['database.cc',
)
api = ['api/api.cc',
Json2Code('api/api-doc/storage_service.json'),
Json2Code('api/api-doc/lsa.json'),
'api/api-doc/storage_service.json',
'api/api-doc/lsa.json',
'api/storage_service.cc',
Json2Code('api/api-doc/commitlog.json'),
'api/api-doc/commitlog.json',
'api/commitlog.cc',
Json2Code('api/api-doc/gossiper.json'),
'api/api-doc/gossiper.json',
'api/gossiper.cc',
Json2Code('api/api-doc/failure_detector.json'),
'api/api-doc/failure_detector.json',
'api/failure_detector.cc',
Json2Code('api/api-doc/column_family.json'),
'api/api-doc/column_family.json',
'api/column_family.cc',
'api/messaging_service.cc',
Json2Code('api/api-doc/messaging_service.json'),
Json2Code('api/api-doc/storage_proxy.json'),
'api/api-doc/messaging_service.json',
'api/api-doc/storage_proxy.json',
'api/storage_proxy.cc',
Json2Code('api/api-doc/cache_service.json'),
'api/api-doc/cache_service.json',
'api/cache_service.cc',
Json2Code('api/api-doc/collectd.json'),
'api/api-doc/collectd.json',
'api/collectd.cc',
Json2Code('api/api-doc/endpoint_snitch_info.json'),
'api/api-doc/endpoint_snitch_info.json',
'api/endpoint_snitch.cc',
Json2Code('api/api-doc/compaction_manager.json'),
'api/api-doc/compaction_manager.json',
'api/compaction_manager.cc',
Json2Code('api/api-doc/hinted_handoff.json'),
'api/api-doc/hinted_handoff.json',
'api/hinted_handoff.cc',
Json2Code('api/api-doc/utils.json'),
'api/api-doc/utils.json',
'api/lsa.cc',
Json2Code('api/api-doc/stream_manager.json'),
'api/api-doc/stream_manager.json',
'api/stream_manager.cc',
Json2Code('api/api-doc/system.json'),
'api/api-doc/system.json',
'api/system.cc',
'api/config.cc',
Json2Code('api/api-doc/config.json'),
'api/error_injection.cc',
Json2Code('api/api-doc/error_injection.json'),
'api/api-doc/config.json',
'api/error_injection.cc',
'api/api-doc/error_injection.json',
]
alternator = [
@@ -905,8 +895,6 @@ scylla_tests_generic_dependencies = [
'test/lib/cql_test_env.cc',
'test/lib/test_services.cc',
'test/lib/log.cc',
'test/lib/reader_permit.cc',
'test/lib/test_utils.cc',
]
scylla_tests_dependencies = scylla_core + idls + scylla_tests_generic_dependencies + [
@@ -923,7 +911,7 @@ deps = {
'scylla': idls + ['main.cc', 'release.cc', 'build_id.cc'] + scylla_core + api + alternator + redis,
'test/tools/cql_repl': idls + ['test/tools/cql_repl.cc'] + scylla_core + scylla_tests_generic_dependencies,
#FIXME: we don't need all of scylla_core here, only the types module, need to modularize scylla_core.
'tools/scylla-types': idls + ['tools/scylla-types.cc'] + scylla_core,
'tools/scylla_types': idls + ['tools/scylla_types.cc'] + scylla_core,
}
pure_boost_tests = set([
@@ -962,7 +950,6 @@ pure_boost_tests = set([
])
tests_not_using_seastar_test_framework = set([
'test/boost/alternator_base64_test',
'test/boost/small_vector_test',
'test/manual/gossip',
'test/manual/message',
@@ -1013,7 +1000,6 @@ deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc',
deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
deps['test/boost/estimated_histogram_test'] = ['test/boost/estimated_histogram_test.cc']
deps['test/boost/anchorless_list_test'] = ['test/boost/anchorless_list_test.cc']
deps['test/perf/perf_fast_forward'] += ['release.cc']
deps['test/perf/perf_simple_query'] += ['release.cc']
@@ -1033,7 +1019,6 @@ deps['test/boost/linearizing_input_stream_test'] = [
]
deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
deps['test/boost/alternator_base64_test'] += ['alternator/base64.cc']
deps['utils/gz/gen_crc_combine_table'] = ['utils/gz/gen_crc_combine_table.cc']
@@ -1096,14 +1081,34 @@ else:
# a list element means a list of alternative packages to consider
# the first element becomes the HAVE_pkg define
# a string element is a package name with no alternatives
optional_packages = [[]]
optional_packages = [['libsystemd', 'libsystemd-daemon']]
pkgs = []
# Lua can be provided by lua53 package on Debian-like
# systems and by Lua on others.
pkgs.append('lua53' if have_pkg('lua53') else 'lua')
pkgs.append('libsystemd')
def setup_first_pkg_of_list(pkglist):
    """Register the first available package out of a list of alternatives.

    On the first alternative for which ``have_pkg`` is true, the package is
    appended to the global ``pkgs`` list and a ``HAVE_<NAME>=1`` entry is
    appended to the global ``defines`` list; the function then returns True.
    Returns False when no alternative is available.
    """
    # The HAVE_ symbol always derives from the first alternative, whichever
    # alternative ends up being selected.
    define_name = pkglist[0].upper().replace('-', '_')
    for candidate in pkglist:
        if not have_pkg(candidate):
            continue
        pkgs.append(candidate)
        defines.append('HAVE_' + define_name + '=1')
        return True
    return False
# Probe every optional dependency. An entry is either a list of alternative
# package names or a bare string, which is shorthand for a one-element list.
# A missing optional package is only reported, not treated as an error.
for pkglist in optional_packages:
    if isinstance(pkglist, str):
        pkglist = [pkglist]
    if not setup_first_pkg_of_list(pkglist):
        if len(pkglist) == 1:
            print('Missing optional package {pkglist[0]}'.format(**locals()))
        else:
            # Every name after the first is a fallback for the first one.
            alternatives = ':'.join(pkglist[1:])
            print('Missing optional package {pkglist[0]} (or alternatives {alternatives})'.format(**locals()))
compiler_test_src = '''
@@ -1176,24 +1181,8 @@ extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\
for m in ['debug', 'release', 'sanitize']:
modes[m]['cxxflags'] += ' ' + dbgflag
# The relocatable package includes its own dynamic linker. We don't
# know the path it will be installed to, so for now use a very long
# path so that patchelf doesn't need to edit the program headers. The
# kernel imposes a limit of 4096 bytes including the null. The other
# constraint is that the build-id has to be in the first page, so we
# can't use all 4096 bytes for the dynamic linker.
# In here we just guess that 2000 extra / should be enough to cover
# any path we get installed to but not so large that the build-id is
# pushed to the second page.
# At the end of the build we check that the build-id is indeed in the
# first page. At install time we check that patchelf doesn't modify
# the program headers.
gcc_linker_output = subprocess.check_output(['gcc', '-###', '/dev/null', '-o', 't'], stderr=subprocess.STDOUT).decode('utf-8')
original_dynamic_linker = re.search('-dynamic-linker ([^ ]*)', gcc_linker_output).groups()[0]
# gdb has a SO_NAME_MAX_PATH_SIZE of 512, so limit the path size to
# that. The 512 includes the null at the end, hence the 511 below.
dynamic_linker = '/' * (511 - len(original_dynamic_linker)) + original_dynamic_linker
get_dynamic_linker_output = subprocess.check_output(['./reloc/get-dynamic-linker.sh'], shell=True)
dynamic_linker = get_dynamic_linker_output.decode('utf-8').strip()
forced_ldflags = '-Wl,'
@@ -1209,14 +1198,13 @@ args.user_ldflags = forced_ldflags + ' ' + args.user_ldflags
args.user_cflags += ' -Wno-error=stack-usage='
args.user_cflags += f"-ffile-prefix-map={curdir}=."
seastar_cflags = args.user_cflags
if args.target != '':
seastar_cflags += ' -march=' + args.target
seastar_ldflags = args.user_ldflags
libdeflate_cflags = seastar_cflags
zstd_cflags = seastar_cflags + ' -Wno-implicit-fallthrough'
MODE_TO_CMAKE_BUILD_TYPE = {'release' : 'RelWithDebInfo', 'debug' : 'Debug', 'dev' : 'Dev', 'sanitize' : 'Sanitize' }
@@ -1230,8 +1218,8 @@ def configure_seastar(build_dir, mode):
'-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']).replace(' ', ';')),
'-DSeastar_LD_FLAGS={}'.format(seastar_ldflags),
'-DSeastar_CXX_DIALECT=gnu++20',
'-DSeastar_API_LEVEL=4',
'-DSeastar_CXX_DIALECT=gnu++17',
'-DSeastar_STD_OPTIONAL_VARIANT_STRINGVIEW=ON',
'-DSeastar_UNUSED_RESULT_ERROR=ON',
]
@@ -1239,11 +1227,10 @@ def configure_seastar(build_dir, mode):
stack_guards = 'ON' if args.stack_guards else 'OFF'
seastar_cmake_args += ['-DSeastar_STACK_GUARDS={}'.format(stack_guards)]
dpdk = args.dpdk
if dpdk is None:
dpdk = mode == 'release'
if dpdk:
if args.dpdk:
seastar_cmake_args += ['-DSeastar_DPDK=ON', '-DSeastar_DPDK_MACHINE=wsm']
if args.gcc6_concepts:
seastar_cmake_args += ['-DSeastar_GCC6_CONCEPTS=ON']
if args.split_dwarf:
seastar_cmake_args += ['-DSeastar_SPLIT_DWARF=ON']
if args.alloc_failure_injector:
@@ -1251,7 +1238,7 @@ def configure_seastar(build_dir, mode):
seastar_cmd = ['cmake', '-G', 'Ninja', os.path.relpath(args.seastar_path, seastar_build_dir)] + seastar_cmake_args
cmake_dir = seastar_build_dir
if dpdk:
if args.dpdk:
# need to cook first
cmake_dir = args.seastar_path # required by cooking.sh
relative_seastar_build_dir = os.path.join('..', seastar_build_dir) # relative to seastar/
@@ -1284,6 +1271,25 @@ for mode in build_modes:
modes[mode]['seastar_cflags'] = seastar_pc_cflags
modes[mode]['seastar_libs'] = seastar_pc_libs
# We need to use experimental features of the zstd library (to use our own allocators for the (de)compression context),
# which are available only when the library is linked statically.
def configure_zstd(build_dir, mode):
    """Run the CMake configure step for zstd under <build_dir>/<mode>/zstd.

    The build directory is created if needed, the cmake command line is
    printed, and cmake is executed with that directory as its working
    directory.  A failing cmake raises subprocess.CalledProcessError.
    """
    zstd_build_dir = os.path.join(build_dir, mode, 'zstd')
    # cmake runs inside the build directory, so the source path is given
    # relative to it.
    zstd_source_dir = os.path.relpath('zstd/build/cmake', zstd_build_dir)
    zstd_cmake_args = [
        f'-DCMAKE_BUILD_TYPE={MODE_TO_CMAKE_BUILD_TYPE[mode]}',
        f'-DCMAKE_C_COMPILER={args.cc}',
        f'-DCMAKE_CXX_COMPILER={args.cxx}',
        f'-DCMAKE_C_FLAGS={zstd_cflags}',
        '-DZSTD_BUILD_PROGRAMS=OFF',
    ]
    zstd_cmd = ['cmake', '-G', 'Ninja', zstd_source_dir] + zstd_cmake_args
    print(zstd_cmd)
    os.makedirs(zstd_build_dir, exist_ok=True)
    subprocess.check_call(zstd_cmd, shell=False, cwd=zstd_build_dir)
def configure_abseil(build_dir, mode):
abseil_build_dir = os.path.join(build_dir, mode, 'abseil')
@@ -1328,9 +1334,6 @@ args.user_cflags += " " + pkg_config('jsoncpp', '--cflags')
args.user_cflags += ' -march=' + args.target
libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-llz4', '-lz', '-lsnappy', pkg_config('jsoncpp', '--libs'),
' -lstdc++fs', ' -lcrypt', ' -lcryptopp', ' -lpthread',
# Must link with static version of libzstd, since
# experimental APIs that we use are only present there.
maybe_static(True, '-lzstd'),
maybe_static(args.staticboost, '-lboost_date_time -lboost_regex -licuuc'), ])
pkgconfig_libs = [
@@ -1385,6 +1388,9 @@ if args.ragel_exec:
else:
ragel_exec = "ragel"
for mode in build_modes:
configure_zstd(outdir, mode)
for mode in build_modes:
configure_abseil(outdir, mode)
@@ -1411,7 +1417,7 @@ with open(buildfile_tmp, 'w') as f:
command = echo -e $text > $out
description = GEN $out
rule swagger
command = {args.seastar_path}/scripts/seastar-json2code.py --create-cc -f $in -o $out
command = {args.seastar_path}/scripts/seastar-json2code.py -f $in -o $out
description = SWAGGER $out
rule serializer
command = {python} ./idl-compiler.py --ns ser -f $in -o $out
@@ -1433,10 +1439,6 @@ with open(buildfile_tmp, 'w') as f:
description = COPY $out
rule package
command = scripts/create-relocatable-package.py --mode $mode $out
rule rpmbuild
command = reloc/build_rpm.sh --reloc-pkg $in --builddir $out
rule debbuild
command = reloc/build_deb.sh --reloc-pkg $in --builddir $out
''').format(**globals()))
for mode in build_modes:
modeval = modes[mode]
@@ -1444,7 +1446,7 @@ with open(buildfile_tmp, 'w') as f:
f.write(textwrap.dedent('''\
cxx_ld_flags_{mode} = {cxx_ld_flags}
ld_flags_{mode} = $cxx_ld_flags_{mode}
cxxflags_{mode} = $cxx_ld_flags_{mode} {cxxflags} -iquote. -iquote $builddir/{mode}/gen
cxxflags_{mode} = $cxx_ld_flags_{mode} {cxxflags} -I. -I $builddir/{mode}/gen
libs_{mode} = -l{fmt_lib}
seastar_libs_{mode} = {seastar_libs}
rule cxx.{mode}
@@ -1501,7 +1503,7 @@ with open(buildfile_tmp, 'w') as f:
)
)
compiles = {}
swaggers = set()
swaggers = {}
serializers = {}
thrifts = set()
ragels = {}
@@ -1523,13 +1525,12 @@ with open(buildfile_tmp, 'w') as f:
objs += dep.objects('$builddir/' + mode + '/gen')
if isinstance(dep, Antlr3Grammar):
objs += dep.objects('$builddir/' + mode + '/gen')
if isinstance(dep, Json2Code):
objs += dep.objects('$builddir/' + mode + '/gen')
if binary.endswith('.a'):
f.write('build $builddir/{}/{}: ar.{} {}\n'.format(mode, binary, mode, str.join(' ', objs)))
else:
objs.extend(['$builddir/' + mode + '/' + artifact for artifact in [
'libdeflate/libdeflate.a',
'zstd/lib/libzstd.a',
] + [
'abseil/' + x for x in abseil_libs
]])
@@ -1564,7 +1565,8 @@ with open(buildfile_tmp, 'w') as f:
hh = '$builddir/' + mode + '/gen/' + src.replace('.idl.hh', '.dist.hh')
serializers[hh] = src
elif src.endswith('.json'):
swaggers.add(src)
hh = '$builddir/' + mode + '/gen/' + src + '.hh'
swaggers[hh] = src
elif src.endswith('.rl'):
hh = '$builddir/' + mode + '/gen/' + src.replace('.rl', '.hh')
ragels[hh] = src
@@ -1606,14 +1608,12 @@ with open(buildfile_tmp, 'w') as f:
)
)
gen_dir = '$builddir/{}/gen'.format(mode)
gen_headers = []
for th in thrifts:
gen_headers += th.headers('$builddir/{}/gen'.format(mode))
for g in antlr3_grammars:
gen_headers += g.headers('$builddir/{}/gen'.format(mode))
for g in swaggers:
gen_headers += g.headers('$builddir/{}/gen'.format(mode))
gen_headers += list(swaggers.keys())
gen_headers += list(serializers.keys())
gen_headers += list(ragels.keys())
gen_headers_dep = ' '.join(gen_headers)
@@ -1623,13 +1623,9 @@ with open(buildfile_tmp, 'w') as f:
f.write('build {}: cxx.{} {} || {} {}\n'.format(obj, mode, src, seastar_dep, gen_headers_dep))
if src in extra_cxxflags:
f.write(' cxxflags = {seastar_cflags} $cxxflags $cxxflags_{mode} {extra_cxxflags}\n'.format(mode=mode, extra_cxxflags=extra_cxxflags[src], **modeval))
for swagger in swaggers:
hh = swagger.headers(gen_dir)[0]
cc = swagger.sources(gen_dir)[0]
obj = swagger.objects(gen_dir)[0]
src = swagger.source
f.write('build {} | {} : swagger {} | {}/scripts/seastar-json2code.py\n'.format(hh, cc, src, args.seastar_path))
f.write('build {}: cxx.{} {}\n'.format(obj, mode, cc))
for hh in swaggers:
src = swaggers[hh]
f.write('build {}: swagger {} | {}/scripts/seastar-json2code.py\n'.format(hh, src, args.seastar_path))
for hh in serializers:
src = serializers[hh]
f.write('build {}: serializer {} | idl-compiler.py\n'.format(hh, src))
@@ -1678,20 +1674,17 @@ with open(buildfile_tmp, 'w') as f:
f.write(textwrap.dedent('''\
build build/{mode}/iotune: copy build/{mode}/seastar/apps/iotune/iotune
''').format(**locals()))
f.write('build build/{mode}/scylla-package.tar.gz: package build/{mode}/scylla build/{mode}/iotune build/SCYLLA-RELEASE-FILE build/SCYLLA-VERSION-FILE build/debian/debian | always\n'.format(**locals()))
f.write('build build/{mode}/scylla-package.tar.gz: package build/{mode}/scylla build/{mode}/iotune build/SCYLLA-RELEASE-FILE build/SCYLLA-VERSION-FILE | always\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(' mode = {mode}\n'.format(**locals()))
f.write(f'build build/dist/{mode}/redhat: rpmbuild build/{mode}/scylla-package.tar.gz\n')
f.write(f' pool = submodule_pool\n')
f.write(f' mode = {mode}\n')
f.write(f'build build/dist/{mode}/debian: debbuild build/{mode}/scylla-package.tar.gz\n')
f.write(f' pool = submodule_pool\n')
f.write(f' mode = {mode}\n')
f.write(f'build dist-server-{mode}: phony build/dist/{mode}/redhat build/dist/{mode}/debian\n')
f.write('rule libdeflate.{mode}\n'.format(**locals()))
f.write(' command = make -C libdeflate BUILD_DIR=../build/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../build/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
f.write('build build/{mode}/libdeflate/libdeflate.a: libdeflate.{mode}\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write('build build/{mode}/zstd/lib/libzstd.a: ninja\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(' subdir = build/{mode}/zstd\n'.format(**locals()))
f.write(' target = libzstd.a\n'.format(**locals()))
for lib in abseil_libs:
f.write('build build/{mode}/abseil/{lib}: ninja\n'.format(**locals()))
@@ -1709,65 +1702,6 @@ with open(buildfile_tmp, 'w') as f:
'build check: phony {}\n'.format(' '.join(['{mode}-check'.format(mode=mode) for mode in modes]))
)
f.write(textwrap.dedent(f'''\
build dist-server-deb: phony {' '.join(['build/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
build dist-server-rpm: phony {' '.join(['build/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
build dist-server: phony dist-server-rpm dist-server-deb
rule build-submodule-reloc
command = cd $reloc_dir && ./reloc/build_reloc.sh
rule build-submodule-rpm
command = cd $dir && ./reloc/build_rpm.sh --reloc-pkg $artifact
rule build-submodule-deb
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact
build scylla-jmx/build/scylla-jmx-package.tar.gz: build-submodule-reloc
reloc_dir = scylla-jmx
build dist-jmx-rpm: build-submodule-rpm scylla-jmx/build/scylla-jmx-package.tar.gz
dir = scylla-jmx
artifact = build/scylla-jmx-package.tar.gz
build dist-jmx-deb: build-submodule-deb scylla-jmx/build/scylla-jmx-package.tar.gz
dir = scylla-jmx
artifact = build/scylla-jmx-package.tar.gz
build dist-jmx: phony dist-jmx-rpm dist-jmx-deb
build scylla-tools/build/scylla-tools-package.tar.gz: build-submodule-reloc
reloc_dir = scylla-tools
build dist-tools-rpm: build-submodule-rpm scylla-tools/build/scylla-tools-package.tar.gz
dir = scylla-tools
artifact = build/scylla-tools-package.tar.gz
build dist-tools-deb: build-submodule-deb scylla-tools/build/scylla-tools-package.tar.gz
dir = scylla-tools
artifact = build/scylla-tools-package.tar.gz
build dist-tools: phony dist-tools-rpm dist-tools-deb
rule build-python-reloc
command = ./reloc/python3/build_reloc.sh
rule build-python-rpm
command = ./reloc/python3/build_rpm.sh
rule build-python-deb
command = ./reloc/python3/build_deb.sh
build build/release/scylla-python3-package.tar.gz: build-python-reloc
build dist-python-rpm: build-python-rpm build/release/scylla-python3-package.tar.gz
build dist-python-deb: build-python-deb build/release/scylla-python3-package.tar.gz
build dist-python: phony dist-python-rpm dist-python-deb
build dist-deb: phony dist-server-deb dist-python-deb dist-jmx-deb dist-tools-deb
build dist-rpm: phony dist-server-rpm dist-python-rpm dist-jmx-rpm dist-tools-rpm
build dist: phony dist-server dist-python dist-jmx dist-tools
'''))
f.write(textwrap.dedent(f'''\
build dist-check: phony {' '.join(['dist-check-{mode}'.format(mode=mode) for mode in build_modes])}
rule dist-check
command = ./tools/testing/dist-check/dist-check.sh --mode $mode
'''))
for mode in build_modes:
f.write(textwrap.dedent(f'''\
build dist-check-{mode}: dist-check
mode = {mode}
'''))
f.write(textwrap.dedent('''\
rule configure
command = {python} configure.py $configure_args
@@ -1792,9 +1726,6 @@ with open(buildfile_tmp, 'w') as f:
rule scylla_version_gen
command = ./SCYLLA-VERSION-GEN
build build/SCYLLA-RELEASE-FILE build/SCYLLA-VERSION-FILE: scylla_version_gen
rule debian_files_gen
command = ./dist/debian/debian_files_gen.py
build build/debian/debian: debian_files_gen | always
''').format(modes_list=' '.join(build_modes), **globals()))
os.rename(buildfile_tmp, buildfile)

View File

@@ -73,9 +73,7 @@ public:
return counter_id(utils::make_random_uuid());
}
};
static_assert(
std::is_standard_layout_v<counter_id> && std::is_trivial_v<counter_id>,
"counter_id should be a POD type");
static_assert(std::is_pod<counter_id>::value, "counter_id should be a POD type");
std::ostream& operator<<(std::ostream& os, const counter_id& id);
@@ -156,10 +154,10 @@ private:
// Shared logic for applying counter_shards and counter_shard_views.
// T is either counter_shard or basic_counter_shard_view<U>.
template<typename T>
requires requires(T shard) {
{ shard.value() } -> std::same_as<int64_t>;
{ shard.logical_clock() } -> std::same_as<int64_t>;
}
GCC6_CONCEPT(requires requires(T shard) {
{ shard.value() } -> int64_t;
{ shard.logical_clock() } -> int64_t;
})
counter_shard& do_apply(T&& other) noexcept {
auto other_clock = other.logical_clock();
if (_logical_clock < other_clock) {

View File

@@ -106,7 +106,7 @@ using namespace cql3::statements;
using namespace cql3::selection;
using cql3::cql3_type;
using conditions_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,lw_shared_ptr<cql3::column_condition::raw>>>;
using operations_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>, std::unique_ptr<cql3::operation::raw_update>>>;
using operations_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,::shared_ptr<cql3::operation::raw_update>>>;
// ANTLR forces us to define a default-initialized return value
// for every rule (e.g. [returns ut_name name]), but not every type
@@ -255,8 +255,8 @@ struct uninitialized {
return to_lower(s) == "true";
}
void add_raw_update(std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>, std::unique_ptr<cql3::operation::raw_update>>>& operations,
::shared_ptr<cql3::column_identifier::raw> key, std::unique_ptr<cql3::operation::raw_update> update)
void add_raw_update(std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,::shared_ptr<cql3::operation::raw_update>>>& operations,
::shared_ptr<cql3::column_identifier::raw> key, ::shared_ptr<cql3::operation::raw_update> update)
{
for (auto&& p : operations) {
if (*p.first == *key && !p.second->is_compatible_with(update)) {
@@ -532,7 +532,7 @@ updateStatement returns [std::unique_ptr<raw::update_statement> expr]
@init {
bool if_exists = false;
auto attrs = std::make_unique<cql3::attributes::raw>();
std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>, std::unique_ptr<cql3::operation::raw_update>>> operations;
std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>, ::shared_ptr<cql3::operation::raw_update>>> operations;
}
: K_UPDATE cf=columnFamilyName
( usingClause[attrs] )?
@@ -563,7 +563,7 @@ updateConditions returns [conditions_type conditions]
deleteStatement returns [std::unique_ptr<raw::delete_statement> expr]
@init {
auto attrs = std::make_unique<cql3::attributes::raw>();
std::vector<std::unique_ptr<cql3::operation::raw_deletion>> column_deletions;
std::vector<::shared_ptr<cql3::operation::raw_deletion>> column_deletions;
bool if_exists = false;
}
: K_DELETE ( dels=deleteSelection { column_deletions = std::move(dels); } )?
@@ -581,15 +581,15 @@ deleteStatement returns [std::unique_ptr<raw::delete_statement> expr]
}
;
deleteSelection returns [std::vector<std::unique_ptr<cql3::operation::raw_deletion>> operations]
deleteSelection returns [std::vector<::shared_ptr<cql3::operation::raw_deletion>> operations]
: t1=deleteOp { $operations.emplace_back(std::move(t1)); }
(',' tN=deleteOp { $operations.emplace_back(std::move(tN)); })*
;
deleteOp returns [std::unique_ptr<cql3::operation::raw_deletion> op]
: c=cident { $op = std::make_unique<cql3::operation::column_deletion>(std::move(c)); }
| c=cident '[' t=term ']' { $op = std::make_unique<cql3::operation::element_deletion>(std::move(c), std::move(t)); }
| c=cident '.' field=ident { $op = std::make_unique<cql3::operation::field_deletion>(std::move(c), std::move(field)); }
deleteOp returns [::shared_ptr<cql3::operation::raw_deletion> op]
: c=cident { $op = ::make_shared<cql3::operation::column_deletion>(std::move(c)); }
| c=cident '[' t=term ']' { $op = ::make_shared<cql3::operation::element_deletion>(std::move(c), std::move(t)); }
| c=cident '.' field=ident { $op = ::make_shared<cql3::operation::field_deletion>(std::move(c), std::move(field)); }
;
usingClauseDelete[std::unique_ptr<cql3::attributes::raw>& attrs]
@@ -1416,12 +1416,12 @@ normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_ide
: t=term ('+' c=cident )?
{
if (!c) {
add_raw_update(operations, key, std::make_unique<cql3::operation::set_value>(t));
add_raw_update(operations, key, ::make_shared<cql3::operation::set_value>(t));
} else {
if (*key != *c) {
add_recognition_error("Only expressions of the form X = <value> + X are supported.");
}
add_raw_update(operations, key, std::make_unique<cql3::operation::prepend>(t));
add_raw_update(operations, key, ::make_shared<cql3::operation::prepend>(t));
}
}
| c=cident sig=('+' | '-') t=term
@@ -1429,11 +1429,11 @@ normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_ide
if (*key != *c) {
add_recognition_error("Only expressions of the form X = X " + $sig.text + "<value> are supported.");
}
std::unique_ptr<cql3::operation::raw_update> op;
shared_ptr<cql3::operation::raw_update> op;
if ($sig.text == "+") {
op = std::make_unique<cql3::operation::addition>(t);
op = make_shared<cql3::operation::addition>(t);
} else {
op = std::make_unique<cql3::operation::subtraction>(t);
op = make_shared<cql3::operation::subtraction>(t);
}
add_raw_update(operations, key, std::move(op));
}
@@ -1444,11 +1444,11 @@ normalColumnOperation[operations_type& operations, ::shared_ptr<cql3::column_ide
// We don't yet allow a '+' in front of an integer, but we could in the future really, so let's be future-proof in our error message
add_recognition_error("Only expressions of the form X = X " + sstring($i.text[0] == '-' ? "-" : "+") + " <value> are supported.");
}
add_raw_update(operations, key, std::make_unique<cql3::operation::addition>(cql3::constants::literal::integer($i.text)));
add_raw_update(operations, key, make_shared<cql3::operation::addition>(cql3::constants::literal::integer($i.text)));
}
| K_SCYLLA_COUNTER_SHARD_LIST '(' t=term ')'
{
add_raw_update(operations, key, std::make_unique<cql3::operation::set_counter_value_from_tuple_list>(t));
add_raw_update(operations, key, ::make_shared<cql3::operation::set_counter_value_from_tuple_list>(t));
}
;
@@ -1458,7 +1458,7 @@ collectionColumnOperation[operations_type& operations,
bool by_uuid]
: '=' t=term
{
add_raw_update(operations, key, std::make_unique<cql3::operation::set_element>(k, t, by_uuid));
add_raw_update(operations, key, make_shared<cql3::operation::set_element>(k, t, by_uuid));
}
;
@@ -1467,7 +1467,7 @@ udtColumnOperation[operations_type& operations,
shared_ptr<cql3::column_identifier> field]
: '=' t=term
{
add_raw_update(operations, std::move(key), std::make_unique<cql3::operation::set_field>(std::move(field), std::move(t)));
add_raw_update(operations, std::move(key), make_shared<cql3::operation::set_field>(std::move(field), std::move(t)));
}
;

View File

@@ -87,7 +87,7 @@ abstract_marker::raw::raw(int32_t bind_index)
return ::make_shared<constants::marker>(_bind_index, receiver);
}
assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}

View File

@@ -72,7 +72,7 @@ public:
virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual sstring to_string() const override;
};

View File

@@ -70,7 +70,7 @@ public:
// Test all elements of toTest for assignment. If all are exact match, return exact match. If any is not assignable,
// return not assignable. Otherwise, return weakly assignable.
template <typename AssignmentTestablePtrRange>
static test_result test_all(database& db, const sstring& keyspace, const column_specification& receiver,
static test_result test_all(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver,
AssignmentTestablePtrRange&& to_test) {
test_result res = test_result::EXACT_MATCH;
for (auto&& rt : to_test) {
@@ -99,7 +99,7 @@ public:
* Most callers should just call the isAssignable() method on the result, though functions have a use for
* testing "strong" equality to decide the most precise overload to pick when multiple could match.
*/
virtual test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const = 0;
virtual test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const = 0;
// for error reporting
virtual sstring assignment_testable_source_context() const = 0;

View File

@@ -139,6 +139,16 @@ static inline
return def.column_specification->name;
}
// Maps every column definition in `defs` to its identifier via
// to_identifier(), keeping the input order.
static inline
std::vector<::shared_ptr<column_identifier>> to_identifiers(const std::vector<const column_definition*>& defs) {
    std::vector<::shared_ptr<column_identifier>> identifiers;
    // One output element per input definition.
    identifiers.reserve(defs.size());
    for (const column_definition* definition : defs) {
        identifiers.push_back(to_identifier(*definition));
    }
    return identifiers;
}
}
namespace std {

View File

@@ -82,9 +82,9 @@ constants::literal::parsed_value(data_type validator) const
}
assignment_testable::test_result
constants::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const
constants::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
{
auto receiver_type = receiver.type->as_cql3_type();
auto receiver_type = receiver->type->as_cql3_type();
if (receiver_type.is_collection() || receiver_type.is_user_type()) {
return test_result::NOT_ASSIGNABLE;
}
@@ -157,7 +157,7 @@ constants::literal::test_assignment(database& db, const sstring& keyspace, const
::shared_ptr<term>
constants::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
{
if (!is_assignable(test_assignment(db, keyspace, *receiver))) {
if (!is_assignable(test_assignment(db, keyspace, receiver))) {
throw exceptions::invalid_request_exception(format("Invalid {} constant ({}) for \"{}\" of type {}",
_type, _text, *receiver->name, receiver->type->as_cql3_type().to_string()));
}

View File

@@ -88,7 +88,7 @@ public:
public:
static thread_local const ::shared_ptr<terminal> NULL_VALUE;
virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
if (!is_assignable(test_assignment(db, keyspace, *receiver))) {
if (!is_assignable(test_assignment(db, keyspace, receiver))) {
throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement");
}
return NULL_VALUE;
@@ -96,8 +96,8 @@ public:
virtual assignment_testable::test_result test_assignment(database& db,
const sstring& keyspace,
const column_specification& receiver) const override {
return receiver.type->is_counter()
lw_shared_ptr<column_specification> receiver) const override {
return receiver->type->is_counter()
? assignment_testable::test_result::NOT_ASSIGNABLE
: assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
@@ -161,7 +161,7 @@ public:
return _text;
}
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const;
virtual sstring to_string() const override {
return _type == type::STRING ? sstring(format("'{}'", _text)) : _text;

View File

@@ -95,6 +95,10 @@ public:
return _name.keyspace == ks_name && _name.name == function_name;
}
// Unconditionally reports that this function holds no reference to `f`.
virtual bool has_reference_to(function& f) const override {
return false;
}
virtual sstring column_name(const std::vector<sstring>& column_names) const override {
return format("{}({})", _name, join(", ", column_names));
}

View File

@@ -144,6 +144,10 @@ public:
return false;
}
// Unconditionally reports that this function holds no reference to `f`.
virtual bool has_reference_to(function& f) const override {
return false;
}
virtual sstring column_name(const std::vector<sstring>& column_names) const override {
return "[json]";
}

View File

@@ -1,122 +0,0 @@
/*
* Copyright (C) 2019 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "error_injection_fcts.hh"
#include "utils/error_injection.hh"
#include "types/list.hh"
namespace cql3
{
namespace functions
{
namespace error_injection
{
namespace
{
// Adapts a callable into a failure_injection_function.
//
// Func: callable invoked as _func(sf, parameters); its result is what
//       execute() returns.
// Pure: the value reported by is_pure().
template <typename Func, bool Pure>
class failure_injection_function_for : public failure_injection_function {
    Func _func;
public:
    // arg_types is taken by non-const value: std::move() on a const object
    // silently degrades to a copy, so the vector must be mutable for the
    // move into the base class to actually happen.
    failure_injection_function_for(sstring name,
            data_type return_type,
            std::vector<data_type> arg_types,
            Func&& func)
        : failure_injection_function(std::move(name), std::move(return_type), std::move(arg_types))
        , _func(std::forward<Func>(func)) {}

    bool is_pure() const override {
        return Pure;
    }

    // Delegates to the wrapped callable.
    bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
        return _func(sf, parameters);
    }
};
// Wraps `func` in a failure_injection_function_for<Func, Pure> and returns
// it as a shared_ptr<function>.  All arguments are forwarded to the
// wrapper's constructor.
template <bool Pure, typename Func>
shared_ptr<function>
make_failure_injection_function(sstring name,
        data_type return_type,
        std::vector<data_type> args_type,
        Func&& func) {
    using wrapper_type = failure_injection_function_for<Func, Pure>;
    return ::make_shared<wrapper_type>(
            std::move(name),
            std::move(return_type),
            std::move(args_type),
            std::forward<Func>(func));
}
} // anonymous namespace
// Builds the non-pure function "enable_injection", taking two ascii
// arguments: the injection name and a one-shot flag.  The flag is true only
// when the second argument is exactly the string "true".
shared_ptr<function> make_enable_injection_function() {
    auto enable_everywhere = [] (cql_serialization_format, const std::vector<bytes_opt>& parameters) {
        sstring injection_name = ascii_type->get_string(parameters[0].value());
        const bool one_shot = ascii_type->get_string(parameters[1].value()) == "true";
        // Enable through smp::invoke_on_all and wait for it to finish.
        smp::invoke_on_all([injection_name, one_shot] () mutable {
            utils::get_local_injector().enable(injection_name, one_shot);
        }).get0();
        return std::nullopt;
    };
    return make_failure_injection_function<false>("enable_injection", empty_type, { ascii_type, ascii_type },
            std::move(enable_everywhere));
}
// Builds the non-pure function "disable_injection", taking a single ascii
// argument: the name of the injection to disable.
shared_ptr<function> make_disable_injection_function() {
    auto disable_everywhere = [] (cql_serialization_format, const std::vector<bytes_opt>& parameters) {
        sstring injection_name = ascii_type->get_string(parameters[0].value());
        // Disable through smp::invoke_on_all and wait for it to finish.
        smp::invoke_on_all([injection_name] () mutable {
            utils::get_local_injector().disable(injection_name);
        }).get0();
        return std::nullopt;
    };
    return make_failure_injection_function<false>("disable_injection", empty_type, { ascii_type },
            std::move(disable_everywhere));
}
// Builds the pure, zero-argument function "enabled_injections", whose result
// is a serialized list of ascii values naming the enabled injections.
shared_ptr<function> make_enabled_injections_function() {
const auto list_type_inst = list_type_impl::get_instance(ascii_type, false);
return make_failure_injection_function<true>("enabled_injections", list_type_inst, {},
[list_type_inst] (cql_serialization_format, const std::vector<bytes_opt>&) -> bytes {
// The mapper is called once per entry of smp::all_cpus() and reads
// utils::get_local_injector() each time.
// NOTE(review): the mapper ignores its cpu-id argument — confirm that
// querying the local injector (rather than each shard's) is intended.
return seastar::map_reduce(smp::all_cpus(), [] (unsigned) {
return make_ready_future<std::vector<sstring>>(utils::get_local_injector().enabled_injections());
}, std::vector<data_value>(),
// Reducer: append each name that is not already in the accumulator, so
// the final list has no duplicates.
[](std::vector<data_value> a, std::vector<sstring>&& b) -> std::vector<data_value> {
for (auto&& x : b) {
if (a.end() == std::find(a.begin(), a.end(), x)) {
a.push_back(data_value(std::move(x)));
}
}
return a;
// Turn the collected names into a list value and serialize it.
}).then([list_type_inst](std::vector<data_value> const& active_injections) {
auto list_val = make_list_value(list_type_inst, active_injections);
return list_type_inst->decompose(list_val);
}).get0();
});
}
} // namespace error_injection
} // namespace functions
} // namespace cql3

View File

@@ -1,56 +0,0 @@
/*
* Copyright (C) 2019 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "native_scalar_function.hh"
namespace cql3
{
namespace functions
{
namespace error_injection
{
// Common base of the failure-injection functions; derives from
// native_scalar_function and can only be constructed by subclasses.
class failure_injection_function : public native_scalar_function {
protected:
// Forwards the name, return type and argument types to native_scalar_function.
failure_injection_function(sstring name, data_type return_type, std::vector<data_type> args_type)
: native_scalar_function(std::move(name), std::move(return_type), std::move(args_type)) {
}
// Always answers true.
// NOTE(review): presumably because implementations wait on futures — confirm.
bool requires_thread() const override {
return true;
}
};
shared_ptr<function> make_enable_injection_function();
shared_ptr<function> make_disable_injection_function();
shared_ptr<function> make_enabled_injections_function();
} // namespace error_injection
} // namespace functions
} // namespace cql3

View File

@@ -82,6 +82,7 @@ public:
virtual void print(std::ostream& os) const = 0;
virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0;
virtual bool has_reference_to(function& f) const = 0;
/**
* Returns the name of the function to use within a ResultSet.

View File

@@ -79,7 +79,7 @@ public:
// All parameters must be terminal
static bytes_opt execute(scalar_function& fun, std::vector<shared_ptr<term>> parameters);
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual sstring to_string() const override;
};
};

View File

@@ -37,8 +37,6 @@
#include "concrete_types.hh"
#include "as_json_function.hh"
#include "error_injection_fcts.hh"
namespace std {
std::ostream& operator<<(std::ostream& os, const std::vector<data_type>& arg_types) {
for (size_t i = 0; i < arg_types.size(); ++i) {
@@ -109,10 +107,6 @@ functions::init() {
declare(make_blob_as_varchar_fct());
add_agg_functions(ret);
declare(error_injection::make_enable_injection_function());
declare(error_injection::make_disable_injection_function());
declare(error_injection::make_enabled_injections_function());
// also needed for smp:
#if 0
MigrationManager.instance.register(new FunctionsMigrationListener());
@@ -158,6 +152,11 @@ functions::make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
fun.arg_types()[i]);
}
/// Returns how many entries are declared under the given function name.
int
functions::get_overload_count(const function_name& name) {
    // count() yields an unsigned size; the declared return type is int, so
    // spell out the conversion that previously happened implicitly.
    const auto overloads = _declared.count(name);
    return static_cast<int>(overloads);
}
inline
shared_ptr<function>
make_to_json_function(data_type t) {
@@ -188,7 +187,7 @@ functions::get(database& db,
const std::vector<shared_ptr<assignment_testable>>& provided_args,
const sstring& receiver_ks,
const sstring& receiver_cf,
const column_specification* receiver) {
lw_shared_ptr<column_specification> receiver) {
static const function_name TOKEN_FUNCTION_NAME = function_name::native_function("token");
static const function_name TO_JSON_FUNCTION_NAME = function_name::native_function("tojson");
@@ -371,7 +370,7 @@ functions::validate_types(database& db,
}
auto&& expected = make_arg_spec(receiver_ks, receiver_cf, *fun, i);
if (!is_assignable(provided->test_assignment(db, keyspace, *expected))) {
if (!is_assignable(provided->test_assignment(db, keyspace, expected))) {
throw exceptions::invalid_request_exception(
format("Type error: {} cannot be passed as argument {:d} of function {} of type {}",
provided, i, fun->name(), expected->type->as_cql3_type()));
@@ -398,7 +397,7 @@ functions::match_arguments(database& db, const sstring& keyspace,
continue;
}
auto&& expected = make_arg_spec(receiver_ks, receiver_cf, *fun, i);
auto arg_res = provided->test_assignment(db, keyspace, *expected);
auto arg_res = provided->test_assignment(db, keyspace, expected);
if (arg_res == assignment_testable::test_result::NOT_ASSIGNABLE) {
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
@@ -515,7 +514,7 @@ function_call::raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr
[] (auto&& x) -> shared_ptr<assignment_testable> {
return x;
});
auto&& fun = functions::functions::get(db, keyspace, _name, args, receiver->ks_name, receiver->cf_name, receiver.get());
auto&& fun = functions::functions::get(db, keyspace, _name, args, receiver->ks_name, receiver->cf_name, receiver);
if (!fun) {
throw exceptions::invalid_request_exception(format("Unknown function {} called", _name));
}
@@ -573,16 +572,16 @@ function_call::raw::execute(scalar_function& fun, std::vector<shared_ptr<term>>
}
assignment_testable::test_result
function_call::raw::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
function_call::raw::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
// Note: Functions.get() will return null if the function doesn't exist, or throw if no function matching
// the arguments can be found. We may get one of those if an undefined/wrong function is used as argument
// of another, existing, function. In that case, we return true here because we'll throw a proper exception
// later with a more helpful error message than if we were to return false here.
try {
auto&& fun = functions::get(db, keyspace, _name, _terms, receiver.ks_name, receiver.cf_name, &receiver);
if (fun && receiver.type == fun->return_type()) {
auto&& fun = functions::get(db, keyspace, _name, _terms, receiver->ks_name, receiver->cf_name, receiver);
if (fun && receiver->type == fun->return_type()) {
return assignment_testable::test_result::EXACT_MATCH;
} else if (!fun || receiver.type->is_value_compatible_with(*fun->return_type())) {
} else if (!fun || receiver->type->is_value_compatible_with(*fun->return_type())) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
} else {
return assignment_testable::test_result::NOT_ASSIGNABLE;

View File

@@ -69,6 +69,7 @@ private:
public:
static lw_shared_ptr<column_specification> make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
const function& fun, size_t i);
static int get_overload_count(const function_name& name);
public:
static shared_ptr<function> get(database& db,
const sstring& keyspace,
@@ -76,7 +77,7 @@ public:
const std::vector<shared_ptr<assignment_testable>>& provided_args,
const sstring& receiver_ks,
const sstring& receiver_cf,
const column_specification* receiver = nullptr);
lw_shared_ptr<column_specification> receiver = nullptr);
template <typename AssignmentTestablePtrRange>
static shared_ptr<function> get(database& db,
const sstring& keyspace,
@@ -84,7 +85,7 @@ public:
AssignmentTestablePtrRange&& provided_args,
const sstring& receiver_ks,
const sstring& receiver_cf,
const column_specification* receiver = nullptr) {
lw_shared_ptr<column_specification> receiver = nullptr) {
const std::vector<shared_ptr<assignment_testable>> args(std::begin(provided_args), std::end(provided_args));
return get(db, keyspace, name, args, receiver_ks, receiver_cf, receiver);
}

View File

@@ -93,7 +93,7 @@ lists::literal::validate_assignable_to(database& db, const sstring keyspace, con
}
auto&& value_spec = value_spec_of(receiver);
for (auto rt : _elements) {
if (!is_assignable(rt->test_assignment(db, keyspace, *value_spec))) {
if (!is_assignable(rt->test_assignment(db, keyspace, value_spec))) {
throw exceptions::invalid_request_exception(format("Invalid list literal for {}: value {} is not of type {}",
*receiver.name, *rt, value_spec->type->as_cql3_type()));
}
@@ -101,8 +101,8 @@ lists::literal::validate_assignable_to(database& db, const sstring keyspace, con
}
assignment_testable::test_result
lists::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
if (!dynamic_pointer_cast<const list_type_impl>(receiver.type)) {
lists::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
if (!dynamic_pointer_cast<const list_type_impl>(receiver->type)) {
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
@@ -111,11 +111,11 @@ lists::literal::test_assignment(database& db, const sstring& keyspace, const col
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
auto&& value_spec = value_spec_of(receiver);
auto&& value_spec = value_spec_of(*receiver);
std::vector<shared_ptr<assignment_testable>> to_test;
to_test.reserve(_elements.size());
std::copy(_elements.begin(), _elements.end(), std::back_inserter(to_test));
return assignment_testable::test_all(db, keyspace, *value_spec, to_test);
return assignment_testable::test_all(db, keyspace, value_spec, to_test);
}
sstring

View File

@@ -68,7 +68,7 @@ public:
private:
void validate_assignable_to(database& db, const sstring keyspace, const column_specification& receiver) const;
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual sstring to_string() const override;
};

View File

@@ -104,31 +104,31 @@ maps::literal::validate_assignable_to(database& db, const sstring& keyspace, con
auto&& key_spec = maps::key_spec_of(receiver);
auto&& value_spec = maps::value_spec_of(receiver);
for (auto&& entry : entries) {
if (!is_assignable(entry.first->test_assignment(db, keyspace, *key_spec))) {
if (!is_assignable(entry.first->test_assignment(db, keyspace, key_spec))) {
throw exceptions::invalid_request_exception(format("Invalid map literal for {}: key {} is not of type {}", *receiver.name, *entry.first, key_spec->type->as_cql3_type()));
}
if (!is_assignable(entry.second->test_assignment(db, keyspace, *value_spec))) {
if (!is_assignable(entry.second->test_assignment(db, keyspace, value_spec))) {
throw exceptions::invalid_request_exception(format("Invalid map literal for {}: value {} is not of type {}", *receiver.name, *entry.second, value_spec->type->as_cql3_type()));
}
}
}
assignment_testable::test_result
maps::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
if (!dynamic_pointer_cast<const map_type_impl>(receiver.type)) {
maps::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
if (!dynamic_pointer_cast<const map_type_impl>(receiver->type)) {
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
// If there are no elements, we can't say it's an exact match (an empty map is fundamentally polymorphic).
if (entries.empty()) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
auto key_spec = maps::key_spec_of(receiver);
auto value_spec = maps::value_spec_of(receiver);
auto key_spec = maps::key_spec_of(*receiver);
auto value_spec = maps::value_spec_of(*receiver);
// It's an exact match if all are exact match, but is not assignable as soon as any is non assignable.
auto res = assignment_testable::test_result::EXACT_MATCH;
for (auto entry : entries) {
auto t1 = entry.first->test_assignment(db, keyspace, *key_spec);
auto t2 = entry.second->test_assignment(db, keyspace, *value_spec);
auto t1 = entry.first->test_assignment(db, keyspace, key_spec);
auto t2 = entry.second->test_assignment(db, keyspace, value_spec);
if (t1 == assignment_testable::test_result::NOT_ASSIGNABLE || t2 == assignment_testable::test_result::NOT_ASSIGNABLE)
return assignment_testable::test_result::NOT_ASSIGNABLE;
if (t1 != assignment_testable::test_result::EXACT_MATCH || t2 != assignment_testable::test_result::EXACT_MATCH)

View File

@@ -70,7 +70,7 @@ public:
private:
void validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const;
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual sstring to_string() const override;
};

View File

@@ -87,10 +87,10 @@ operation::set_element::prepare(database& db, const sstring& keyspace, const col
}
bool
operation::set_element::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
operation::set_element::is_compatible_with(shared_ptr<raw_update> other) const {
// TODO: we could check that the other operation is not setting the same element
// too (but since the index/key set may be a bind variables we can't always do it at this point)
return !dynamic_cast<const set_value*>(other.get());
return !dynamic_pointer_cast<set_value>(std::move(other));
}
sstring
@@ -120,13 +120,13 @@ operation::set_field::prepare(database& db, const sstring& keyspace, const colum
}
bool
operation::set_field::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
auto x = dynamic_cast<const set_field*>(other.get());
operation::set_field::is_compatible_with(shared_ptr<raw_update> other) const {
auto x = dynamic_pointer_cast<set_field>(other);
if (x) {
return _field != x->_field;
}
return !dynamic_cast<const set_value*>(other.get());
return !dynamic_pointer_cast<set_value>(std::move(other));
}
const column_identifier::raw&
@@ -185,8 +185,8 @@ operation::addition::prepare(database& db, const sstring& keyspace, const column
}
bool
operation::addition::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
return !dynamic_cast<const set_value*>(other.get());
operation::addition::is_compatible_with(shared_ptr<raw_update> other) const {
return !dynamic_pointer_cast<set_value>(other);
}
sstring
@@ -227,8 +227,8 @@ operation::subtraction::prepare(database& db, const sstring& keyspace, const col
}
bool
operation::subtraction::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
return !dynamic_cast<const set_value*>(other.get());
operation::subtraction::is_compatible_with(shared_ptr<raw_update> other) const {
return !dynamic_pointer_cast<set_value>(other);
}
sstring
@@ -250,8 +250,8 @@ operation::prepend::prepare(database& db, const sstring& keyspace, const column_
}
bool
operation::prepend::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
return !dynamic_cast<const set_value*>(other.get());
operation::prepend::is_compatible_with(shared_ptr<raw_update> other) const {
return !dynamic_pointer_cast<set_value>(other);
}
@@ -356,7 +356,7 @@ operation::set_counter_value_from_tuple_list::prepare(database& db, const sstrin
};
bool
operation::set_value::is_compatible_with(const std::unique_ptr<raw_update>& other) const {
operation::set_value::is_compatible_with(::shared_ptr <raw_update> other) const {
// We don't allow setting the same column multiple times, because 1)
// it's stupid and 2) the result would seem random to the user.
return false;

View File

@@ -168,7 +168,7 @@ public:
* @return whether this operation can be applied alongside the {@code
* other} update (in the same UPDATE statement for the same column).
*/
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const = 0;
virtual bool is_compatible_with(::shared_ptr<raw_update> other) const = 0;
};
/**
@@ -181,7 +181,7 @@ public:
*/
class raw_deletion {
public:
virtual ~raw_deletion() = default;
~raw_deletion() {}
/**
* The name of the column affected by this delete operation.
@@ -218,7 +218,7 @@ public:
virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const override;
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(shared_ptr<raw_update> other) const override;
};
// Set a single field inside a user-defined type.
@@ -234,7 +234,7 @@ public:
virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const override;
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(shared_ptr<raw_update> other) const override;
};
// Delete a single field inside a user-defined type.
@@ -263,7 +263,7 @@ public:
virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const override;
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(shared_ptr<raw_update> other) const override;
};
class subtraction : public raw_update {
@@ -277,7 +277,7 @@ public:
virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const override;
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(shared_ptr<raw_update> other) const override;
};
class prepend : public raw_update {
@@ -291,7 +291,7 @@ public:
virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const override;
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(shared_ptr<raw_update> other) const override;
};
class column_deletion;

View File

@@ -65,7 +65,7 @@ public:
}
#endif
virtual bool is_compatible_with(const std::unique_ptr<raw_update>& other) const override;
virtual bool is_compatible_with(::shared_ptr <raw_update> other) const override;
};
class operation::set_counter_value_from_tuple_list : public set_value {

View File

@@ -41,7 +41,7 @@
#pragma once
#include <concepts>
#include <seastar/util/gcc6-concepts.hh>
#include "timestamp.hh"
#include "bytes.hh"
#include "db/consistency_level_type.hh"
@@ -97,11 +97,11 @@ private:
* @param values_ranges a vector of values ranges for each statement in the batch.
*/
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
explicit query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges);
public:
@@ -145,11 +145,11 @@ public:
* @param values_ranges a vector of values ranges for each statement in the batch.
*/
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
static query_options make_batch_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges) {
return query_options(std::move(o), std::move(values_ranges));
}
@@ -251,11 +251,11 @@ private:
};
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
GCC6_CONCEPT( requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) )
query_options::query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges)
: query_options(std::move(o))
{

View File

@@ -562,6 +562,27 @@ query_processor::prepare(sstring query_string, const service::client_state& clie
}
}
/// Fetches the already-cached prepared statement for `query_string` in
/// `keyspace`, without preparing anything.
///
/// `for_thrift` selects which flavour is looked up: the thrift key/id pair
/// with a thrift result message, or the CQL key/id pair with a CQL result
/// message. The lookup itself is delegated to
/// get_stored_prepared_statement_one().
::shared_ptr<cql_transport::messages::result_message::prepared>
query_processor::get_stored_prepared_statement(
        const std::string_view& query_string,
        const sstring& keyspace,
        bool for_thrift) {
    using namespace cql_transport::messages;

    // Handle the CQL case first; whatever remains is the thrift request.
    if (!for_thrift) {
        return get_stored_prepared_statement_one<result_message::prepared::cql>(
                query_string, keyspace, compute_id, prepared_cache_key_type::cql_id);
    }

    return get_stored_prepared_statement_one<result_message::prepared::thrift>(
            query_string, keyspace, compute_thrift_id, prepared_cache_key_type::thrift_id);
}
static std::string hash_target(std::string_view query_string, std::string_view keyspace) {
std::string ret(keyspace);
ret += query_string;

View File

@@ -414,6 +414,28 @@ private:
});
});
};
/// Looks up a statement in the prepared-statement cache.
///
/// `key_gen` builds the cache key from the query text and keyspace;
/// `id_getter` derives from that key the identifier passed to the
/// `ResultMsgType` constructor together with the cached entry.
/// Returns a default-constructed (null) pointer when the key is not cached.
template <typename ResultMsgType, typename KeyGenerator, typename IdGetter>
::shared_ptr<cql_transport::messages::result_message::prepared>
get_stored_prepared_statement_one(
        const std::string_view& query_string,
        const sstring& keyspace,
        KeyGenerator&& key_gen,
        IdGetter&& id_getter) {
    using prepared_ptr = ::shared_ptr<cql_transport::messages::result_message::prepared>;

    auto key = key_gen(query_string, keyspace);
    if (auto entry = _prepared_cache.find(key); entry != _prepared_cache.end()) {
        return ::make_shared<ResultMsgType>(id_getter(key), *entry);
    }
    return prepared_ptr();
}
// Returns the prepared statement stored for the given query string and
// keyspace; for_thrift chooses between the thrift and the CQL flavour of
// the lookup.
::shared_ptr<cql_transport::messages::result_message::prepared>
get_stored_prepared_statement(
const std::string_view& query_string,
const sstring& keyspace,
bool for_thrift);
};
class query_processor::migration_subscriber : public service::migration_listener {

View File

@@ -50,6 +50,7 @@
#include "result_generator.hh"
#include <seastar/util/gcc6-concepts.hh>
namespace cql3 {
@@ -149,13 +150,17 @@ public:
const std::vector<uint16_t>& partition_key_bind_indices() const;
};
GCC6_CONCEPT(
template<typename Visitor>
concept ResultVisitor = requires(Visitor& visitor) {
concept bool ResultVisitor = requires(Visitor& visitor) {
visitor.start_row();
visitor.accept_value(std::optional<query::result_bytes_view>());
visitor.end_row();
};
)
class result_set {
::shared_ptr<metadata> _metadata;
std::deque<std::vector<bytes_opt>> _rows;
@@ -194,7 +199,7 @@ public:
const std::deque<std::vector<bytes_opt>>& rows() const;
template<typename Visitor>
requires ResultVisitor<Visitor>
GCC6_CONCEPT(requires ResultVisitor<Visitor>)
void visit(Visitor&& visitor) const {
auto column_count = get_metadata().column_count();
for (auto& row : _rows) {
@@ -259,7 +264,7 @@ public:
}
template<typename Visitor>
requires ResultVisitor<Visitor>
GCC6_CONCEPT(requires ResultVisitor<Visitor>)
void visit(Visitor&& visitor) const {
if (_result_set) {
_result_set->visit(std::forward<Visitor>(visitor));

View File

@@ -107,8 +107,8 @@ public:
*/
virtual void reset() = 0;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override {
auto t1 = receiver.type->underlying_type();
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
auto t1 = receiver->type->underlying_type();
auto t2 = get_type()->underlying_type();
// We want columns of `counter_type' to be served by underlying type's overloads
// (here: `counter_cell_view::total_value_type()') with an `EXACT_MATCH'.

View File

@@ -98,17 +98,17 @@ sets::literal::validate_assignable_to(database& db, const sstring& keyspace, con
auto&& value_spec = value_spec_of(receiver);
for (shared_ptr<term::raw> rt : _elements) {
if (!is_assignable(rt->test_assignment(db, keyspace, *value_spec))) {
if (!is_assignable(rt->test_assignment(db, keyspace, value_spec))) {
throw exceptions::invalid_request_exception(format("Invalid set literal for {}: value {} is not of type {}", *receiver.name, *rt, value_spec->type->as_cql3_type()));
}
}
}
assignment_testable::test_result
sets::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
if (!dynamic_pointer_cast<const set_type_impl>(receiver.type)) {
sets::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
if (!dynamic_pointer_cast<const set_type_impl>(receiver->type)) {
// We've parsed empty maps as a set literal to break the ambiguity so handle that case now
if (dynamic_pointer_cast<const map_type_impl>(receiver.type) && _elements.empty()) {
if (dynamic_pointer_cast<const map_type_impl>(receiver->type) && _elements.empty()) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
@@ -120,10 +120,10 @@ sets::literal::test_assignment(database& db, const sstring& keyspace, const colu
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
auto&& value_spec = value_spec_of(receiver);
auto&& value_spec = value_spec_of(*receiver);
// FIXME: make assignment_testable::test_all() accept ranges
std::vector<shared_ptr<assignment_testable>> to_test(_elements.begin(), _elements.end());
return assignment_testable::test_all(db, keyspace, *value_spec, to_test);
return assignment_testable::test_all(db, keyspace, value_spec, to_test);
}
sstring

View File

@@ -67,7 +67,7 @@ public:
virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
void validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const;
assignment_testable::test_result
test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const;
test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const;
virtual sstring to_string() const override;
};

View File

@@ -108,6 +108,10 @@ public:
return _entity;
}
// Accessor for the raw term held in _map_key.
::shared_ptr<term::raw> get_map_key() {
return _map_key;
}
::shared_ptr<term::raw> get_value() {
return _value;
}

View File

@@ -294,12 +294,6 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View");
}
const auto& ks = db.find_keyspace(keyspace());
auto replication_type = ks.get_replication_strategy().get_type();
if (is_local_only && replication_type != locator::replication_strategy_type::local) {
throw std::logic_error(format("Internal queries should not try to alter table schema for non-local tables, because it leads to inconsistencies: {}.{}",
s->ks_name(), s->cf_name()));
}
auto cfm = schema_builder(s);
if (_properties->get_id()) {

View File

@@ -161,7 +161,7 @@ void batch_statement::validate()
|| (boost::distance(_statements
| boost::adaptors::transformed([] (auto&& s) { return s.statement->column_family(); })
| boost::adaptors::uniqued) != 1))) {
throw exceptions::invalid_request_exception("BATCH with conditions cannot span multiple tables");
throw exceptions::invalid_request_exception("Batch with conditions cannot span multiple tables");
}
std::optional<bool> raw_counter;
for (auto& s : _statements) {

View File

@@ -146,10 +146,6 @@ void cf_prop_defs::validate(const database& db, const schema::extensions_map& sc
cp.validate();
}
if (auto caching_options = get_caching_options(); caching_options && !caching_options->enabled() && !db.features().cluster_supports_per_table_caching()) {
throw exceptions::configuration_exception(KW_CACHING + " can't contain \"'enabled':false\" unless whole cluster supports it");
}
auto cdc_options = get_cdc_options(schema_extensions);
if (cdc_options && cdc_options->enabled() && !db.features().cluster_supports_cdc()) {
throw exceptions::configuration_exception("CDC not supported by the cluster");
@@ -204,21 +200,6 @@ std::optional<utils::UUID> cf_prop_defs::get_id() const {
return std::nullopt;
}
std::optional<caching_options> cf_prop_defs::get_caching_options() const {
auto value = get(KW_CACHING);
if (!value) {
return {};
}
return std::visit(make_visitor(
[] (const property_definitions::map_type& map) {
return map.empty() ? std::nullopt : std::optional<caching_options>(caching_options::from_map(map));
},
[] (const sstring& str) {
return std::optional<caching_options>(caching_options::from_sstring(str));
}
), *value);
}
const cdc::options* cf_prop_defs::get_cdc_options(const schema::extensions_map& schema_exts) const {
auto it = schema_exts.find(cdc::cdc_extension::NAME);
if (it == schema_exts.end()) {
@@ -305,10 +286,11 @@ void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_
builder.set_compressor_params(compression_parameters(*compression_options));
}
auto caching_options = get_caching_options();
if (caching_options) {
builder.set_caching_options(std::move(*caching_options));
}
#if 0
CachingOptions cachingOptions = getCachingOptions();
if (cachingOptions != null)
cfm.caching(cachingOptions);
#endif
// for extensions that are not altered, keep the old ones
auto& old_exts = builder.get_extensions();

View File

@@ -95,7 +95,6 @@ public:
std::map<sstring, sstring> get_compaction_options() const;
std::optional<std::map<sstring, sstring>> get_compression_options() const;
const cdc::options* get_cdc_options(const schema::extensions_map&) const;
std::optional<caching_options> get_caching_options() const;
#if 0
public CachingOptions getCachingOptions() throws SyntaxException, ConfigurationException
{

View File

@@ -122,7 +122,7 @@ delete_statement::prepare_internal(database& db, schema_ptr schema, variable_spe
delete_statement::delete_statement(::shared_ptr<cf_name> name,
std::unique_ptr<attributes::raw> attrs,
std::vector<std::unique_ptr<operation::raw_deletion>> deletions,
std::vector<::shared_ptr<operation::raw_deletion>> deletions,
std::vector<::shared_ptr<relation>> where_clause,
conditions_vector conditions,
bool if_exists)

View File

@@ -90,7 +90,7 @@ cql3::statements::list_users_statement::execute(service::storage_proxy& proxy, s
return do_for_each(sorted_roles, [&as, &results](const sstring& role) {
return when_all_succeed(
as.has_superuser(role),
as.underlying_role_manager().can_login(role)).then_unpack([&results, &role](bool super, bool login) {
as.underlying_role_manager().can_login(role)).then([&results, &role](bool super, bool login) {
if (login) {
results->add_column_value(utf8_type->decompose(role));
results->add_column_value(boolean_type->decompose(super));

View File

@@ -51,6 +51,7 @@
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/indirected.hpp>
#include "db/config.hh"
#include "service/storage_service.hh"
#include "transport/messages/result_message.hh"
#include "database.hh"
#include <seastar/core/execution_stage.hh>
@@ -265,7 +266,7 @@ dht::partition_range_vector
modification_statement::build_partition_keys(const query_options& options, const json_cache_opt& json_cache) const {
auto keys = _restrictions->get_partition_key_restrictions()->bounds_ranges(options);
for (auto const& k : keys) {
validation::validate_cql_key(*s, *k.start()->value().key());
validation::validate_cql_key(s, *k.start()->value().key());
}
return keys;
}

View File

@@ -109,13 +109,6 @@ bool property_definitions::has_property(const sstring& name) const {
return _properties.find(name) != _properties.end();
}
/// Returns the value stored under `name`, or an empty optional when no
/// such property exists.
std::optional<property_definitions::value_type> property_definitions::get(const sstring& name) const {
    const auto pos = _properties.find(name);
    if (pos == _properties.end()) {
        return std::nullopt;
    }
    return pos->second;
}
sstring property_definitions::get_string(sstring key, sstring default_value) const {
auto value = get_simple(key);
if (value) {

View File

@@ -86,8 +86,6 @@ protected:
public:
bool has_property(const sstring& name) const;
std::optional<value_type> get(const sstring& name) const;
sstring get_string(sstring key, sstring default_value) const;
// Return a property value, typed as a Boolean

View File

@@ -55,12 +55,12 @@ namespace raw {
class delete_statement : public modification_statement {
private:
std::vector<std::unique_ptr<operation::raw_deletion>> _deletions;
std::vector<::shared_ptr<operation::raw_deletion>> _deletions;
std::vector<::shared_ptr<relation>> _where_clause;
public:
delete_statement(::shared_ptr<cf_name> name,
std::unique_ptr<attributes::raw> attrs,
std::vector<std::unique_ptr<operation::raw_deletion>> deletions,
std::vector<::shared_ptr<operation::raw_deletion>> deletions,
std::vector<::shared_ptr<relation>> where_clause,
conditions_vector conditions,
bool if_exists);

View File

@@ -62,7 +62,7 @@ namespace raw {
class update_statement : public raw::modification_statement {
private:
// Provided for an UPDATE
std::vector<std::pair<::shared_ptr<column_identifier::raw>, std::unique_ptr<operation::raw_update>>> _updates;
std::vector<std::pair<::shared_ptr<column_identifier::raw>, ::shared_ptr<operation::raw_update>>> _updates;
std::vector<relation_ptr> _where_clause;
public:
/**
@@ -76,7 +76,7 @@ public:
*/
update_statement(::shared_ptr<cf_name> name,
std::unique_ptr<attributes::raw> attrs,
std::vector<std::pair<::shared_ptr<column_identifier::raw>, std::unique_ptr<operation::raw_update>>> updates,
std::vector<std::pair<::shared_ptr<column_identifier::raw>, ::shared_ptr<operation::raw_update>>> updates,
std::vector<relation_ptr> where_clause,
conditions_vector conditions, bool if_exists);
protected:

View File

@@ -375,7 +375,7 @@ list_roles_statement::execute(service::storage_proxy&, service::query_state& sta
return when_all_succeed(
rm.can_login(role),
rm.is_superuser(role),
a.query_custom_options(role)).then_unpack([&results, &role](
a.query_custom_options(role)).then([&results, &role](
bool login,
bool super,
auth::custom_options os) {

View File

@@ -59,7 +59,6 @@
#include "db/timeout_clock.hh"
#include "db/consistency_level_validations.hh"
#include "database.hh"
#include "test/lib/select_statement_utils.hh"
#include <boost/algorithm/cxx11/any_of.hpp>
bool is_system_keyspace(const sstring& name);
@@ -68,8 +67,6 @@ namespace cql3 {
namespace statements {
static constexpr int DEFAULT_INTERNAL_PAGING_SIZE = select_statement::DEFAULT_COUNT_PAGE_SIZE;
thread_local int internal_paging_size = DEFAULT_INTERNAL_PAGING_SIZE;
thread_local const lw_shared_ptr<const select_statement::parameters> select_statement::_default_parameters = make_lw_shared<select_statement::parameters>();
select_statement::parameters::parameters()
@@ -336,7 +333,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
const bool aggregate = _selection->is_aggregate() || has_group_by();
const bool nonpaged_filtering = restrictions_need_filtering && page_size <= 0;
if (aggregate || nonpaged_filtering) {
page_size = internal_paging_size;
page_size = DEFAULT_COUNT_PAGE_SIZE;
}
auto key_ranges = _restrictions->get_partition_key_ranges(options);
@@ -363,7 +360,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
command->slice.options.set<query::partition_slice::option::allow_short_read>();
auto timeout_duration = options.get_timeout_config().*get_timeout_config_selector();
auto p = service::pager::query_pagers::pager(_schema, _selection,
state, options, command, std::move(key_ranges), restrictions_need_filtering ? _restrictions : nullptr);
state, options, command, std::move(key_ranges), _stats, restrictions_need_filtering ? _restrictions : nullptr);
if (aggregate || nonpaged_filtering) {
return do_with(
@@ -375,11 +372,10 @@ select_statement::do_execute(service::storage_proxy& proxy,
auto timeout = db::timeout_clock::now() + timeout_duration;
return p->fetch_page(builder, page_size, now, timeout);
}
).then([this, p, &builder, restrictions_need_filtering] {
return builder.with_thread_if_needed([this, p, &builder, restrictions_need_filtering] {
).then([this, &builder, restrictions_need_filtering] {
return builder.with_thread_if_needed([this, &builder, restrictions_need_filtering] {
auto rs = builder.build();
if (restrictions_need_filtering) {
_stats.filtered_rows_read_total += p->stats().rows_read_total;
_stats.filtered_rows_matched_total += rs->size();
}
update_stats_rows_read(rs->size());
@@ -423,7 +419,6 @@ select_statement::do_execute(service::storage_proxy& proxy,
}
if (restrictions_need_filtering) {
_stats.filtered_rows_read_total += p->stats().rows_read_total;
_stats.filtered_rows_matched_total += rs->size();
}
update_stats_rows_read(rs->size());
@@ -433,7 +428,9 @@ select_statement::do_execute(service::storage_proxy& proxy,
}
template<typename KeyType>
requires (std::is_same_v<KeyType, partition_key> || std::is_same_v<KeyType, clustering_key_prefix>)
GCC6_CONCEPT(
requires (std::is_same_v<KeyType, partition_key> || std::is_same_v<KeyType, clustering_key_prefix>)
)
static KeyType
generate_base_key_from_index_pk(const partition_key& index_pk, const std::optional<clustering_key>& index_ck, const schema& base_schema, const schema& view_schema) {
const auto& base_columns = std::is_same_v<KeyType, partition_key> ? base_schema.partition_key_columns() : base_schema.clustering_key_columns();
@@ -533,29 +530,13 @@ indexed_table_select_statement::do_execute_base_query(
if (old_paging_state && concurrency == 1) {
auto base_pk = generate_base_key_from_index_pk<partition_key>(old_paging_state->get_partition_key(),
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
auto row_ranges = command->slice.default_row_ranges();
if (old_paging_state->get_clustering_key() && _schema->clustering_key_size() > 0) {
auto base_ck = generate_base_key_from_index_pk<clustering_key>(old_paging_state->get_partition_key(),
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
query::trim_clustering_row_ranges_to(*_schema, row_ranges, base_ck, false);
command->slice.set_range(*_schema, base_pk, row_ranges);
command->slice.set_range(*_schema, base_pk,
std::vector<query::clustering_range>{query::clustering_range::make_starting_with(range_bound<clustering_key>(base_ck, false))});
} else {
// There is no clustering key in old_paging_state and/or no clustering key in
// _schema, therefore read an entire partition (whole clustering range).
//
// The only exception to applying no restrictions on clustering key
// is a case when we have a secondary index on the first column
// of clustering key. In such a case we should not read the
// entire clustering range - only a range in which first column
// of clustering key has the correct value.
//
// This means that we should not set a open_ended_both_sides
// clustering range on base_pk, instead intersect it with
// _row_ranges (which contains the restrictions neccessary for the
// case described above). The result of such intersection is just
// _row_ranges, which we explicity set on base_pk.
command->slice.set_range(*_schema, base_pk, row_ranges);
command->slice.set_range(*_schema, base_pk, std::vector<query::clustering_range>{query::clustering_range::make_open_ended_both_sides()});
}
}
concurrency *= 2;
@@ -863,7 +844,9 @@ indexed_table_select_statement::indexed_table_select_statement(schema_ptr schema
}
template<typename KeyType>
requires (std::is_same_v<KeyType, partition_key> || std::is_same_v<KeyType, clustering_key_prefix>)
GCC6_CONCEPT(
requires (std::is_same_v<KeyType, partition_key> || std::is_same_v<KeyType, clustering_key_prefix>)
)
static void append_base_key_to_index_ck(std::vector<bytes_view>& exploded_index_ck, const KeyType& base_key, const column_definition& index_cdef) {
auto key_view = base_key.view();
auto begin = key_view.begin();
@@ -993,41 +976,36 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
const bool aggregate = _selection->is_aggregate() || has_group_by();
if (aggregate) {
const bool restrictions_need_filtering = _restrictions->need_filtering();
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format(), *_group_by_cell_indices), std::make_unique<cql3::query_options>(cql3::query_options(options)),
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
[this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
// page size is set to the internal count page size, regardless of the user-provided value
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), internal_paging_size));
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
auto consume_results = [this, &builder, &options, &internal_options, &proxy, &state, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd, lw_shared_ptr<const service::pager::paging_state> paging_state) {
if (paging_state) {
paging_state = generate_view_paging_state_from_base_query_results(paging_state, results, proxy, state, options);
}
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
if (restrictions_need_filtering) {
_stats.filtered_rows_read_total += *results->row_count();
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
cql3::selection::result_set_builder::restrictions_filter(_restrictions, options, cmd->row_limit, _schema, cmd->slice.partition_row_limit())));
} else {
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
}
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
return stop_iteration(!has_more_pages);
};
if (whole_partitions || partition_slices) {
return find_index_partition_ranges(proxy, state, *internal_options).then(
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, lw_shared_ptr<const service::pager::paging_state> paging_state) {
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, paging_state)
.then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
return stop_iteration(!has_more_pages);
});
});
} else {
return find_index_clustering_rows(proxy, state, *internal_options).then(
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, lw_shared_ptr<const service::pager::paging_state> paging_state) {
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, paging_state)
.then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
return stop_iteration(!has_more_pages);
});
});
}
@@ -1194,7 +1172,7 @@ indexed_table_select_statement::read_posting_list(service::storage_proxy& proxy,
}
auto p = service::pager::query_pagers::pager(_view_schema, selection,
state, options, cmd, std::move(partition_ranges), nullptr);
state, options, cmd, std::move(partition_ranges), _stats, nullptr);
return p->fetch_page(options.get_page_size(), now, timeout).then([p, &options, limit, now] (std::unique_ptr<cql3::result_set> rs) {
rs->get_metadata().set_paging_state(p->state());
return ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(rs)));
@@ -1684,16 +1662,6 @@ std::vector<size_t> select_statement::prepare_group_by(const schema& schema, sel
}
future<> set_internal_paging_size(int paging_size) {
return seastar::smp::invoke_on_all([paging_size] {
internal_paging_size = paging_size;
});
}
future<> reset_internal_paging_size() {
return set_internal_paging_size(DEFAULT_INTERNAL_PAGING_SIZE);
}
}
namespace util {

View File

@@ -379,7 +379,7 @@ insert_json_statement::prepare_internal(database& db, schema_ptr schema,
update_statement::update_statement(::shared_ptr<cf_name> name,
std::unique_ptr<attributes::raw> attrs,
std::vector<std::pair<::shared_ptr<column_identifier::raw>, std::unique_ptr<operation::raw_update>>> updates,
std::vector<std::pair<::shared_ptr<column_identifier::raw>, ::shared_ptr<operation::raw_update>>> updates,
std::vector<relation_ptr> where_clause,
conditions_vector conditions, bool if_exists)
: raw::modification_statement(std::move(name), std::move(attrs), std::move(conditions), false, if_exists)

View File

@@ -82,15 +82,15 @@ public:
auto&& value = _elements[i];
auto&& spec = component_spec_of(receiver, i);
if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, *spec))) {
if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, spec))) {
throw exceptions::invalid_request_exception(format("Invalid tuple literal for {}: component {:d} is not of type {}", receiver.name, i, spec->type->as_cql3_type()));
}
}
}
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override {
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
try {
validate_assignable_to(db, keyspace, receiver);
validate_assignable_to(db, keyspace, *receiver);
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
} catch (exceptions::invalid_request_exception& e) {
return assignment_testable::test_result::NOT_ASSIGNABLE;

View File

@@ -53,10 +53,10 @@ public:
}
virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
if (!is_assignable(_term->test_assignment(db, keyspace, *casted_spec_of(db, keyspace, *receiver)))) {
if (!is_assignable(_term->test_assignment(db, keyspace, casted_spec_of(db, keyspace, *receiver)))) {
throw exceptions::invalid_request_exception(format("Cannot cast value {} to type {}", _term, _type));
}
if (!is_assignable(test_assignment(db, keyspace, *receiver))) {
if (!is_assignable(test_assignment(db, keyspace, receiver))) {
throw exceptions::invalid_request_exception(format("Cannot assign value {} to {} of type {}", *this, receiver->name, receiver->type->as_cql3_type()));
}
return _term->prepare(db, keyspace, receiver);
@@ -67,12 +67,12 @@ private:
::make_shared<column_identifier>(to_string(), true), _type->prepare(db, keyspace).get_type());
}
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override {
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
try {
auto&& casted_type = _type->prepare(db, keyspace).get_type();
if (receiver.type == casted_type) {
if (receiver->type == casted_type) {
return assignment_testable::test_result::EXACT_MATCH;
} else if (receiver.type->is_value_compatible_with(*casted_type)) {
} else if (receiver->type->is_value_compatible_with(*casted_type)) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
} else {
return assignment_testable::test_result::NOT_ASSIGNABLE;

View File

@@ -47,14 +47,14 @@
#include "result_set.hh"
#include "transport/messages/result_message.hh"
cql3::untyped_result_set_row::untyped_result_set_row(const map_t& data)
cql3::untyped_result_set_row::untyped_result_set_row(const std::unordered_map<sstring, bytes_opt>& data)
: _data(data)
{}
cql3::untyped_result_set_row::untyped_result_set_row(const std::vector<lw_shared_ptr<column_specification>>& columns, std::vector<bytes_opt> data)
: _columns(columns)
, _data([&columns, data = std::move(data)] () mutable {
map_t tmp;
std::unordered_map<sstring, bytes_opt> tmp;
std::transform(columns.begin(), columns.end(), data.begin(), std::inserter(tmp, tmp.end()), [](lw_shared_ptr<column_specification> c, bytes_opt& d) {
return std::make_pair<sstring, bytes_opt>(c->name->to_string(), std::move(d));
});
@@ -62,7 +62,7 @@ cql3::untyped_result_set_row::untyped_result_set_row(const std::vector<lw_shared
}())
{}
bool cql3::untyped_result_set_row::has(std::string_view name) const {
bool cql3::untyped_result_set_row::has(const sstring& name) const {
auto i = _data.find(name);
return i != _data.end() && i->second;
}

View File

@@ -47,8 +47,6 @@
#include "types/list.hh"
#include "types/set.hh"
#include "transport/messages/result_message_base.hh"
#include "column_specification.hh"
#include "absl-flat_hash_map.hh"
#pragma once
@@ -57,27 +55,26 @@ namespace cql3 {
class untyped_result_set_row {
private:
const std::vector<lw_shared_ptr<column_specification>> _columns;
using map_t = flat_hash_map<sstring, bytes_opt>;
const map_t _data;
const std::unordered_map<sstring, bytes_opt> _data;
public:
untyped_result_set_row(const map_t&);
untyped_result_set_row(const std::unordered_map<sstring, bytes_opt>&);
untyped_result_set_row(const std::vector<lw_shared_ptr<column_specification>>&, std::vector<bytes_opt>);
untyped_result_set_row(untyped_result_set_row&&) = default;
untyped_result_set_row(const untyped_result_set_row&) = delete;
bool has(std::string_view) const;
bytes_view get_view(std::string_view name) const {
bool has(const sstring&) const;
bytes_view get_view(const sstring& name) const {
return *_data.at(name);
}
bytes get_blob(std::string_view name) const {
bytes get_blob(const sstring& name) const {
return bytes(get_view(name));
}
template<typename T>
T get_as(std::string_view name) const {
T get_as(const sstring& name) const {
return value_cast<T>(data_type_for<T>()->deserialize(get_view(name)));
}
template<typename T>
std::optional<T> get_opt(std::string_view name) const {
std::optional<T> get_opt(const sstring& name) const {
return has(name) ? get_as<T>(name) : std::optional<T>{};
}
bytes_view_opt get_view_opt(const sstring& name) const {
@@ -87,13 +84,13 @@ public:
return std::nullopt;
}
template<typename T>
T get_or(std::string_view name, T t) const {
T get_or(const sstring& name, T t) const {
return has(name) ? get_as<T>(name) : t;
}
// this could maybe be done as an overload of get_as (or something), but that just
// muddles things for no real gain. Let user (us) attempt to know what he is doing instead.
template<typename K, typename V, typename Iter>
void get_map_data(std::string_view name, Iter out, data_type keytype =
void get_map_data(const sstring& name, Iter out, data_type keytype =
data_type_for<K>(), data_type valtype =
data_type_for<V>()) const {
auto vec =
@@ -106,7 +103,7 @@ public:
});
}
template<typename K, typename V, typename ... Rest>
std::unordered_map<K, V, Rest...> get_map(std::string_view name,
std::unordered_map<K, V, Rest...> get_map(const sstring& name,
data_type keytype = data_type_for<K>(), data_type valtype =
data_type_for<V>()) const {
std::unordered_map<K, V, Rest...> res;
@@ -114,7 +111,7 @@ public:
return res;
}
template<typename V, typename Iter>
void get_list_data(std::string_view name, Iter out, data_type valtype = data_type_for<V>()) const {
void get_list_data(const sstring& name, Iter out, data_type valtype = data_type_for<V>()) const {
auto vec =
value_cast<list_type_impl::native_type>(
list_type_impl::get_instance(valtype, false)->deserialize(
@@ -122,13 +119,13 @@ public:
std::transform(vec.begin(), vec.end(), out, [](auto& v) { return value_cast<V>(v); });
}
template<typename V, typename ... Rest>
std::vector<V, Rest...> get_list(std::string_view name, data_type valtype = data_type_for<V>()) const {
std::vector<V, Rest...> get_list(const sstring& name, data_type valtype = data_type_for<V>()) const {
std::vector<V, Rest...> res;
get_list_data<V>(name, std::back_inserter(res), valtype);
return res;
}
template<typename V, typename Iter>
void get_set_data(std::string_view name, Iter out, data_type valtype =
void get_set_data(const sstring& name, Iter out, data_type valtype =
data_type_for<V>()) const {
auto vec =
value_cast<set_type_impl::native_type>(
@@ -140,7 +137,7 @@ public:
});
}
template<typename V, typename ... Rest>
std::unordered_set<V, Rest...> get_set(std::string_view name,
std::unordered_set<V, Rest...> get_set(const sstring& name,
data_type valtype =
data_type_for<V>()) const {
std::unordered_set<V, Rest...> res;

View File

@@ -122,15 +122,15 @@ void user_types::literal::validate_assignable_to(database& db, const sstring& ke
}
const shared_ptr<term::raw>& value = _entries.at(field);
auto&& field_spec = field_spec_of(receiver, i);
if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, *field_spec))) {
if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, field_spec))) {
throw exceptions::invalid_request_exception(format("Invalid user type literal for {}: field {} is not of type {}", receiver.name, field, field_spec->type->as_cql3_type()));
}
}
}
assignment_testable::test_result user_types::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
assignment_testable::test_result user_types::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
try {
validate_assignable_to(db, keyspace, receiver);
validate_assignable_to(db, keyspace, *receiver);
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
} catch (exceptions::invalid_request_exception& e) {
return assignment_testable::test_result::NOT_ASSIGNABLE;

View File

@@ -67,7 +67,7 @@ public:
private:
void validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const;
public:
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const override;
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
virtual sstring assignment_testable_source_context() const override;
virtual sstring to_string() const override;
};

View File

@@ -292,10 +292,12 @@ public:
/// \arg data needs to remain valid as long as the writer is in use.
/// \returns imr::WriterAllocator for cell::structure.
template<typename FragmentRange, typename = std::enable_if_t<is_fragment_range_v<std::decay_t<FragmentRange>>>>
requires std::is_nothrow_move_constructible_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_copy_constructible_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_copy_assignable_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_move_assignable_v<std::decay_t<FragmentRange>>
GCC6_CONCEPT(
requires std::is_nothrow_move_constructible_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_copy_constructible_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_copy_assignable_v<std::decay_t<FragmentRange>> &&
std::is_nothrow_move_assignable_v<std::decay_t<FragmentRange>>
)
static auto make_collection(FragmentRange data) noexcept {
return [data = std::move(data)] (auto&& serializer, auto&& allocations) noexcept {
return serializer

View File

@@ -86,10 +86,12 @@ public:
{ }
template<typename Serializer, typename Allocator>
requires (imr::is_sizer_for_v<cell::variable_value::structure, Serializer>
&& std::is_same_v<Allocator, imr::alloc::object_allocator::sizer>)
|| (imr::is_serializer_for_v<cell::variable_value::structure, Serializer>
&& std::is_same_v<Allocator, imr::alloc::object_allocator::serializer>)
GCC6_CONCEPT(
requires (imr::is_sizer_for_v<cell::variable_value::structure, Serializer>
&& std::is_same_v<Allocator, imr::alloc::object_allocator::sizer>)
|| (imr::is_serializer_for_v<cell::variable_value::structure, Serializer>
&& std::is_same_v<Allocator, imr::alloc::object_allocator::serializer>)
)
auto operator()(Serializer serializer, Allocator allocations) {
auto after_size = serializer.serialize(_value_size);
if (_force_internal || _value_size <= cell::maximum_internal_storage_length) {
@@ -132,14 +134,14 @@ public:
inline value_writer<empty_fragment_range> cell::variable_value::write(size_t value_size, bool force_internal) noexcept
{
static_assert(imr::WriterAllocator<value_writer<empty_fragment_range>, structure>);
GCC6_CONCEPT(static_assert(imr::WriterAllocator<value_writer<empty_fragment_range>, structure>));
return value_writer<empty_fragment_range>(empty_fragment_range(), value_size, force_internal);
}
template<typename FragmentRange>
inline value_writer<std::decay_t<FragmentRange>> cell::variable_value::write(FragmentRange&& value, bool force_internal) noexcept
{
static_assert(imr::WriterAllocator<value_writer<std::decay_t<FragmentRange>>, structure>);
GCC6_CONCEPT(static_assert(imr::WriterAllocator<value_writer<std::decay_t<FragmentRange>>, structure>));
return value_writer<std::decay_t<FragmentRange>>(std::forward<FragmentRange>(value), value.size_bytes(), force_internal);
}

Some files were not shown because too many files have changed in this diff Show More