alternator: add mandatory configurable write isolation mode
Alternator supports four ways in which write operations can use quorum writes or LWT or both, which we called "write isolation policies". Until this patch, Alternator defaulted to the most generally safe policy, "always_use_lwt". This default could have been overriden for each table separately, but there was no way to change this default for all tables. This patch adds a "--alternator-write-isolation" configuration option which allows changing the default. Moreover, @dorlaor asked that users must *explicitly* choose this default mode, and not get "always_use_lwt" without noticing. The previous default, "always_use_lwt" supports any workload correctly but because it uses LWT for all writes it may be disappointingly slow for users who run write-only workloads (including most benchmarks) - such users might find the slow writes so disappointing that they will drop Scylla. Conversely, a default of "forbid_rmw" will be faster and still correct, but will fail on workloads which need read-modify-write operations - and suprise users that need these operations. So Dor asked that that *none* of the write modes be made the default, and users must make an informed choice between the different write modes, rather than being disappointed by a default choice they weren't aware of. So after this patch, Scylla refuses to boot if Alternator is enabled but a "--alternator-write-isolation" option is missing. The patch also modifies the relevant documentation, adds the same option to our docker image, and the modifies the test-running script test/alternator/run to run Scylla with the old default mode (always_use_lwt), which we need because we want to test RMW operations as well. Fixes #6452 Signed-off-by: Nadav Har'El <nyh@scylladb.com> Message-Id: <20200524160338.108417-1-nyh@scylladb.com>
This commit is contained in:
@@ -573,24 +573,61 @@ static bool validate_legal_tag_chars(std::string_view tag) {
|
||||
return std::all_of(tag.begin(), tag.end(), &is_legal_tag_char);
|
||||
}
|
||||
|
||||
static const std::unordered_set<std::string_view> allowed_write_isolation_values = {
|
||||
"f", "forbid", "forbid_rmw",
|
||||
"a", "always", "always_use_lwt",
|
||||
"o", "only_rmw_uses_lwt",
|
||||
"u", "unsafe", "unsafe_rmw",
|
||||
};
|
||||
|
||||
static void validate_tags(const std::map<sstring, sstring>& tags) {
|
||||
static const std::unordered_set<std::string_view> allowed_values = {
|
||||
"f", "forbid", "forbid_rmw",
|
||||
"a", "always", "always_use_lwt",
|
||||
"o", "only_rmw_uses_lwt",
|
||||
"u", "unsafe", "unsafe_rmw",
|
||||
};
|
||||
auto it = tags.find(rmw_operation::WRITE_ISOLATION_TAG_KEY);
|
||||
if (it != tags.end()) {
|
||||
std::string_view value = it->second;
|
||||
elogger.warn("Allowed values count {} {}", value, allowed_values.count(value));
|
||||
if (allowed_values.count(value) == 0) {
|
||||
if (allowed_write_isolation_values.count(value) == 0) {
|
||||
throw api_error("ValidationException",
|
||||
format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_values));
|
||||
format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_write_isolation_values));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static rmw_operation::write_isolation parse_write_isolation(std::string_view value) {
|
||||
if (!value.empty()) {
|
||||
switch (value[0]) {
|
||||
case 'f':
|
||||
return rmw_operation::write_isolation::FORBID_RMW;
|
||||
case 'a':
|
||||
return rmw_operation::write_isolation::LWT_ALWAYS;
|
||||
case 'o':
|
||||
return rmw_operation::write_isolation::LWT_RMW_ONLY;
|
||||
case 'u':
|
||||
return rmw_operation::write_isolation::UNSAFE_RMW;
|
||||
}
|
||||
}
|
||||
// Shouldn't happen as validate_tags() / set_default_write_isolation()
|
||||
// verify allow only a closed set of values.
|
||||
return rmw_operation::default_write_isolation;
|
||||
|
||||
}
|
||||
// This default_write_isolation is always overwritten in main.cc, which calls
|
||||
// set_default_write_isolation().
|
||||
rmw_operation::write_isolation rmw_operation::default_write_isolation =
|
||||
rmw_operation::write_isolation::LWT_ALWAYS;
|
||||
void rmw_operation::set_default_write_isolation(std::string_view value) {
|
||||
if (value.empty()) {
|
||||
throw std::runtime_error("When Alternator is enabled, write "
|
||||
"isolation policy must be selected, using the "
|
||||
"'--alternator-write-isolation' option. "
|
||||
"See docs/alternator/alternator.md for instructions.");
|
||||
}
|
||||
if (allowed_write_isolation_values.count(value) == 0) {
|
||||
throw std::runtime_error(format("Invalid --alternator-write-isolation "
|
||||
"setting '{}'. Allowed values: {}.",
|
||||
value, allowed_write_isolation_values));
|
||||
}
|
||||
default_write_isolation = parse_write_isolation(value);
|
||||
}
|
||||
|
||||
// FIXME: Updating tags currently relies on updating schema, which may be subject
|
||||
// to races during concurrent updates of the same table. Once Scylla schema updates
|
||||
// are fixed, this issue will automatically get fixed as well.
|
||||
@@ -1203,22 +1240,9 @@ rmw_operation::write_isolation rmw_operation::get_write_isolation_for_schema(sch
|
||||
const auto& tags = get_tags_of_table(schema);
|
||||
auto it = tags.find(WRITE_ISOLATION_TAG_KEY);
|
||||
if (it == tags.end() || it->second.empty()) {
|
||||
// By default, fall back to always enforcing LWT
|
||||
return write_isolation::LWT_ALWAYS;
|
||||
}
|
||||
switch (it->second[0]) {
|
||||
case 'f':
|
||||
return write_isolation::FORBID_RMW;
|
||||
case 'a':
|
||||
return write_isolation::LWT_ALWAYS;
|
||||
case 'o':
|
||||
return write_isolation::LWT_RMW_ONLY;
|
||||
case 'u':
|
||||
return write_isolation::UNSAFE_RMW;
|
||||
default:
|
||||
// In case of an incorrect tag, fall back to the safest option: LWT_ALWAYS
|
||||
return write_isolation::LWT_ALWAYS;
|
||||
return default_write_isolation;
|
||||
}
|
||||
return parse_write_isolation(it->second);
|
||||
}
|
||||
|
||||
// shard_for_execute() checks whether execute() must be called on a specific
|
||||
|
||||
@@ -63,6 +63,10 @@ public:
|
||||
|
||||
static write_isolation get_write_isolation_for_schema(schema_ptr schema);
|
||||
|
||||
static write_isolation default_write_isolation;
|
||||
public:
|
||||
static void set_default_write_isolation(std::string_view mode);
|
||||
|
||||
protected:
|
||||
// The full request JSON
|
||||
rjson::value _request;
|
||||
|
||||
@@ -736,6 +736,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port")
|
||||
, alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address")
|
||||
, alternator_enforce_authorization(this, "alternator_enforce_authorization", value_status::Used, false, "Enforce checking the authorization header for every request in Alternator")
|
||||
, alternator_write_isolation(this, "alternator_write_isolation", value_status::Used, "", "Default write isolation policy for Alternator")
|
||||
, abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.")
|
||||
, redis_port(this, "redis_port", value_status::Used, 0, "Port on which the REDIS transport listens for clients.")
|
||||
, redis_ssl_port(this, "redis_ssl_port", value_status::Used, 0, "Port on which the REDIS TLS native transport listens for clients.")
|
||||
|
||||
@@ -314,6 +314,8 @@ public:
|
||||
named_value<uint16_t> alternator_https_port;
|
||||
named_value<sstring> alternator_address;
|
||||
named_value<bool> alternator_enforce_authorization;
|
||||
named_value<sstring> alternator_write_isolation;
|
||||
|
||||
named_value<bool> abort_on_ebadf;
|
||||
|
||||
named_value<uint16_t> redis_port;
|
||||
|
||||
1
dist/docker/redhat/commandlineparser.py
vendored
1
dist/docker/redhat/commandlineparser.py
vendored
@@ -19,6 +19,7 @@ def parse():
|
||||
parser.add_argument('--api-address', default=None, dest='apiAddress')
|
||||
parser.add_argument('--alternator-address', default=None, dest='alternatorAddress', help="Alternator API address to listen to. Defaults to listen address.")
|
||||
parser.add_argument('--alternator-port', default=None, dest='alternatorPort', help="Alternator API port to listen to. Disabled by default.")
|
||||
parser.add_argument('--alternator-write-isolation', default=None, dest='alternatorWriteIsolation', help="Alternator default write isolation policy.")
|
||||
parser.add_argument('--disable-version-check', default=False, action='store_true', dest='disable_housekeeping', help="Disable version check")
|
||||
parser.add_argument('--authenticator', default=None, dest='authenticator', help="Set authenticator class")
|
||||
parser.add_argument('--authorizer', default=None, dest='authorizer', help="Set authorizer class")
|
||||
|
||||
4
dist/docker/redhat/scyllasetup.py
vendored
4
dist/docker/redhat/scyllasetup.py
vendored
@@ -16,6 +16,7 @@ class ScyllaSetup:
|
||||
self._broadcastRpcAddress = arguments.broadcastRpcAddress
|
||||
self._apiAddress = arguments.apiAddress
|
||||
self._alternatorPort = arguments.alternatorPort
|
||||
self._alternatorWriteIsolation = arguments.alternatorWriteIsolation
|
||||
self._smp = arguments.smp
|
||||
self._memory = arguments.memory
|
||||
self._reserveMemory = arguments.reserveMemory
|
||||
@@ -120,6 +121,9 @@ class ScyllaSetup:
|
||||
args += ["--alternator-address %s" % self._alternatorAddress]
|
||||
args += ["--alternator-port %s" % self._alternatorPort]
|
||||
|
||||
if self._alternatorWriteIsolation is not None:
|
||||
args += ["--alternator-write-isolation %s" % self._alternatorWriteIsolation]
|
||||
|
||||
if self._authenticator is not None:
|
||||
args += ["--authenticator %s" % self._authenticator]
|
||||
|
||||
|
||||
@@ -25,6 +25,14 @@ By default, Scylla listens on this port on all network interfaces.
|
||||
To listen only on a specific interface, pass also an "`alternator-address`"
|
||||
option.
|
||||
|
||||
As we explain below in the "Write isolation policies", Alternator has
|
||||
four different choices for the implementation of writes, each with
|
||||
different advantages. You should consider which of the options makes
|
||||
more sense for your intended use case, and use the "`--alternator-write-isolation`"
|
||||
option to choose one. There is currently no default for this option: Trying
|
||||
to run Scylla with Alternator enabled without passing this option will
|
||||
result in an error asking you to set it.
|
||||
|
||||
DynamoDB clients usually specify a single "endpoint" address, e.g.,
|
||||
`dynamodb.us-east-1.amazonaws.com`, and a DNS server and/or load balancers
|
||||
distribute the connections to many different backend nodes. Alternator
|
||||
@@ -154,23 +162,28 @@ implemented, with the following limitations:
|
||||
|
||||
### Write isolation policies
|
||||
DynamoDB API update requests may involve a read before the write - e.g., a
|
||||
_conditional_ update, or an update based on the old value of an attribute.
|
||||
_conditional_ update or an update based on the old value of an attribute.
|
||||
The read and the write should be treated as a single transaction - protected
|
||||
(_isolated_) from other parallel writes to the same item.
|
||||
|
||||
By default, Alternator does this isolation by using Scylla's LWT (lightweight
|
||||
transactions) for every write operation. However, LWT significantly slows
|
||||
writes down, so Alternator supports three additional _write isolation
|
||||
policies_, which can be chosen on a per-table basis and may make sense for
|
||||
certain workloads as explained below.
|
||||
Alternator could do this isolation by using Scylla's LWT (lightweight
|
||||
transactions) for every write operation, but this significantly slows
|
||||
down writes, and not necessary for workloads which don't use read-modify-write
|
||||
(RMW) updates.
|
||||
|
||||
The write isolation policy of a table is configured by tagging the table (at
|
||||
CreateTable time, or any time later with TagResource) with the key
|
||||
So Alternator supports four _write isolation policies_, which can be chosen
|
||||
on a per-table basis and may make sense for certain workloads as explained
|
||||
below.
|
||||
|
||||
A default write isolation policy **must** be chosen using the
|
||||
`--alternator-write-isolation` configuration option. Additionally, the write
|
||||
isolation policy for a specific table can be overriden by tagging the table
|
||||
(at CreateTable time, or any time later with TagResource) with the key
|
||||
`system:write_isolation`, and one of the following values:
|
||||
|
||||
* `a`, `always`, or `always_use_lwt` - This is the default choice.
|
||||
It performs every write operation - even those that do not need a read
|
||||
before the write - as a lightweight transaction.
|
||||
* `a`, `always`, or `always_use_lwt` - This mode performs every write
|
||||
operation - even those that do not need a read before the write - as a
|
||||
lightweight transaction.
|
||||
|
||||
This is the slowest choice, but also the only choice guaranteed to work
|
||||
correctly for every workload.
|
||||
|
||||
@@ -10,10 +10,12 @@ This section will guide you through the steps for setting up the cluster:
|
||||
nightly image by running: `docker pull scylladb/scylla-nightly:latest`
|
||||
2. Follow the steps in the [Scylla official download web page](https://www.scylladb.com/download/open-source/#docker)
|
||||
add to every "docker run" command: `-p 8000:8000` before the image name
|
||||
and `--alternator-port=8000` at the end. The "alternator-port" option
|
||||
specifies on which port Scylla will listen for the (unencrypted) DynamoDB API.
|
||||
and `--alternator-port=8000 --alternator-write-isolation=always` at the end.
|
||||
The "alternator-port" option specifies on which port Scylla will listen for
|
||||
the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses
|
||||
whether or not Alternator will use LWT for every write.
|
||||
For example,
|
||||
`docker run --name scylla -d -p 8000:8000 scylladb/scylla-nightly:latest --alternator-port=8000
|
||||
`docker run --name scylla -d -p 8000:8000 scylladb/scylla-nightly:latest --alternator-port=8000 --alternator-write-isolation=always
|
||||
|
||||
## Testing Scylla's DynamoDB API support:
|
||||
### Running AWS Tic Tac Toe demo app to test the cluster:
|
||||
|
||||
@@ -163,6 +163,12 @@ $ docker run --name some-scylla -d scylladb/scylla --alternator-port 8000
|
||||
|
||||
**Since: 3.2**
|
||||
|
||||
### `--alternator-write-isolation policy`
|
||||
|
||||
The `--alternator-write-isolation` command line option chooses between four allowed write isolation policies described in docs/alternator/alternator.md. This option must be specified if Alternator is enabled - it does not have a default.
|
||||
|
||||
**Since: 4.1**
|
||||
|
||||
### `--broadcast-address ADDR`
|
||||
|
||||
The `--broadcast-address` command line option configures the IP address the Scylla instance tells other Scylla nodes in the cluster to connect to.
|
||||
|
||||
2
main.cc
2
main.cc
@@ -79,6 +79,7 @@
|
||||
#include "cdc/log.hh"
|
||||
#include "cdc/cdc_extension.hh"
|
||||
#include "alternator/tags_extension.hh"
|
||||
#include "alternator/rmw_operation.hh"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@@ -1094,6 +1095,7 @@ int main(int ac, char** av) {
|
||||
}
|
||||
|
||||
if (cfg->alternator_port() || cfg->alternator_https_port()) {
|
||||
alternator::rmw_operation::set_default_write_isolation(cfg->alternator_write_isolation());
|
||||
static sharded<alternator::executor> alternator_executor;
|
||||
static sharded<alternator::server> alternator_server;
|
||||
|
||||
|
||||
@@ -83,6 +83,7 @@ ln -s "$SCYLLA" "$SCYLLA_LINK"
|
||||
--alternator-address $SCYLLA_IP \
|
||||
$alternator_port_option \
|
||||
--alternator-enforce-authorization=1 \
|
||||
--alternator-write-isolation=always_use_lwt \
|
||||
--developer-mode=1 \
|
||||
--ring-delay-ms 0 --collectd 0 \
|
||||
--smp 2 -m 1G \
|
||||
|
||||
Reference in New Issue
Block a user