diff --git a/alternator/executor.cc b/alternator/executor.cc index 6f5f01e51a..132e219634 100644 --- a/alternator/executor.cc +++ b/alternator/executor.cc @@ -573,24 +573,61 @@ static bool validate_legal_tag_chars(std::string_view tag) { return std::all_of(tag.begin(), tag.end(), &is_legal_tag_char); } +static const std::unordered_set allowed_write_isolation_values = { + "f", "forbid", "forbid_rmw", + "a", "always", "always_use_lwt", + "o", "only_rmw_uses_lwt", + "u", "unsafe", "unsafe_rmw", +}; + static void validate_tags(const std::map& tags) { - static const std::unordered_set allowed_values = { - "f", "forbid", "forbid_rmw", - "a", "always", "always_use_lwt", - "o", "only_rmw_uses_lwt", - "u", "unsafe", "unsafe_rmw", - }; auto it = tags.find(rmw_operation::WRITE_ISOLATION_TAG_KEY); if (it != tags.end()) { std::string_view value = it->second; - elogger.warn("Allowed values count {} {}", value, allowed_values.count(value)); - if (allowed_values.count(value) == 0) { + if (allowed_write_isolation_values.count(value) == 0) { throw api_error("ValidationException", - format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_values)); + format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_write_isolation_values)); } } } +static rmw_operation::write_isolation parse_write_isolation(std::string_view value) { + if (!value.empty()) { + switch (value[0]) { + case 'f': + return rmw_operation::write_isolation::FORBID_RMW; + case 'a': + return rmw_operation::write_isolation::LWT_ALWAYS; + case 'o': + return rmw_operation::write_isolation::LWT_RMW_ONLY; + case 'u': + return rmw_operation::write_isolation::UNSAFE_RMW; + } + } + // Shouldn't happen as validate_tags() / set_default_write_isolation() + // verify allow only a closed set of values. + return rmw_operation::default_write_isolation; + +} +// This default_write_isolation is always overwritten in main.cc, which calls +// set_default_write_isolation(). +rmw_operation::write_isolation rmw_operation::default_write_isolation = + rmw_operation::write_isolation::LWT_ALWAYS; +void rmw_operation::set_default_write_isolation(std::string_view value) { + if (value.empty()) { + throw std::runtime_error("When Alternator is enabled, write " + "isolation policy must be selected, using the " + "'--alternator-write-isolation' option. " + "See docs/alternator/alternator.md for instructions."); + } + if (allowed_write_isolation_values.count(value) == 0) { + throw std::runtime_error(format("Invalid --alternator-write-isolation " + "setting '{}'. Allowed values: {}.", + value, allowed_write_isolation_values)); + } + default_write_isolation = parse_write_isolation(value); +} + // FIXME: Updating tags currently relies on updating schema, which may be subject // to races during concurrent updates of the same table. Once Scylla schema updates // are fixed, this issue will automatically get fixed as well. @@ -1203,22 +1240,9 @@ rmw_operation::write_isolation rmw_operation::get_write_isolation_for_schema(sch const auto& tags = get_tags_of_table(schema); auto it = tags.find(WRITE_ISOLATION_TAG_KEY); if (it == tags.end() || it->second.empty()) { - // By default, fall back to always enforcing LWT - return write_isolation::LWT_ALWAYS; - } - switch (it->second[0]) { - case 'f': - return write_isolation::FORBID_RMW; - case 'a': - return write_isolation::LWT_ALWAYS; - case 'o': - return write_isolation::LWT_RMW_ONLY; - case 'u': - return write_isolation::UNSAFE_RMW; - default: - // In case of an incorrect tag, fall back to the safest option: LWT_ALWAYS - return write_isolation::LWT_ALWAYS; + return default_write_isolation; } + return parse_write_isolation(it->second); } // shard_for_execute() checks whether execute() must be called on a specific diff --git a/alternator/rmw_operation.hh b/alternator/rmw_operation.hh index 948668d57e..6f4b015eae 100644 --- a/alternator/rmw_operation.hh +++ b/alternator/rmw_operation.hh @@ -63,6 +63,10 @@ public: static write_isolation get_write_isolation_for_schema(schema_ptr schema); + static write_isolation default_write_isolation; +public: + static void set_default_write_isolation(std::string_view mode); + protected: // The full request JSON rjson::value _request; diff --git a/db/config.cc b/db/config.cc index 2aa2b61524..5f1096e957 100644 --- a/db/config.cc +++ b/db/config.cc @@ -736,6 +736,7 @@ db::config::config(std::shared_ptr exts) , alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port") , alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address") , alternator_enforce_authorization(this, "alternator_enforce_authorization", value_status::Used, false, "Enforce checking the authorization header for every request in Alternator") + , alternator_write_isolation(this, "alternator_write_isolation", value_status::Used, "", "Default write isolation policy for Alternator") , abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.") , redis_port(this, "redis_port", value_status::Used, 0, "Port on which the REDIS transport listens for clients.") , redis_ssl_port(this, "redis_ssl_port", value_status::Used, 0, "Port on which the REDIS TLS native transport listens for clients.") diff --git a/db/config.hh b/db/config.hh index 96da0e74b3..c726e34b10 100644 --- a/db/config.hh +++ b/db/config.hh @@ -314,6 +314,8 @@ public: named_value alternator_https_port; named_value alternator_address; named_value alternator_enforce_authorization; + named_value alternator_write_isolation; + named_value abort_on_ebadf; named_value redis_port; diff --git a/dist/docker/redhat/commandlineparser.py b/dist/docker/redhat/commandlineparser.py index 53a39bde98..526a73e5c5 100644 --- a/dist/docker/redhat/commandlineparser.py +++ b/dist/docker/redhat/commandlineparser.py @@ -19,6 +19,7 @@ def parse(): parser.add_argument('--api-address', default=None, dest='apiAddress') parser.add_argument('--alternator-address', default=None, dest='alternatorAddress', help="Alternator API address to listen to. Defaults to listen address.") parser.add_argument('--alternator-port', default=None, dest='alternatorPort', help="Alternator API port to listen to. Disabled by default.") + parser.add_argument('--alternator-write-isolation', default=None, dest='alternatorWriteIsolation', help="Alternator default write isolation policy.") parser.add_argument('--disable-version-check', default=False, action='store_true', dest='disable_housekeeping', help="Disable version check") parser.add_argument('--authenticator', default=None, dest='authenticator', help="Set authenticator class") parser.add_argument('--authorizer', default=None, dest='authorizer', help="Set authorizer class") diff --git a/dist/docker/redhat/scyllasetup.py b/dist/docker/redhat/scyllasetup.py index de99e222c4..05667fe0ca 100644 --- a/dist/docker/redhat/scyllasetup.py +++ b/dist/docker/redhat/scyllasetup.py @@ -16,6 +16,7 @@ class ScyllaSetup: self._broadcastRpcAddress = arguments.broadcastRpcAddress self._apiAddress = arguments.apiAddress self._alternatorPort = arguments.alternatorPort + self._alternatorWriteIsolation = arguments.alternatorWriteIsolation self._smp = arguments.smp self._memory = arguments.memory self._reserveMemory = arguments.reserveMemory @@ -120,6 +121,9 @@ class ScyllaSetup: args += ["--alternator-address %s" % self._alternatorAddress] args += ["--alternator-port %s" % self._alternatorPort] + if self._alternatorWriteIsolation is not None: + args += ["--alternator-write-isolation %s" % self._alternatorWriteIsolation] + if self._authenticator is not None: args += ["--authenticator %s" % self._authenticator] diff --git a/docs/alternator/alternator.md b/docs/alternator/alternator.md index 3bff4a5aae..38be591b57 100644 --- a/docs/alternator/alternator.md +++ b/docs/alternator/alternator.md @@ -25,6 +25,14 @@ By default, Scylla listens on this port on all network interfaces. To listen only on a specific interface, pass also an "`alternator-address`" option. +As we explain below in the "Write isolation policies", Alternator has +four different choices for the implementation of writes, each with +different advantages. You should consider which of the options makes +more sense for your intended use case, and use the "`--alternator-write-isolation`" +option to choose one. There is currently no default for this option: Trying +to run Scylla with Alternator enabled without passing this option will +result in an error asking you to set it. + DynamoDB clients usually specify a single "endpoint" address, e.g., `dynamodb.us-east-1.amazonaws.com`, and a DNS server and/or load balancers distribute the connections to many different backend nodes. Alternator @@ -154,23 +162,28 @@ implemented, with the following limitations: ### Write isolation policies DynamoDB API update requests may involve a read before the write - e.g., a -_conditional_ update, or an update based on the old value of an attribute. +_conditional_ update or an update based on the old value of an attribute. The read and the write should be treated as a single transaction - protected (_isolated_) from other parallel writes to the same item. -By default, Alternator does this isolation by using Scylla's LWT (lightweight -transactions) for every write operation. However, LWT significantly slows -writes down, so Alternator supports three additional _write isolation -policies_, which can be chosen on a per-table basis and may make sense for -certain workloads as explained below. +Alternator could do this isolation by using Scylla's LWT (lightweight +transactions) for every write operation, but this significantly slows +down writes, and not necessary for workloads which don't use read-modify-write +(RMW) updates. -The write isolation policy of a table is configured by tagging the table (at -CreateTable time, or any time later with TagResource) with the key +So Alternator supports four _write isolation policies_, which can be chosen +on a per-table basis and may make sense for certain workloads as explained +below. + +A default write isolation policy **must** be chosen using the +`--alternator-write-isolation` configuration option. Additionally, the write +isolation policy for a specific table can be overriden by tagging the table +(at CreateTable time, or any time later with TagResource) with the key `system:write_isolation`, and one of the following values: - * `a`, `always`, or `always_use_lwt` - This is the default choice. - It performs every write operation - even those that do not need a read - before the write - as a lightweight transaction. + * `a`, `always`, or `always_use_lwt` - This mode performs every write + operation - even those that do not need a read before the write - as a + lightweight transaction. This is the slowest choice, but also the only choice guaranteed to work correctly for every workload. diff --git a/docs/alternator/getting-started.md b/docs/alternator/getting-started.md index 71e81b9865..f883ec6903 100644 --- a/docs/alternator/getting-started.md +++ b/docs/alternator/getting-started.md @@ -10,10 +10,12 @@ This section will guide you through the steps for setting up the cluster: nightly image by running: `docker pull scylladb/scylla-nightly:latest` 2. Follow the steps in the [Scylla official download web page](https://www.scylladb.com/download/open-source/#docker) add to every "docker run" command: `-p 8000:8000` before the image name - and `--alternator-port=8000` at the end. The "alternator-port" option - specifies on which port Scylla will listen for the (unencrypted) DynamoDB API. + and `--alternator-port=8000 --alternator-write-isolation=always` at the end. + The "alternator-port" option specifies on which port Scylla will listen for + the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses + whether or not Alternator will use LWT for every write. For example, - `docker run --name scylla -d -p 8000:8000 scylladb/scylla-nightly:latest --alternator-port=8000 + `docker run --name scylla -d -p 8000:8000 scylladb/scylla-nightly:latest --alternator-port=8000 --alternator-write-isolation=always ## Testing Scylla's DynamoDB API support: ### Running AWS Tic Tac Toe demo app to test the cluster: diff --git a/docs/docker-hub.md b/docs/docker-hub.md index ae59d4b799..70d2f974b5 100644 --- a/docs/docker-hub.md +++ b/docs/docker-hub.md @@ -163,6 +163,12 @@ $ docker run --name some-scylla -d scylladb/scylla --alternator-port 8000 **Since: 3.2** +### `--alternator-write-isolation policy` + +The `--alternator-write-isolation` command line option chooses between four allowed write isolation policies described in docs/alternator/alternator.md. This option must be specified if Alternator is enabled - it does not have a default. + +**Since: 4.1** + ### `--broadcast-address ADDR` The `--broadcast-address` command line option configures the IP address the Scylla instance tells other Scylla nodes in the cluster to connect to. diff --git a/main.cc b/main.cc index a041fcf037..7198682d01 100644 --- a/main.cc +++ b/main.cc @@ -79,6 +79,7 @@ #include "cdc/log.hh" #include "cdc/cdc_extension.hh" #include "alternator/tags_extension.hh" +#include "alternator/rmw_operation.hh" namespace fs = std::filesystem; @@ -1094,6 +1095,7 @@ int main(int ac, char** av) { } if (cfg->alternator_port() || cfg->alternator_https_port()) { + alternator::rmw_operation::set_default_write_isolation(cfg->alternator_write_isolation()); static sharded alternator_executor; static sharded alternator_server; diff --git a/test/alternator/run b/test/alternator/run index 7d4e094e71..43ae27af77 100755 --- a/test/alternator/run +++ b/test/alternator/run @@ -83,6 +83,7 @@ ln -s "$SCYLLA" "$SCYLLA_LINK" --alternator-address $SCYLLA_IP \ $alternator_port_option \ --alternator-enforce-authorization=1 \ + --alternator-write-isolation=always_use_lwt \ --developer-mode=1 \ --ring-delay-ms 0 --collectd 0 \ --smp 2 -m 1G \