In this patch, we re-implement the three read-modify-write operations - PutItem, UpdateItem, DeleteItem. All three operations may need to read the item before writing it to support conditional updates (the "Expected" parameter) and UpdateItem may also need the previous item's value for its update expression (e.g., a user may ask to "set a=a+1" or "set a=b"). Before this patch, the implementation of RMW operations simply did a read, and then a write - without any attempt to protect concurrent operations. In this patch, Scylla's LWT mechanism (storage_proxy::cas()) is used instead, to ensure that concurrent update operations are correctly isolated even if they are conditional. This means that Alternator now requires the experimental LWT feature to be enabled (and refuses to boot if it isn't). The version presented here is configured to always use LWT for *every* write, regardless of whether it has a condition or not. So it will significantly slow down write-only workloads like YCSB. But the code in this patch actually includes three other modes, which can be chosen by setting an enum constant in the code. In the future we will want to let the user configure this mode, globally, per table or per attribute. Note that read requests are NOT modified, and work exactly as they did before: i.e., strongly-consistent reads are done using a normal CL=LOCAL_QUORUM read - not via LWT. I believe this is good enough given Dynamo's guarantees, and critical for our read performance. Also note that this patch doesn't yet fix the BatchWriteItem operation. Although BatchWriteItem does not support any RMW operations - just pure writes - we may still need to do those pure writes using LWT. This should be fixed in a follow-up patch. Unfortunately, this patch involves a large amount of code movement and reorganization, because: 1. The cas operation requires each operation to be made into an object, with a separate apply() function, forcing a lot of code to move. 2. 
Moreover, we need to do this for three different operations (PutItem, UpdateItem, DeleteItem) so to avoid massive code duplication, I had to move some common code. 3. The cas operation also forced us to change some of the utility functions' APIs. The end result is that this patch focuses more on a compact and understandable *end result* than it does on an easy to understand *patch*, so reviewers - sorry about that. All alternator-test/ tests pass with this patch (and also with all of the different optional modes enabled). However, other than that, I did not yet do any real isolation tests (are concurrent operations really isolated correctly? or is LWT just faking it? :-) ), performance tests or stress tests - and I'll definitely need to do those as well. Fixes #5054 Signed-off-by: Nadav Har'El <nyh@scylladb.com>
103 lines
5.5 KiB
C++
103 lines
5.5 KiB
C++
/*
|
|
* Copyright 2019 ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "stats.hh"
|
|
|
|
#include <seastar/core/metrics.hh>
|
|
|
|
namespace alternator {
|
|
|
|
// Name of the metrics group under which Alternator statistics are exposed.
// Its value matches the group name given to add_group() in stats::stats().
// NOTE(review): presumably declared `extern` in stats.hh - keep the type as is.
const char* ALTERNATOR_METRICS{"alternator"};
|
|
|
|
/**
 * Value-initializes the per-operation statistics and registers every
 * Alternator metric with this object's metric group holder (_metrics).
 *
 * Two batches are added, both under the ALTERNATOR_METRICS group name:
 *  1. Per-API-operation metrics, distinguished by an "op" label whose value
 *     is the operation's CamelCase API name: one "operation" counter per
 *     operation, plus an "op_latency" histogram for PutItem, GetItem,
 *     DeleteItem and UpdateItem.
 *  2. Global counters (total/unsupported operations, read-before-write,
 *     LWT usage and filtering row counts).
 */
stats::stats() : api_operations{} {
    // Register the Alternator metrics. Per-operation metrics share a metric
    // name and are told apart by the value of this "op" label.
    seastar::metrics::label op("op");

    _metrics.add_group(ALTERNATOR_METRICS, {
// OPERATION expands to one "operation" counter bound to api_operations.<name>
// and labeled op=<CamelCaseName>. The expansion ends with a comma so that the
// invocations below can simply be listed one per line.
#define OPERATION(name, CamelCaseName) \
        seastar::metrics::make_total_operations("operation", api_operations.name, \
                seastar::metrics::description("number of operations via Alternator API"), {op(CamelCaseName)}),
// OPERATION_LATENCY expands to one "op_latency" histogram labeled
// op=<CamelCaseName>. The histogram is produced on demand by a callback that
// captures `this` and calls api_operations.<name>.get_histogram(1, 20).
// NOTE(review): the callback assumes _metrics does not outlive this object,
// and the meaning of the (1, 20) arguments is defined by the histogram type -
// confirm both against stats.hh.
#define OPERATION_LATENCY(name, CamelCaseName) \
        seastar::metrics::make_histogram("op_latency", \
                seastar::metrics::description("Latency histogram of an operation via Alternator API"), {op(CamelCaseName)}, [this]{return api_operations.name.get_histogram(1,20);}),
        OPERATION(batch_write_item, "BatchWriteItem")
        OPERATION(create_backup, "CreateBackup")
        OPERATION(create_global_table, "CreateGlobalTable")
        OPERATION(create_table, "CreateTable")
        OPERATION(delete_backup, "DeleteBackup")
        OPERATION(delete_item, "DeleteItem")
        OPERATION(delete_table, "DeleteTable")
        OPERATION(describe_backup, "DescribeBackup")
        OPERATION(describe_continuous_backups, "DescribeContinuousBackups")
        OPERATION(describe_endpoints, "DescribeEndpoints")
        OPERATION(describe_global_table, "DescribeGlobalTable")
        OPERATION(describe_global_table_settings, "DescribeGlobalTableSettings")
        OPERATION(describe_limits, "DescribeLimits")
        OPERATION(describe_table, "DescribeTable")
        OPERATION(describe_time_to_live, "DescribeTimeToLive")
        OPERATION(get_item, "GetItem")
        OPERATION(list_backups, "ListBackups")
        OPERATION(list_global_tables, "ListGlobalTables")
        OPERATION(list_tables, "ListTables")
        OPERATION(list_tags_of_resource, "ListTagsOfResource")
        OPERATION(put_item, "PutItem")
        OPERATION(query, "Query")
        OPERATION(restore_table_from_backup, "RestoreTableFromBackup")
        OPERATION(restore_table_to_point_in_time, "RestoreTableToPointInTime")
        OPERATION(scan, "Scan")
        OPERATION(tag_resource, "TagResource")
        OPERATION(transact_get_items, "TransactGetItems")
        OPERATION(transact_write_items, "TransactWriteItems")
        OPERATION(untag_resource, "UntagResource")
        OPERATION(update_continuous_backups, "UpdateContinuousBackups")
        OPERATION(update_global_table, "UpdateGlobalTable")
        OPERATION(update_global_table_settings, "UpdateGlobalTableSettings")
        OPERATION(update_item, "UpdateItem")
        OPERATION(update_table, "UpdateTable")
        OPERATION(update_time_to_live, "UpdateTimeToLive")
        OPERATION_LATENCY(put_item_latency, "PutItem")
        OPERATION_LATENCY(get_item_latency, "GetItem")
        OPERATION_LATENCY(delete_item_latency, "DeleteItem")
        OPERATION_LATENCY(update_item_latency, "UpdateItem")
    });
// The helper macros are only meaningful for the list above; undefine them so
// they do not leak into the rest of the translation unit.
#undef OPERATION_LATENCY
#undef OPERATION

    // Global (unlabeled) counters, registered under the same group name.
    _metrics.add_group(ALTERNATOR_METRICS, {
        seastar::metrics::make_total_operations("unsupported_operations", unsupported_operations,
                seastar::metrics::description("number of unsupported operations via Alternator API")),
        seastar::metrics::make_total_operations("total_operations", total_operations,
                seastar::metrics::description("number of total operations via Alternator API")),
        seastar::metrics::make_total_operations("reads_before_write", reads_before_write,
                seastar::metrics::description("number of performed read-before-write operations")),
        seastar::metrics::make_total_operations("write_using_lwt", write_using_lwt,
                seastar::metrics::description("number of writes that used LWT")),
        seastar::metrics::make_total_operations("shard_bounce_for_lwt", shard_bounce_for_lwt,
                seastar::metrics::description("number writes that had to be bounced from this shard because of LWT requirements")),
        seastar::metrics::make_total_operations("filtered_rows_read_total", cql_stats.filtered_rows_read_total,
                seastar::metrics::description("number of rows read during filtering operations")),
        seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
                seastar::metrics::description("number of rows read and matched during filtering operations")),
        // Not stored anywhere: derived on each query as (rows read - rows matched).
        seastar::metrics::make_total_operations("filtered_rows_dropped_total", [this] { return cql_stats.filtered_rows_read_total - cql_stats.filtered_rows_matched_total; },
                seastar::metrics::description("number of rows read and dropped during filtering operations")),
    });
}
|
|
|
|
|
|
}
|