Files
scylladb/tracing/trace_state.hh
Vlad Zolotarov 7e180c7bd3 tracing: introduce the tracing::global_trace_state_ptr class
This object, similarly to a global_schema_ptr, makes it possible to dynamically
create trace_state_ptr objects on different shards in the context
of the original tracing session.

This object creates a secondary tracing session object from the
original trace_state_ptr object when a trace_state_ptr object is needed
on a "remote" shard, similarly to what we do when we need it on a remote
node.

Fixes #1678
Fixes #1647

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Message-Id: <1474387767-21910-1-git-send-email-vladz@cloudius-systems.com>
2016-10-02 11:31:37 +03:00

662 lines
22 KiB
C++

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2016 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <atomic>
#include <deque>
#include <unordered_set>
#include <seastar/util/lazy.hh>
#include "mutation.hh"
#include "utils/UUID_gen.hh"
#include "tracing/tracing.hh"
#include "gms/inet_address.hh"
#include "auth/authenticated_user.hh"
namespace tracing {
extern logging::logger trace_state_logger;
/**
 * State of a single tracing session.
 *
 * Holds the session's records, its start time, its properties (primary or
 * secondary, full tracing and/or slow query logging) and the request
 * parameters collected so far.
 *
 * Most of the mutating interface is private; it is reached through the
 * null-safe free functions that are declared as friends at the bottom of the
 * class.
 */
class trace_state final {
public:
    // A primary session may be in 3 states:
    //   - "inactive": between the creation and a begin() call.
    //   - "foreground": after a begin() call and before a
    //     stop_foreground_and_write() call.
    //   - "background": after a stop_foreground_and_write() call and till the
    //     state object is destroyed.
    //
    // - Traces are not allowed while state is in an "inactive" state.
    // - The time the primary session was in a "foreground" state is the time
    //   reported as a session's "duration".
    // - Traces that have arrived during the "background" state will be recorded
    //   as usual but their "elapsed" time will be greater or equal to the
    //   session's "duration".
    //
    // Secondary sessions may only be in an "inactive" or in a "foreground"
    // states.
    enum class state {
        inactive,
        foreground,
        background
    };

private:
    lw_shared_ptr<one_session_records> _records;
    // Used for calculation of time passed since the beginning of a tracing
    // session till each tracing event.
    elapsed_clock::time_point _start;
    std::chrono::microseconds _slow_query_threshold;
    trace_state_props_set _state_props;
    state _state = state::inactive;
    std::chrono::system_clock::rep _started_at;
    gms::inet_address _client;
    sstring _request;
    int _pending_trace_events = 0;
    shared_ptr<tracing> _local_tracing_ptr;

    // Optional request parameters; each one is set by the corresponding
    // set_xxx() method below.
    struct params_values {
        std::experimental::optional<std::unordered_set<gms::inet_address>> batchlog_endpoints;
        std::experimental::optional<api::timestamp_type> user_timestamp;
        std::experimental::optional<sstring> query;
        std::experimental::optional<db::consistency_level> cl;
        std::experimental::optional<db::consistency_level> serial_cl;
        std::experimental::optional<int32_t> page_size;
    };

    // Lazily allocating handle to params_values: the storage is created on the
    // first dereference, so a session that never sets a parameter never
    // allocates it.
    class params_ptr {
    private:
        std::unique_ptr<params_values> _vals;

        // Returns the underlying storage, allocating it on first use.
        params_values* get_ptr_safe() {
            if (!_vals) {
                _vals = std::make_unique<params_values>();
            }
            return _vals.get();
        }

    public:
        // True iff the storage has already been allocated, i.e. the handle has
        // been dereferenced at least once.
        explicit operator bool() const {
            return static_cast<bool>(_vals);
        }

        params_values* operator->() {
            return get_ptr_safe();
        }

        params_values& operator*() {
            return *get_ptr_safe();
        }
    } _params_ptr;

public:
    /**
     * Creates a primary session.
     *
     * The slow query threshold and the slow query record TTL are taken from
     * the local tracing instance.
     *
     * @param type type of the traced request
     * @param props session properties; have to request a full tracing, a slow
     *              query logging or both
     *
     * @throws std::logic_error if @p props request neither a full tracing nor
     *         a slow query logging
     */
    trace_state(trace_type type, trace_state_props_set props)
        : _state_props(props)
        , _local_tracing_ptr(tracing::get_local_tracing_instance().shared_from_this())
    {
        if (!full_tracing() && !log_slow_query()) {
            throw std::logic_error("A primary session has to be created for either full tracing or a slow query logging");
        }

        // This is a primary session
        _state_props.set(trace_state_props::primary);

        init_session_records(type, _local_tracing_ptr->slow_query_record_ttl());
        _slow_query_threshold = _local_tracing_ptr->slow_query_threshold();
    }

    /**
     * Creates a secondary session from the description of a primary one.
     *
     * The session ID, the slow query threshold and the slow query record TTL
     * are inherited from @p info.
     *
     * @param info description of the primary session
     */
    trace_state(const trace_info& info)
        : _state_props(info.state_props)
        , _local_tracing_ptr(tracing::get_local_tracing_instance().shared_from_this())
    {
        // This is a secondary session
        _state_props.remove(trace_state_props::primary);

        // Default a secondary session to a full tracing.
        // We may get both zeroes for a full_tracing and a log_slow_query if a
        // primary session is created with an older server version.
        _state_props.set_if<trace_state_props::full_tracing>(!full_tracing() && !log_slow_query());

        // inherit the slow query threshold and ttl from the coordinator
        init_session_records(info.type, std::chrono::seconds(info.slow_query_ttl_sec), info.session_id);
        _slow_query_threshold = std::chrono::microseconds(info.slow_query_threshold_us);

        trace_state_logger.trace("{}: props {}, slow query threshold {}us, slow query ttl {}s", session_id(), _state_props.mask(), info.slow_query_threshold_us, info.slow_query_ttl_sec);
    }

    ~trace_state();

    /**
     * Stop a foreground state and write pending records to I/O.
     *
     * @note The tracing session's "duration" is the time it was in the "foreground"
     * state.
     */
    void stop_foreground_and_write();

    const utils::UUID& session_id() const {
        return _records->session_id;
    }

    bool is_in_state(state s) const {
        return _state == s;
    }

    void set_state(state s) {
        _state = s;
    }

    trace_type type() const {
        return _records->session_rec.command;
    }

    bool is_primary() const {
        return _state_props.contains(trace_state_props::primary);
    }

    bool write_on_close() const {
        return _state_props.contains(trace_state_props::write_on_close);
    }

    bool full_tracing() const {
        return _state_props.contains(trace_state_props::full_tracing);
    }

    bool log_slow_query() const {
        return _state_props.contains(trace_state_props::log_slow_query);
    }

    trace_state_props_set raw_props() const {
        return _state_props;
    }

    /**
     * @return a slow query threshold value in microseconds.
     *
     * @note The microseconds count is narrowed to uint32_t on return.
     */
    uint32_t slow_query_threshold_us() const {
        return _slow_query_threshold.count();
    }

    /**
     * @return a slow query entry TTL value in seconds
     */
    uint32_t slow_query_ttl_sec() const {
        return _records->session_rec.slow_query_record_ttl.count();
    }

private:
    // True iff slow query logging is enabled for this session and the given
    // elapsed time is strictly above the slow query threshold.
    bool should_log_slow_query(elapsed_clock::duration e) const {
        return log_slow_query() && e > _slow_query_threshold;
    }

    /**
     * Allocates and initializes the records object of this session.
     *
     * @param type type of the traced request
     * @param slow_query_ttl TTL of a slow query record
     * @param session_id ID to use for the session; a new time UUID is
     *                   generated when it is disengaged
     */
    void init_session_records(trace_type type, std::chrono::seconds slow_query_ttl, const std::experimental::optional<utils::UUID>& session_id = std::experimental::nullopt)
    {
        _records = make_lw_shared<one_session_records>();
        _records->session_id = session_id ? *session_id : utils::UUID_gen::get_time_UUID();

        // Records TTL: the per-type TTL for a full tracing, the slow query TTL
        // for a slow query logging and the greater of the two when both are
        // enabled.
        if (full_tracing()) {
            if (!log_slow_query()) {
                _records->ttl = ttl_by_type(type);
            } else {
                _records->ttl = std::max(ttl_by_type(type), slow_query_ttl);
            }
        } else {
            _records->ttl = slow_query_ttl;
        }

        _records->session_rec.command = type;
        _records->session_rec.slow_query_record_ttl = slow_query_ttl;
    }

    bool should_write_records() const {
        return full_tracing() || _records->do_log_slow_query;
    }

    /**
     * Returns the amount of time passed since the beginning of this tracing session.
     *
     * @return the amount of time passed since the beginning of this session
     */
    elapsed_clock::duration elapsed();

    /**
     * Initiates a tracing session.
     *
     * Starts the tracing session time measurements.
     * This overload is meant for secondary sessions.
     */
    void begin() {
        // std::memory_order_seq_cst (rather than the enum-qualified
        // std::memory_order::memory_order_seq_cst) is valid both before and
        // after C++20 turned std::memory_order into a scoped enum.
        std::atomic_signal_fence(std::memory_order_seq_cst);
        _start = elapsed_clock::now();
        std::atomic_signal_fence(std::memory_order_seq_cst);
        set_state(state::foreground);
    }

    /**
     * Initiates a tracing session.
     *
     * Starts the tracing session time measurements.
     * This overload is meant for primary sessions.
     *
     * @param request description of a request being traced
     * @param client address of a client the traced request came from
     */
    void begin(sstring request, gms::inet_address client) {
        begin();
        _records->session_rec.client = client;
        _records->session_rec.request = std::move(request);
        _records->session_rec.started_at = std::chrono::system_clock::now();
    }

    /**
     * Same as the overload above, but the request description is produced by
     * evaluating the given lazy_eval object.
     */
    template <typename Func>
    void begin(const seastar::lazy_eval<Func>& lf, gms::inet_address client) {
        begin(lf(), client);
    }

    /**
     * Stores a batchlog endpoints.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'batchlog_endpoints' key.
     *
     * @param val the set of batchlog endpoints
     */
    void set_batchlog_endpoints(const std::unordered_set<gms::inet_address>& val) {
        _params_ptr->batchlog_endpoints.emplace(val);
    }

    /**
     * Stores a consistency level of a query being traced.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'consistency_level' key.
     *
     * @param val the consistency level
     */
    void set_consistency_level(db::consistency_level val) {
        _params_ptr->cl.emplace(val);
    }

    /**
     * Stores an optional serial consistency level of a query being traced.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'serial_consistency_level' key.
     *
     * @param val the optional value with a serial consistency level
     */
    void set_optional_serial_consistency_level(const std::experimental::optional<db::consistency_level>& val) {
        if (val) {
            _params_ptr->serial_cl.emplace(*val);
        }
    }

    /**
     * Stores a page size of a query being traced.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'page_size' key.
     *
     * @param val the PAGE size; non-positive values are ignored
     */
    void set_page_size(int32_t val) {
        if (val > 0) {
            _params_ptr->page_size.emplace(val);
        }
    }

    /**
     * Store a query string.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'query' key.
     *
     * @param val the query string
     */
    void set_query(const sstring& val) {
        _params_ptr->query.emplace(val);
    }

    /**
     * Store a user provided timestamp.
     *
     * This value will eventually be stored in a params<string, string> map of a tracing session
     * with a 'user_timestamp' key.
     *
     * @param val the timestamp
     */
    void set_user_timestamp(api::timestamp_type val) {
        _params_ptr->user_timestamp.emplace(val);
    }

    /**
     * Store the name of the authenticated user in the session record.
     *
     * @param user the authenticated user; a null pointer is ignored
     */
    void set_username(shared_ptr<auth::authenticated_user> user) {
        if (user) {
            _records->session_rec.username = user->name();
        }
    }

    /**
     * Add a table name to the set of tables stored in the session record.
     *
     * @param full_table_name the table name to add
     */
    void add_table_name(sstring full_table_name) {
        _records->session_rec.tables.emplace(std::move(full_table_name));
    }

    /**
     * Fill the map in a session's record with the values set so far.
     */
    void build_parameters_map();

    /**
     * Add a single trace entry - a special case for a simple string.
     *
     * @param msg trace message
     */
    void trace(sstring msg);

    void trace(const char* msg) {
        trace(sstring(msg));
    }

    /**
     * Add a single trace entry - printf-like version
     *
     * Add a single trace entry with a message given in a printf-like way:
     * format string with positional parameters.
     *
     * @note Both format string and positional parameters are going to be copied
     * and the final string is going to be built later. A caller has to take this
     * into an account and make sure that positional parameters are both
     * copiable and that their copying is not expensive.
     *
     * NOTE(review): the definition of this template in this header builds the
     * message right away with seastar::format(); the note above looks like it
     * describes a deferred formatting scheme - confirm which one is intended.
     *
     * @tparam A
     * @param fmt format string
     * @param a positional parameters
     */
    template <typename... A>
    void trace(const char* fmt, A&&... a);

    // Null-safe free-function wrappers that are allowed to use the private
    // interface above.
    template <typename... A>
    friend void begin(const trace_state_ptr& p, A&&... a);

    template <typename... A>
    friend void trace(const trace_state_ptr& p, A&&... a);

    friend void set_page_size(const trace_state_ptr& p, int32_t val);
    friend void set_batchlog_endpoints(const trace_state_ptr& p, const std::unordered_set<gms::inet_address>& val);
    friend void set_consistency_level(const trace_state_ptr& p, db::consistency_level val);
    friend void set_optional_serial_consistency_level(const trace_state_ptr& p, const std::experimental::optional<db::consistency_level>& val);
    friend void set_query(const trace_state_ptr& p, const sstring& val);
    friend void set_user_timestamp(const trace_state_ptr& p, api::timestamp_type val);
    friend void set_username(const trace_state_ptr& p, shared_ptr<auth::authenticated_user> user);
    friend void add_table_name(const trace_state_ptr& p, const sstring& ks_name, const sstring& cf_name);
};
/**
 * Add a single trace entry with an already built message.
 *
 * The entry is dropped (and accounted in the "dropped_records" counter) when
 * the local tracing instance has no records budget left. Errors raised while
 * the entry is being recorded are counted in "trace_errors" and swallowed.
 *
 * @param message trace message
 *
 * @throws std::logic_error if the session has not been started with begin() yet
 */
inline void trace_state::trace(sstring message) {
    if (is_in_state(state::inactive)) {
        throw std::logic_error("trying to use a trace() before begin() for \"" + message + "\" tracepoint");
    }

    auto& stats = _local_tracing_ptr->stats;

    // The total amount of pending, active and flushing records must not
    // exceed two times the maximum number of pending records.
    //
    // When records are produced too fast, or when the backend can't keep up,
    // new records get dropped. This is expected to be rare, so this path is
    // not optimized.
    if (!_local_tracing_ptr->have_records_budget()) {
        tracing_logger.trace("{}: Maximum number of traces is reached. Some traces are going to be dropped", session_id());
        ++stats.dropped_records;
        if (stats.dropped_records % tracing::log_warning_period == 1) {
            tracing_logger.warn("Maximum records limit is hit {} times", stats.dropped_records);
        }
        return;
    }

    try {
        auto since_start = elapsed();
        _records->events_recs.emplace_back(std::move(message), since_start, i_tracing_backend_helper::wall_clock::now());
        _records->consume_from_budget();

        // Normally traces are written once the session is over. A session
        // that produces a lot of traces, however, gets flushed early - before
        // new records have to be dropped.
        //
        // When only slow queries are traced nothing is written while the
        // elapsed time is still below the slow query logging threshold.
        const bool enough_events = _records->events_recs.size() >= tracing::exp_trace_events_per_session;
        if (enough_events && (full_tracing() || should_log_slow_query(since_start))) {
            _local_tracing_ptr->schedule_for_write(_records);
            _local_tracing_ptr->write_maybe();
        }
    } catch (...) {
        // Count the failure and carry on.
        ++stats.trace_errors;
    }
}
/**
 * Add a single trace entry - printf-like version.
 *
 * Builds the message with seastar::format() and hands it over to the
 * trace(sstring) overload. Any exception raised on the way is counted in the
 * "trace_errors" counter and swallowed.
 *
 * @param fmt format string
 * @param a positional parameters
 */
template <typename... A>
void trace_state::trace(const char* fmt, A&&... a) {
    try {
        auto formatted = seastar::format(fmt, std::forward<A>(a)...);
        trace(std::move(formatted));
    } catch (...) {
        // Count the failure and carry on.
        ++_local_tracing_ptr->stats.trace_errors;
    }
}
/**
 * Returns the amount of time passed since the beginning of this tracing
 * session, i.e. since the time point stored by begin().
 *
 * The clock read is enclosed between two std::atomic_signal_fence() calls;
 * presumably this is meant to keep the compiler from moving the read relative
 * to the surrounding code.
 *
 * @return the amount of time passed since the beginning of this session
 */
inline elapsed_clock::duration trace_state::elapsed() {
    // std::memory_order_seq_cst (rather than the enum-qualified
    // std::memory_order::memory_order_seq_cst) is valid both before and after
    // C++20 turned std::memory_order into a scoped enum.
    std::atomic_signal_fence(std::memory_order_seq_cst);
    elapsed_clock::duration since_start = elapsed_clock::now() - _start;
    std::atomic_signal_fence(std::memory_order_seq_cst);

    return since_start;
}
/**
 * Null-safe wrapper around trace_state::set_page_size().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the page size
 */
inline void set_page_size(const trace_state_ptr& p, int32_t val) {
    if (!p) {
        return;
    }

    p->set_page_size(val);
}
/**
 * Null-safe wrapper around trace_state::set_batchlog_endpoints().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the set of batchlog endpoints
 */
inline void set_batchlog_endpoints(const trace_state_ptr& p, const std::unordered_set<gms::inet_address>& val) {
    if (!p) {
        return;
    }

    p->set_batchlog_endpoints(val);
}
/**
 * Null-safe wrapper around trace_state::set_consistency_level().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the consistency level
 */
inline void set_consistency_level(const trace_state_ptr& p, db::consistency_level val) {
    if (!p) {
        return;
    }

    p->set_consistency_level(val);
}
/**
 * Null-safe wrapper around trace_state::set_optional_serial_consistency_level().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the optional value with a serial consistency level
 */
inline void set_optional_serial_consistency_level(const trace_state_ptr& p, const std::experimental::optional<db::consistency_level>& val) {
    if (!p) {
        return;
    }

    p->set_optional_serial_consistency_level(val);
}
/**
 * Null-safe wrapper around trace_state::set_query().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the query string
 */
inline void set_query(const trace_state_ptr& p, const sstring& val) {
    if (!p) {
        return;
    }

    p->set_query(val);
}
/**
 * Null-safe wrapper around trace_state::set_user_timestamp().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param val the user provided timestamp
 */
inline void set_user_timestamp(const trace_state_ptr& p, api::timestamp_type val) {
    if (!p) {
        return;
    }

    p->set_user_timestamp(val);
}
/**
 * Null-safe wrapper around trace_state::set_username().
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param user the authenticated user
 */
inline void set_username(const trace_state_ptr& p, shared_ptr<auth::authenticated_user> user) {
    if (p) {
        // "user" is our own by-value copy and isn't used afterwards: hand it
        // over instead of copying the shared pointer one more time.
        p->set_username(std::move(user));
    }
}
/**
 * Null-safe wrapper around trace_state::add_table_name().
 *
 * The table is registered under the "<ks_name>.<cf_name>" name.
 *
 * @param p trace state handle; nothing happens if it's not initialized
 * @param ks_name keyspace name
 * @param cf_name column family name
 */
inline void add_table_name(const trace_state_ptr& p, const sstring& ks_name, const sstring& cf_name) {
    if (!p) {
        return;
    }

    p->add_table_name(ks_name + "." + cf_name);
}
/**
 * Conditionally invokes one of the trace_state::begin() overloads.
 *
 * If the trace state is initialized the call is forwarded to it immediately,
 * otherwise nothing happens.
 *
 * @tparam A
 * @param p trace state handle
 * @param a optional parameters for trace_state::begin()
 */
template <typename... A>
inline void begin(const trace_state_ptr& p, A&&... a) {
    if (!p) {
        return;
    }

    p->begin(std::forward<A>(a)...);
}
/**
 * Conditionally invokes one of the trace_state::trace() overloads.
 *
 * Creates a trace entry if the given trace state @param p is initialized.
 * Otherwise, if @param p is not initialized - does nothing.
 * The trace message may be passed as a printf-like format string with the
 * corresponding positional parameters.
 *
 * If @param p is initialized both the trace message string and the positional
 * parameters are going to be copied and the final string is going to be built
 * later. Therefore a caller has to take this into an account and make sure
 * that positional parameters are both copiable and that the copy is not
 * expensive.
 *
 * @param A
 * @param p trace state handle
 * @param a trace message format string with optional parameters
 */
template <typename... A>
inline void trace(const trace_state_ptr& p, A&&... a) {
    if (!p) {
        return;
    }

    p->trace(std::forward<A>(a)...);
}
/**
 * Builds a description of the given session that may be used for creating a
 * secondary session.
 *
 * @param state trace state handle
 *
 * @return the session description, or a disengaged optional if @p state is not
 *         initialized or if the session should not be continued elsewhere
 */
inline std::experimental::optional<trace_info> make_trace_info(const trace_state_ptr& state) {
    if (!state) {
        return std::experimental::nullopt;
    }

    // Remote replicas' operations are traced only when a full tracing is
    // requested, or when a slow query logging is enabled and the session is
    // still active.
    //
    // With only a slow query logging enabled, whatever happens on a remote
    // replica after the client has received a response for the query is of
    // no interest.
    const bool slow_query_still_active = state->log_slow_query() && !state->is_in_state(trace_state::state::background);
    if (!state->full_tracing() && !slow_query_still_active) {
        return std::experimental::nullopt;
    }

    return trace_info{state->session_id(), state->type(), state->write_on_close(), state->raw_props(), state->slow_query_threshold_us(), state->slow_query_ttl_sec()};
}
/**
 * Null-safe wrapper around trace_state::stop_foreground_and_write().
 *
 * @param state trace state handle; nothing happens if it's not initialized
 */
inline void stop_foreground(const trace_state_ptr& state) {
    if (!state) {
        return;
    }

    state->stop_foreground_and_write();
}
// global_trace_state_ptr is a helper for continuing an existing tracing
// session on other shards. When a trace_state_ptr is requested on a shard
// other than the one the object was created on, a secondary tracing session
// is created on the requesting shard - similarly to what is done for tracing
// spans on remote nodes.
//
// The usage is straightforward:
//   1. Create a global_trace_state_ptr from the existing trace_state_ptr
//      object.
//   2. Pass the global_trace_state_ptr object, instead of the trace_state_ptr
//      object, to the execution unit that (possibly) runs on a different
//      shard.
class global_trace_state_ptr {
    // The shard this object has been created on.
    unsigned _cpu_of_origin;
    trace_state_ptr _ptr;

public:
    // Note: the trace_state_ptr must come from the current shard
    global_trace_state_ptr(trace_state_ptr t)
        : _cpu_of_origin(engine().cpu_id())
        , _ptr(std::move(t))
    { }

    // May be invoked across shards.
    // The new object is bound to the shard the copy is made on.
    global_trace_state_ptr(const global_trace_state_ptr& other)
        : global_trace_state_ptr(other.get())
    { }

    // May be invoked across shards.
    // Behaves like the copy constructor: "other" is left untouched.
    global_trace_state_ptr(global_trace_state_ptr&& other)
        : global_trace_state_ptr(other.get())
    { }

    global_trace_state_ptr& operator=(const global_trace_state_ptr&) = delete;

    // May be invoked across shards.
    //
    // Returns the original pointer on the shard of origin. On any other shard
    // returns a freshly created and already started secondary session, or a
    // null pointer if make_trace_info() yields no description for the
    // original session.
    trace_state_ptr get() const {
        // optimize the "tracing not enabled" case
        if (!_ptr) {
            return nullptr;
        }

        if (_cpu_of_origin == engine().cpu_id()) {
            return _ptr;
        }

        auto remote_info = make_trace_info(_ptr);
        if (!remote_info) {
            return nullptr;
        }

        trace_state_ptr secondary_session = tracing::get_local_tracing_instance().create_session(*remote_info);
        begin(secondary_session);
        return secondary_session;
    }

    // May be invoked across shards.
    operator trace_state_ptr() const { return get(); }
};
}