Compare commits

...

12 Commits

Author SHA1 Message Date
Piotr Dulikowski
bc711a169d Merge '[Backport 6.0] qos/raft_service_level_distributed_data_accessor: print correct error message when trying to modify a service level in recovery mode' from ScyllaDB
Raft service levels are read-only in recovery mode. This patch adds a check and a proper error message when a user tries to modify service levels in recovery mode.

Fixes https://github.com/scylladb/scylladb/issues/18827

(cherry picked from commit 2b56158d13)

(cherry picked from commit ee08d7fdad)

(cherry picked from commit af0b6bcc56)

Refs #18841

Closes scylladb/scylladb#18913

* github.com:scylladb/scylladb:
  test/auth_cluster/test_raft_service_levels: try to create sl in recovery
  service/qos/raft_sl_dda: reject changes to service levels in recovery mode
  service/qos/raft_sl_dda: extract raft_sl_dda steps to common function
2024-05-28 16:45:52 +02:00
Anna Stuchlik
7229c820cf doc: describe Tablets in ScyllaDB
This commit adds the main description of tablets and their
benefits.
The article can be used as a reference in other places
across the docs where we mention tablets.

(cherry picked from commit b5c006aadf)

Closes scylladb/scylladb#18916
2024-05-28 11:27:53 +02:00
Michał Jadwiszczak
1dd522edc8 test/auth_cluster/test_raft_service_levels: try to create sl in recovery
(cherry picked from commit af0b6bcc56)
2024-05-27 18:20:36 +00:00
Michał Jadwiszczak
6d655e6766 service/qos/raft_sl_dda: reject changes to service levels in recovery mode

When a cluster goes into recovery mode after service levels have been migrated
to Raft, service levels become temporarily read-only.

This commit adds a proper error message in case a user tries to make any
changes.
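
A minimal, self-contained sketch of the pattern (the names here are simplified stand-ins, not ScyllaDB's API; the real check is in the qos hunk further down this page):

```c++
#include <stdexcept>

// Simplified stand-in for the Raft group0 client; illustrative only.
struct group0_client_stub {
    bool recovery = false;
    bool in_recovery() const { return recovery; }
};

void set_service_level(const group0_client_stub& client) {
    // Service levels are read-only while the cluster is in recovery mode.
    if (client.in_recovery()) {
        throw std::invalid_argument(
            "The cluster is in recovery mode. Changes to service levels are not allowed.");
    }
    // ... apply the service-level change ...
}
```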

(cherry picked from commit ee08d7fdad)
2024-05-27 18:20:36 +00:00
Michał Jadwiszczak
54b9fdab03 service/qos/raft_sl_dda: extract raft_sl_dda steps to common function
When setting or dropping a service level using the Raft data accessor, the same
validation steps are executed (this_shard_id() == 0 and the guard is present).
To avoid duplicating the calls in both functions, they are extracted into a
helper function.

(cherry picked from commit 2b56158d13)
2024-05-27 18:20:36 +00:00
Raphael S. Carvalho
13f8486cd7 replica: Fix tablet's compaction_groups_for_token_range() with unowned range
File-based tablet streaming calls every shard to return the data of every
group that intersects with a given range.
After dynamic group allocation, that breaks: the tablet range will only be
present in a single shard, so an exception is thrown, causing the migration to
halt during the streaming phase.
Ideally, only one shard would be invoked, but that's out of the scope of this
fix; compaction_groups_for_token_range() should return an empty result
if none of the local groups intersect with the range.
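
As a rough illustration of the intended contract (illustrative types and names, not ScyllaDB's actual API): intersecting groups are collected, and a range owned by no local group yields an empty result rather than an exception.

```c++
#include <cstddef>
#include <utility>
#include <vector>

// Illustrative only: each local group owns an inclusive [first, last] token sub-range.
using token_range = std::pair<long, long>;

std::vector<std::size_t> groups_for_token_range(
        const std::vector<token_range>& local_groups, token_range query) {
    std::vector<std::size_t> out;
    for (std::size_t i = 0; i < local_groups.size(); ++i) {
        const auto& [first, last] = local_groups[i];
        if (last >= query.first && first <= query.second) {
            out.push_back(i); // group i intersects the queried range
        }
    }
    return out; // empty, not an exception, when this shard owns nothing in range
}
```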

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit eb8ef38543)

Closes scylladb/scylladb#18859
2024-05-27 15:20:04 +03:00
Kefu Chai
747ffd8776 migration_manager: do not reference moved-away smart pointer
This change is inspired by clang-tidy, which warns:
```
[752/852] Building CXX object service/CMakeFiles/service.dir/migration_manager.cc.o
Warning: /home/runner/work/scylladb/scylladb/service/migration_manager.cc:891:71: warning: 'view' used after it was moved [bugprone-use-after-move]
  891 |             db.get_notifier().before_create_column_family(*keyspace, *view, mutations, ts);
      |                                                                       ^
/home/runner/work/scylladb/scylladb/service/migration_manager.cc:886:86: note: move occurred here
  886 |             auto mutations = db::schema_tables::make_create_view_mutations(keyspace, std::move(view), ts);
      |                                                                                      ^
```
Here, `view` is an instance of `view_ptr`, a type with shared-pointer
semantics: it is backed by a member variable of type
`seastar::lw_shared_ptr<const schema>`, whose move constructor resets the
original instance. So we are actually accessing the moved-from pointer in

```c++
db.get_notifier().before_create_column_family(*keyspace, *view, mutations, ts)
```

In this change, instead of moving away from `view`, we create a copy and pass
the copy to `db::schema_tables::make_create_view_mutations()`. This is safe,
as the behavior of `db::schema_tables::make_create_view_mutations()` does not
depend on whether the `view` passed to it is moved or copied.
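
The pitfall is easy to reproduce outside Seastar; `std::shared_ptr` behaves the same way on move, so a small standalone demo (not ScyllaDB code) captures it:

```c++
#include <cassert>
#include <memory>
#include <string>
#include <utility>

int main() {
    auto view = std::make_shared<std::string>("my_view");

    // Moving transfers ownership and resets the source pointer,
    // just like seastar::lw_shared_ptr's move constructor.
    auto consumed = std::move(view);
    assert(view == nullptr); // dereferencing *view now would be undefined behavior

    // The fix in this commit: pass a copy instead of moving, so the
    // original stays valid for the later notifier call.
    auto copy = consumed; // bumps the reference count; both remain valid
    assert(copy && consumed);
}
```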

The change that introduced this use-after-move was 88a5ddabce.

Refs 88a5ddabce
Fixes #18837
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
(cherry picked from commit 125464f2d9)

Closes scylladb/scylladb#18873
2024-05-27 15:18:29 +03:00
Anna Stuchlik
a87683c7be doc: remove outdated MV error from Troubleshooting
This commit removes the MV error message, which only
affects older versions of ScyllaDB, from the Troubleshooting section.

Fixes https://github.com/scylladb/scylladb/issues/17205

(cherry picked from commit 92bc8053e2)

Closes scylladb/scylladb#18855
2024-05-27 15:12:22 +03:00
Anna Stuchlik
eff7b0d42d doc: replace Raft-disabled with Raft-enabled procedure
This commit fixes the incorrect Raft-related information on the Handling Cluster Membership Change Failures page
introduced with https://github.com/scylladb/scylladb/pull/17500.

The page describes the procedure for when Raft is disabled. Since 6.0, Raft for consistent schema management
is enabled and mandatory (it cannot be disabled), so this commit adds the procedure for Raft-enabled setups.

(cherry picked from commit 6626d72520)

Closes scylladb/scylladb#18858
2024-05-27 15:11:09 +03:00
David Garcia
7dbcfe5a39 docs: autogenerate metrics
Autogenerates metrics documentation using the scripts/get_description.py script introduced in #17479

docs: add beta

(cherry picked from commit 9eef3d6139)

Closes scylladb/scylladb#18857
2024-05-27 15:10:48 +03:00
Jenkins Promoter
d078bafa00 Update ScyllaDB version to: 6.0.0-rc1 2024-05-23 15:35:32 +03:00
Yaron Kaikov
1b4d5d02ef Update ScyllaDB version to: 6.0.0-rc0 2024-05-22 14:07:45 +03:00
22 changed files with 480 additions and 178 deletions

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
-VERSION=5.5.0-dev
+VERSION=6.0.0-rc1
if test -f version
then

View File

@@ -85,7 +85,7 @@ redirects: setup
# Preview commands
.PHONY: preview
preview: setup
-$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore *.csv --ignore *.yaml
+$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore *.csv --ignore *.json --ignore *.yaml
.PHONY: multiversionpreview
multiversionpreview: multiversion

View File

@@ -1,23 +1,19 @@
import os
import re
import yaml
from typing import Any, Dict, List
import jinja2
from sphinx import addnodes
from sphinx.application import Sphinx
from sphinx.directives import ObjectDescription
from sphinx.util import logging, ws_re
from sphinx.util.display import status_iterator
from sphinx.util.docfields import Field
from sphinx.util.docutils import switch_source_input, SphinxDirective
from sphinx.util.nodes import make_id, nested_parse_with_titles
from sphinx.jinja2glue import BuiltinTemplateLoader
from docutils import nodes
from docutils.parsers.rst import directives
from docutils.statemachine import StringList
+from utils import maybe_add_filters
logger = logging.getLogger(__name__)
class DBConfigParser:
@@ -152,51 +148,6 @@ class DBConfigParser:
        return DBConfigParser.all_properties[name]

-def readable_desc(description: str) -> str:
-    """
-    This function is deprecated and maintained only for backward compatibility
-    with previous versions. Use ``readable_desc_rst``instead.
-    """
-    return (
-        description.replace("\\n", "")
-        .replace('<', '&lt;')
-        .replace('>', '&gt;')
-        .replace("\n", "<br>")
-        .replace("\\t", "- ")
-        .replace('"', "")
-    )
-
-def readable_desc_rst(description):
-    indent = ' ' * 3
-    lines = description.split('\n')
-    cleaned_lines = []
-    for line in lines:
-        cleaned_line = line.replace('\\n', '\n')
-        if line.endswith('"'):
-            cleaned_line = cleaned_line[:-1] + ' '
-        cleaned_line = cleaned_line.lstrip()
-        cleaned_line = cleaned_line.replace('"', '')
-        if cleaned_line != '':
-            cleaned_line = indent + cleaned_line
-        cleaned_lines.append(cleaned_line)
-    return ''.join(cleaned_lines)
-
-def maybe_add_filters(builder):
-    env = builder.templates.environment
-    if 'readable_desc' not in env.filters:
-        env.filters['readable_desc'] = readable_desc
-    if 'readable_desc_rst' not in env.filters:
-        env.filters['readable_desc_rst'] = readable_desc_rst

class ConfigOption(ObjectDescription):
    has_content = True

View File

@@ -0,0 +1,188 @@
import os
import sys
import json
from sphinx import addnodes
from sphinx.directives import ObjectDescription
from sphinx.util.docfields import Field
from sphinx.util.docutils import switch_source_input
from sphinx.util.nodes import make_id
from sphinx.util import logging, ws_re
from docutils.parsers.rst import Directive, directives
from docutils.statemachine import StringList
from sphinxcontrib.datatemplates.directive import DataTemplateJSON
from utils import maybe_add_filters
sys.path.insert(0, os.path.abspath("../../scripts"))
import scripts.get_description as metrics
LOGGER = logging.getLogger(__name__)
class MetricsProcessor:
    MARKER = "::description"

    def _create_output_directory(self, app, metrics_directory):
        output_directory = os.path.join(app.builder.srcdir, metrics_directory)
        os.makedirs(output_directory, exist_ok=True)
        return output_directory

    def _process_single_file(self, file_path, destination_path, metrics_config_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if self.MARKER in content and not os.path.exists(destination_path):
            try:
                metrics_file = metrics.get_metrics_from_file(file_path, "scylla", metrics.get_metrics_information(metrics_config_path))
                with open(destination_path, 'w+', encoding='utf-8') as f:
                    json.dump(metrics_file, f, indent=4)
            except SystemExit:
                LOGGER.info(f'Skipping file: {file_path}')
            except Exception as error:
                LOGGER.info(error)

    def _process_metrics_files(self, repo_dir, output_directory, metrics_config_path):
        for root, _, files in os.walk(repo_dir):
            for file in files:
                if file.endswith(".cc"):
                    file_path = os.path.join(root, file)
                    file_name = os.path.splitext(file)[0] + ".json"
                    destination_path = os.path.join(output_directory, file_name)
                    self._process_single_file(file_path, destination_path, metrics_config_path)

    def run(self, app, exception=None):
        repo_dir = os.path.abspath(os.path.join(app.srcdir, ".."))
        metrics_config_path = os.path.join(repo_dir, app.config.scylladb_metrics_config_path)
        output_directory = self._create_output_directory(app, app.config.scylladb_metrics_directory)
        self._process_metrics_files(repo_dir, output_directory, metrics_config_path)

class MetricsTemplateDirective(DataTemplateJSON):
    option_spec = DataTemplateJSON.option_spec.copy()
    option_spec["title"] = lambda x: x

    def _make_context(self, data, config, env):
        context = super()._make_context(data, config, env)
        context["title"] = self.options.get("title")
        return context

    def run(self):
        return super().run()

class MetricsOption(ObjectDescription):
    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = False
    option_spec = {
        'type': directives.unchanged,
        'component': directives.unchanged,
        'key': directives.unchanged,
        'source': directives.unchanged,
    }
    doc_field_types = [
        Field('type', label='Type', has_arg=False, names=('type',)),
        Field('component', label='Component', has_arg=False, names=('component',)),
        Field('key', label='Key', has_arg=False, names=('key',)),
        Field('source', label='Source', has_arg=False, names=('source',)),
    ]

    def handle_signature(self, sig: str, signode: addnodes.desc_signature):
        signode.clear()
        signode += addnodes.desc_name(sig, sig)
        return ws_re.sub(' ', sig)

    @property
    def env(self):
        return self.state.document.settings.env

    def _render(self, name, option_type, component, key, source):
        item = {'name': name, 'type': option_type, 'component': component, 'key': key, 'source': source}
        template = self.config.scylladb_metrics_option_template
        return self.env.app.builder.templates.render(template, item)

    def transform_content(self, contentnode: addnodes.desc_content) -> None:
        name = self.arguments[0]
        option_type = self.options.get('type', '')
        component = self.options.get('component', '')
        key = self.options.get('key', '')
        source_file = self.options.get('source', '')
        _, lineno = self.get_source_info()
        source = f'scylladb_metrics:{lineno}:<{name}>'
        fields = StringList(self._render(name, option_type, component, key, source_file).splitlines(), source=source, parent_offset=lineno)
        with switch_source_input(self.state, fields):
            self.state.nested_parse(fields, 0, contentnode)

    def add_target_and_index(self, name: str, sig: str, signode: addnodes.desc_signature) -> None:
        node_id = make_id(self.env, self.state.document, self.objtype, name)
        signode['ids'].append(node_id)
        self.state.document.note_explicit_target(signode)
        entry = f'{name}; metrics option'
        self.indexnode['entries'].append(('pair', entry, node_id, '', None))
        self.env.get_domain('std').note_object(self.objtype, name, node_id, location=signode)

class MetricsDirective(Directive):
    TEMPLATE = 'metrics.tmpl'
    required_arguments = 0
    optional_arguments = 1
    option_spec = {'template': directives.path}
    has_content = True

    def _process_file(self, file, relative_path_from_current_rst):
        data_directive = MetricsTemplateDirective(
            name=self.name,
            arguments=[os.path.join(relative_path_from_current_rst, file)],
            options=self.options,
            content=self.content,
            lineno=self.lineno,
            content_offset=self.content_offset,
            block_text=self.block_text,
            state=self.state,
            state_machine=self.state_machine,
        )
        data_directive.options["template"] = self.options.get('template', self.TEMPLATE)
        data_directive.options["title"] = file.replace('_', ' ').replace('.json', '').capitalize()
        return data_directive.run()

    def _get_relative_path(self, output_directory, app, docname):
        current_rst_path = os.path.join(app.builder.srcdir, docname + ".rst")
        return os.path.relpath(output_directory, os.path.dirname(current_rst_path))

    def run(self):
        maybe_add_filters(self.state.document.settings.env.app.builder)
        app = self.state.document.settings.env.app
        docname = self.state.document.settings.env.docname
        metrics_directory = os.path.join(app.builder.srcdir, app.config.scylladb_metrics_directory)
        output = []
        try:
            relative_path_from_current_rst = self._get_relative_path(metrics_directory, app, docname)
            files = os.listdir(metrics_directory)
            for _, file in enumerate(files):
                output.extend(self._process_file(file, relative_path_from_current_rst))
        except Exception as error:
            LOGGER.info(error)
        return output

def setup(app):
    app.add_config_value("scylladb_metrics_directory", default="_data/metrics", rebuild="html")
    app.add_config_value("scylladb_metrics_config_path", default='scripts/metrics-config.yml', rebuild="html")
    app.add_config_value('scylladb_metrics_option_template', default='metrics_option.tmpl', rebuild='html', types=[str])
    app.connect("builder-inited", MetricsProcessor().run)
    app.add_object_type(
        'metrics_option',
        'metrics_option',
        objname='metrics option')
    app.add_directive_to_domain('std', 'metrics_option', MetricsOption, override=True)
    app.add_directive("metrics_option", MetricsOption)
    app.add_directive("scylladb_metrics", MetricsDirective)

    return {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }

docs/_ext/utils.py Normal file
View File

@@ -0,0 +1,44 @@
def readable_desc(description: str) -> str:
    """
    This function is deprecated and maintained only for backward compatibility
    with previous versions. Use ``readable_desc_rst``instead.
    """
    return (
        description.replace("\\n", "")
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace("\n", "<br>")
        .replace("\\t", "- ")
        .replace('"', "")
    )

def readable_desc_rst(description):
    indent = ' ' * 3
    lines = description.split('\n')
    cleaned_lines = []
    for line in lines:
        cleaned_line = line.replace('\\n', '\n')
        if line.endswith('"'):
            cleaned_line = cleaned_line[:-1] + ' '
        cleaned_line = cleaned_line.lstrip()
        cleaned_line = cleaned_line.replace('"', '')
        if cleaned_line != '':
            cleaned_line = indent + cleaned_line
        cleaned_lines.append(cleaned_line)
    return ''.join(cleaned_lines)

def maybe_add_filters(builder):
    env = builder.templates.environment
    if 'readable_desc' not in env.filters:
        env.filters['readable_desc'] = readable_desc
    if 'readable_desc_rst' not in env.filters:
        env.filters['readable_desc_rst'] = readable_desc_rst

View File

@@ -41,6 +41,6 @@ dl dt:hover > a.headerlink {
visibility: visible;
}
-dl.confval {
+dl.confval, dl.metrics_option {
border-bottom: 1px solid #cacaca;
}

docs/_templates/metrics.tmpl vendored Normal file
View File

@@ -0,0 +1,19 @@
.. -*- mode: rst -*-
{{title}}
{{ '-' * title|length }}

{% if data %}
{% for key, value in data.items() %}

.. _metricsprop_{{ key }}:

.. metrics_option:: {{ key }}
   :type: {{value[0]}}
   :source: {{value[4]}}
   :component: {{value[2]}}
   :key: {{value[3]}}

{{value[1] | readable_desc_rst}}

{% endfor %}
{% endif %}

docs/_templates/metrics_option.tmpl vendored Normal file
View File

@@ -0,0 +1,3 @@
{% if type %}* **Type:** ``{{ type }}``{% endif %}
{% if component %}* **Component:** ``{{ component }}``{% endif %}
{% if key %}* **Key:** ``{{ key }}``{% endif %}

View File

@@ -21,6 +21,9 @@
# remove the Open Source vs. Enterprise Matrix from the Open Source docs
/stable/reference/versions-matrix-enterprise-oss.html: https://enterprise.docs.scylladb.com/stable/reference/versions-matrix-enterprise-oss.html
+# Remove the outdated Troubleshooting article
+/stable/troubleshooting/error-messages/create-mv.html: /stable/troubleshooting/index.html
# Remove the Learn page (replaced with a link to a page in a different repo)

Binary file not shown (new image; 18 KiB).

Binary file not shown (new image; 21 KiB).

View File

@@ -4,6 +4,7 @@ ScyllaDB Architecture
   :titlesonly:
   :hidden:

+   Data Distribution with Tablets </architecture/tablets>
   ScyllaDB Ring Architecture <ringarchitecture/index/>
   ScyllaDB Fault Tolerance <architecture-fault-tolerance>
   Consistency Level Console Demo <console-CL-full-demo>
@@ -13,6 +14,7 @@ ScyllaDB Architecture
   Raft Consensus Algorithm in ScyllaDB </architecture/raft>

+* :doc:`Data Distribution with Tablets </architecture/tablets/>` - Tablets in ScyllaDB
* :doc:`ScyllaDB Ring Architecture </architecture/ringarchitecture/index/>` - High-Level view of ScyllaDB Ring Architecture
* :doc:`ScyllaDB Fault Tolerance </architecture/architecture-fault-tolerance>` - Deep dive into ScyllaDB Fault Tolerance
* :doc:`Consistency Level Console Demo </architecture/console-CL-full-demo>` - Console Demos of Consistency Level Settings

View File

@@ -0,0 +1,128 @@
=========================================
Data Distribution with Tablets
=========================================
A ScyllaDB cluster is a group of interconnected nodes. The data of the entire
cluster has to be distributed as evenly as possible across those nodes.
ScyllaDB is designed to ensure a balanced distribution of data by storing data
in tablets. When you add or remove nodes to scale your cluster, add or remove
a datacenter, or replace a node, tablets are moved between the nodes to keep
the same number on each node. In addition, tablets are balanced across shards
in each node.
This article explains the concept of tablets and how they let you scale your
cluster quickly and seamlessly.
Data Distribution
-------------------
ScyllaDB distributes data by splitting tables into tablets. Each tablet has
its replicas on different nodes, depending on the RF (replication factor). Each
partition of a table is mapped to a single tablet in a deterministic way. When you
query or update the data, ScyllaDB can quickly identify the tablet that stores
the relevant partition. 
The following example shows a 3-node cluster with a replication factor (RF) of
3. The data is stored in a table (Table 1) with two rows. Both rows are mapped
to one tablet (T1) with replicas on all three nodes.
.. image:: images/tablets-cluster.png
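
A sketch of how such a deterministic token-to-tablet mapping can work (this is an illustration, not ScyllaDB's actual implementation): with a power-of-two tablet count, the top bits of the partition's token select the tablet.

```c++
#include <cstdint>

// Illustrative only: map a signed 64-bit partition token to one of
// 2^log2_tablet_count tablets by taking the token's top bits.
// Requires 1 <= log2_tablet_count <= 63.
std::uint64_t tablet_for_token(std::int64_t token, unsigned log2_tablet_count) {
    // Shift the signed token into unsigned space, preserving order.
    std::uint64_t u = static_cast<std::uint64_t>(token) + (1ULL << 63);
    return u >> (64 - log2_tablet_count);
}
```

Because the mapping depends only on the token and the tablet count, every replica computes the same tablet for a given partition.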
.. TODO - Add a section about tablet splitting when there are more triggers,
   like throughput. In 6.0, tablets only split when reaching a threshold size
   (the threshold is based on the average tablet data size).
Load Balancing
==================
ScyllaDB autonomously moves tablets to balance the load. This process
is managed by a load balancer mechanism and happens independently of
the administrator. The tablet load balancer decides where to migrate
the tablets, either within the same node to balance the shards or across
the nodes to balance the global load in the cluster.
As a table grows, each tablet can split into two, creating a new tablet.
The load balancer can migrate the split halves independently to different nodes
or shards.
The load-balancing process takes place in the background and is performed
without any service interruption.
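
A toy version of the balancing step, assuming a simple greedy policy (the real load balancer considers much more than per-shard tablet counts):

```c++
#include <algorithm>
#include <vector>

// Greedy sketch: repeatedly move one tablet from the most-loaded shard
// to the least-loaded one until the spread is at most one tablet.
void balance(std::vector<int>& tablets_per_shard) {
    if (tablets_per_shard.empty()) {
        return;
    }
    for (;;) {
        auto [lo, hi] = std::minmax_element(
            tablets_per_shard.begin(), tablets_per_shard.end());
        if (*hi - *lo <= 1) {
            break; // balanced
        }
        --*hi; // migrate one tablet away from the fullest shard...
        ++*lo; // ...to the emptiest shard
    }
}
```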
Scaling Out
=============
A tablet can be dynamically migrated to an existing node or a newly added
empty node. Paired with consistent topology updates with Raft, tablets allow
you to add multiple nodes simultaneously. After nodes are added to the cluster,
existing nodes stream data to the new ones, and the system load eventually
converges to an even distribution as the process completes.
With tablets enabled, manual cleanup is not required.
Cleanup is performed automatically per tablet,
making tablets-based streaming user-independent and safer.
In addition, tablet cleanup is lightweight and efficient, as it doesn't
involve rewriting SStables on the existing nodes, which makes data ownership
changes faster. This dramatically reduces
the impact of cleanup on the performance of user queries.
The following diagrams show migrating tablets from heavily loaded nodes A and B
to a new node.
.. image:: images/tablets-load-balancing.png
.. _tablets-enable-tablets:
Enabling Tablets
-------------------
Tablets are enabled or disabled on the keyspace level. When you create a new
keyspace, tablets are enabled by default.
``NetworkTopologyStrategy``, recommended for all keyspaces,
is *required* when creating a keyspace with tablets enabled.
You can create a keyspace with tablets
disabled with the ``tablets = {'enabled': false}`` option:
.. code:: cql

   CREATE KEYSPACE my_keyspace
   WITH replication = {
     'class': 'NetworkTopologyStrategy',
     'replication_factor': 3
   } AND tablets = {
     'enabled': false
   };

.. warning::

   You cannot ALTER a keyspace to enable or disable tablets.
   The only way to update the tablet support for a keyspace is to DROP it
   (losing the schema and data) and then recreate it after redefining
   the keyspace schema with ``tablets = { 'enabled': false }`` or
   ``tablets = { 'enabled': true }``.
Limitations and Unsupported Features
--------------------------------------
The following ScyllaDB features are not supported if a keyspace has tablets
enabled:
* Counters
* Change Data Capture (CDC)
* Lightweight Transactions (LWT)
* Alternator (as it uses LWT)
If you plan to use any of the above features, CREATE your keyspace
:ref:`with tablets disabled <tablets-enable-tablets>`.
Resharding in keyspaces with tablets enabled has the following limitations:
* ScyllaDB does not support reducing the number of shards after node restart.
* ScyllaDB does not reshard data on node restart. Tablet replicas remain
allocated to the old shards on restart and are subject to background
load-balancing to additional shards after restart completes and the node
starts serving CQL.

View File

@@ -44,7 +44,8 @@ extensions = [
"scylladb_gcp_images",
"scylladb_include_flag",
"scylladb_dynamic_substitutions",
"scylladb_swagger"
"scylladb_swagger",
"scylladb_metrics"
]
# The suffix(es) of source filenames.
@@ -127,6 +128,10 @@ scylladb_swagger_origin_api = "../api"
scylladb_swagger_template = "swagger.tmpl"
scylladb_swagger_inc_template = "swagger_inc.tmpl"
+# -- Options for scylladb_metrics
+scylladb_metrics_directory = "_data/opensource/metrics"
# -- Options for HTML output
# The theme to use for pages.

View File

@@ -70,11 +70,46 @@ Step One: Determining Host IDs of Ghost Members
If you cannot determine the ghost members' host ID using the suggestions above, use the method described below.
#. Make sure there are no ongoing membership changes.
-#. Execute the following CQL query on one of your nodes to obtain the host IDs of all token ring members:
#. Execute the following CQL query on one of your nodes to retrieve the Raft group 0 ID:

   .. code-block:: cql

-      select peer, host_id, up from system.cluster_status;
      select value from system.scylla_local where key = 'raft_group0_id'
   For example:

   .. code-block:: cql

      cqlsh> select value from system.scylla_local where key = 'raft_group0_id';

       value
      --------------------------------------
       607fef80-c276-11ed-a6f6-3075f294cc65

#. Use the obtained Raft group 0 ID to query the set of all cluster members' host IDs (which includes the ghost members), by executing the following query:

   .. code-block:: cql

      select server_id from system.raft_state where group_id = <group0_id>

   Replace ``<group0_id>`` with the group 0 ID that you obtained. For example:

   .. code-block:: cql

      cqlsh> select server_id from system.raft_state where group_id = 607fef80-c276-11ed-a6f6-3075f294cc65;

       server_id
      --------------------------------------
       26a9badc-6e96-4b86-a8df-5173e5ab47fe
       7991e7f5-692e-45a0-8ae5-438be5bc7c4f
       aff11c6d-fbe7-4395-b7ca-3912d7dba2c6

#. Execute the following CQL query to obtain the host IDs of all token ring members:

   .. code-block:: cql

      select host_id, up from system.cluster_status;
For example:
@@ -83,25 +118,28 @@ If you cannot determine the ghost members' host ID using the suggestions above,
      cqlsh> select peer, host_id, up from system.cluster_status;

       peer      | host_id                              | up
      -----------+--------------------------------------+-------
-      127.0.0.3 | 42405b3b-487e-4759-8590-ddb9bdcebdc5 | False
-      127.0.0.1 | 4e3ee715-528f-4dc9-b10f-7cf294655a9e | True
-      127.0.0.2 | 225a80d0-633d-45d2-afeb-a5fa422c9bd5 | True
       127.0.0.3 | null                                 | False
       127.0.0.1 | 26a9badc-6e96-4b86-a8df-5173e5ab47fe | True
       127.0.0.2 | 7991e7f5-692e-45a0-8ae5-438be5bc7c4f | True
The output of this query is similar to the output of ``nodetool status``.
-We included the ``up`` column to see which nodes are down.
We included the ``up`` column to see which nodes are down and the ``peer`` column to see their IP addresses.

-In this example, one of the 3 nodes tried to decommission but crashed while it was leaving the token ring. The node is in a partially left state and will refuse to restart, but other nodes still consider it as a normal member. We'll have to use ``removenode`` to clean up after it.
In this example, one of the nodes tried to decommission and crashed as soon as it left the token ring but before it left the Raft group. Its entry will show up in ``system.cluster_status`` queries with ``host_id = null``, like above, until the cluster is restarted.

-#. A host ID belongs to a ghost member if it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
#. A host ID belongs to a ghost member if:

   * It appears in the ``system.raft_state`` query but not in the ``system.cluster_status`` query,
   * Or it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.

In our example, the ghost member's host ID was ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6`` because it appeared in the ``system.raft_state`` query but not in the ``system.cluster_status`` query.

If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.

A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shutdown or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.

-In our example, the ghost member's host ID is ``42405b3b-487e-4759-8590-ddb9bdcebdc5`` because it is the only member marked as down and we can verify that the other two rows appearing in ``system.cluster_status`` belong to the remaining 2 nodes in the cluster.
In some cases, even after a failed topology change, there may be no ghost members left - for example, if a bootstrapping node crashed very early in the procedure or a decommissioning node crashed after it committed the membership change but before it finalized its own shutdown steps.
If any ghost members are present, proceed to the next step.

View File

@@ -0,0 +1,6 @@
==============
Metrics (BETA)
==============
.. scylladb_metrics::
   :template: metrics.tmpl

View File

@@ -1,95 +0,0 @@
A Removed Node was not Removed Properly from the Seed Node List
===============================================================
Phenonoma
^^^^^^^^^
Failed to create :doc:`materialized view </cql/mv>` after node was removed from the cluster.
Error message:
.. code-block:: shell
InvalidRequest: Error from server: code=2200 [Invalid query] message="Can't create materialized views until the whole cluster has been upgraded"
Problem
^^^^^^^
A removed node was not removed properly from the seed node list.
Scylla Open Source 4.3 and later and Scylla Enterprise 2021.1 and later are seedless. See :doc:`Scylla Seed Nodes </kb/seed-nodes/>` for details.
This problem may occur in an earlier version of Scylla.
How to Verify
^^^^^^^^^^^^^
Scylla logs show the error message above.
To verify that the node wasn't remove properly use the :doc:`nodetool gossipinfo </operating-scylla/nodetool-commands/gossipinfo>` command
For example:
A three nodes cluster, with one node (54.62.0.101) removed.
.. code-block:: shell
nodetool gossipinfo
/54.62.0.99
generation:1172279348
heartbeat:7212
LOAD:2.0293227179E10
INTERNAL_IP:10.240.0.83
DC:E1
STATUS:NORMAL,-872190912874367364312
HOST_ID:12fdcf43-4642-53b1-a987-c0e825e4e10a
RPC_ADDRESS:10.240.0.83
RACK:R1
/54.62.0.100
generation:1657463198
heartbeat:8135
LOAD:2.0114638716E12
INTERNAL_IP:10.240.0.93
DC:E1
STATUS:NORMAL,-258152127640110957173
HOST_ID:99acbh55-1013-24a1-a987-s1w718c1e01b
RPC_ADDRESS:10.240.0.93
RACK:R1
/54.62.0.101
generation:1657463198
heartbeat:7022
LOAD:2.5173672157E48
INTERNAL_IP:10.240.0.103
DC:E1
STATUS:NORMAL,-365481201980413697284
HOST_ID:99acbh55-1301-55a1-a628-s4w254c1e01b
RPC_ADDRESS:10.240.0.103
RACK:R1
We can see that node ``54.62.0.101`` is still part of the cluster and needs to be removed.
Solution
^^^^^^^^
Remove the relevant node from the other nodes seed list (under scylla.yaml) and restart the nodes one by one.
For example:
Seed list before remove the node
.. code-block:: shell
- seeds: "10.240.0.83,10.240.0.93,10.240.0.103"
Seed list after removing the node
.. code-block:: shell
- seeds: "10.240.0.83,10.240.0.93"
Restart Scylla nodes
.. include:: /rst_include/scylla-commands-restart-index.rst

View File

@@ -6,7 +6,6 @@ Error Messages
   kb-fs-not-qualified-aio
   address-already-in-use
-   create-mv
   schema-mismatch
   invalid-ssl-prot-error
@@ -20,8 +19,6 @@ Error Messages
* :doc:`"Address already in use" messages </troubleshooting/error-messages/address-already-in-use/>`
* :doc:`"Error from server: code=2200 [Invalid query] message="Can't create materialized views until the whole cluster has been upgraded" </troubleshooting/error-messages/create-mv/>`
* :doc:`Schema Mismatch </troubleshooting/error-messages/schema-mismatch>`
* :doc:`Invalid SSL Protocol </troubleshooting/error-messages/invalid-ssl-prot-error>`

View File

@@ -666,6 +666,10 @@ private:
    storage_group* storage_group_for_id(size_t i) const {
        return storage_group_manager::storage_group_for_id(schema(), i);
    }
+
+    size_t tablet_id_for_token(dht::token t) const noexcept {
+        return tablet_map().get_tablet_id(t).value();
+    }
public:
    tablet_storage_group_manager(table& t, const locator::effective_replication_map& erm)
        : _t(t)
@@ -715,9 +719,6 @@ public:
    size_t log2_storage_groups() const override {
        return log2ceil(tablet_map().tablet_count());
    }
-    size_t storage_group_id_for_token(dht::token t) const noexcept {
-        return storage_group_of(t).first;
-    }
    storage_group* storage_group_for_token(dht::token token) const noexcept override {
        return storage_group_for_id(storage_group_of(token).first);
    }
@@ -924,8 +925,8 @@ utils::chunked_vector<compaction_group*> tablet_storage_group_manager::compaction_groups_for_token_range(
    utils::chunked_vector<compaction_group*> ret;
    auto cmp = dht::token_comparator();
-    size_t candidate_start = tr.start() ? storage_group_id_for_token(tr.start()->value()) : size_t(0);
-    size_t candidate_end = tr.end() ? storage_group_id_for_token(tr.end()->value()) : (tablet_count() - 1);
+    size_t candidate_start = tr.start() ? tablet_id_for_token(tr.start()->value()) : size_t(0);
+    size_t candidate_end = tr.end() ? tablet_id_for_token(tr.end()->value()) : (tablet_count() - 1);
    while (candidate_start <= candidate_end) {
        auto it = _storage_groups.find(candidate_start++);
View File

@@ -883,7 +883,7 @@ future<std::vector<mutation>> prepare_new_view_announcement(storage_proxy& sp, v
    }
    mlogger.info("Create new view: {}", view);
    return seastar::async([&db, keyspace = std::move(keyspace), &sp, view = std::move(view), ts] {
-        auto mutations = db::schema_tables::make_create_view_mutations(keyspace, std::move(view), ts);
+        auto mutations = db::schema_tables::make_create_view_mutations(keyspace, view, ts);
        // We don't have a separate on_before_create_view() listener to
        // call. But a view is also a column family, and we need to call
        // the on_before_create_column_family listener - notably, to

View File

@@ -10,6 +10,7 @@
#include "cql3/query_processor.hh"
#include "db/consistency_level_type.hh"
#include <seastar/core/abort_source.hh>
#include "exceptions/exceptions.hh"
#include "service/raft/raft_group0_client.hh"
#include "db/system_keyspace.hh"
#include "types/types.hh"
@@ -55,7 +56,7 @@ future<> raft_service_level_distributed_data_accessor::do_raft_command(service::
co_await _group0_client.add_entry(std::move(group0_cmd), std::move(guard), &as);
}
future<> raft_service_level_distributed_data_accessor::set_service_level(sstring service_level_name, qos::service_level_options slo, std::optional<service::group0_guard> guard, abort_source& as) const {
static void validate_state(const service::raft_group0_client& group0_client, const std::optional<service::group0_guard>& guard) {
if (this_shard_id() != 0) {
on_internal_error(logger, "raft_service_level_distributed_data_accessor: must be executed on shard 0");
}
@@ -63,6 +64,14 @@ future<> raft_service_level_distributed_data_accessor::set_service_level(sstring
if (!guard) {
on_internal_error(logger, "raft_service_level_distributed_data_accessor: guard must be present");
}
if (group0_client.in_recovery()) {
throw exceptions::invalid_request_exception("The cluster is in recovery mode. Changes to service levels are not allowed.");
}
}
future<> raft_service_level_distributed_data_accessor::set_service_level(sstring service_level_name, qos::service_level_options slo, std::optional<service::group0_guard> guard, abort_source& as) const {
validate_state(_group0_client, guard);
static sstring insert_query = format("INSERT INTO {}.{} (service_level, timeout, workload_type) VALUES (?, ?, ?);", db::system_keyspace::NAME, db::system_keyspace::SERVICE_LEVELS_V2);
data_value workload = slo.workload == qos::service_level_options::workload_type::unspecified
@@ -81,13 +90,7 @@ future<> raft_service_level_distributed_data_accessor::drop_service_level(sstrin
guard = co_await _group0_client.start_operation(&as);
}
if (this_shard_id() != 0) {
on_internal_error(logger, "raft_service_level_distributed_data_accessor: must be executed on shard 0");
}
if (!guard) {
on_internal_error(logger, "raft_service_level_distributed_data_accessor: guard must be present");
}
validate_state(_group0_client, guard);
static sstring delete_query = format("DELETE FROM {}.{} WHERE service_level= ?;", db::system_keyspace::NAME, db::system_keyspace::SERVICE_LEVELS_V2);

View File

@@ -15,6 +15,7 @@ from test.topology.util import trigger_snapshot, wait_until_topology_upgrade_fin
from test.topology.conftest import skip_mode
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement
+from cassandra.protocol import InvalidRequest

logger = logging.getLogger(__name__)
@@ -136,6 +137,14 @@ async def test_service_levels_work_during_recovery(manager: ManagerClient):
    assert sl_v1 not in [sl.service_level for sl in recovery_result]
    assert set([sl.service_level for sl in recovery_result]) == set(sls)

+    logging.info("Checking changes to service levels are forbidden during recovery")
+    with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
+        await cql.run_async(f"CREATE SERVICE LEVEL sl_{unique_name()}")
+    with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
+        await cql.run_async(f"ALTER SERVICE LEVEL {sls[0]} WITH timeout = 1h")
+    with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
+        await cql.run_async(f"DROP SERVICE LEVEL {sls[0]}")
+
    logging.info("Restoring cluster to normal status")
    await asyncio.gather(*(delete_raft_topology_state(cql, h) for h in hosts))
    await asyncio.gather(*(delete_raft_data_and_upgrade_state(cql, h) for h in hosts))