scylladb/conf/scylla.yaml

# Scylla storage config YAML

#######################################
# This file is split to two sections:
# 1. Supported parameters
# 2. Unsupported parameters: reserved for future use or backwards
#    compatibility.
# Scylla will only read and use the first segment
#######################################

### Supported Parameters

# The name of the cluster. This is mainly used to prevent machines in
# one logical cluster from joining another.
# It is recommended to change the default value when creating a new cluster.
# You can NOT modify this value for an existing cluster
#cluster_name: 'Test Cluster'

# This defines the number of tokens randomly assigned to this node on the ring
# The more tokens, relative to other nodes, the larger the proportion of data
# that this node will store. You probably want all nodes to have the same number
# of tokens assuming they have equal hardware capability.
num_tokens: 256

# Directory where Scylla should store all its files, which are commitlog,
# data, hints, view_hints and saved_caches subdirectories. All of these
# subs can be overridden by the respective options below.
# If unset, the value defaults to /var/lib/scylla
# workdir: /var/lib/scylla

# Directory where Scylla should store data on disk.
# data_file_directories:
#    - /var/lib/scylla/data

# commit log.  when running on magnetic HDD, this should be a
# separate spindle than the data directories.
# commitlog_directory: /var/lib/scylla/commitlog

# schema commit log. A special commitlog instance
# used for schema and system tables.
# When running on magnetic HDD, this should be a
# separate spindle than the data directories.
# schema_commitlog_directory: /var/lib/scylla/commitlog/schema

# commitlog_sync may be either "periodic" or "batch."
#
# When in batch mode, Scylla won't ack writes until the commit log
# has been fsynced to disk.  It will wait
# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
# This window should be kept short because the writer threads will
# be unable to do extra work while waiting.  (You may need to increase
# concurrent_writes for the same reason.)
#
# commitlog_sync: batch
# commitlog_sync_batch_window_in_ms: 2
#
# the other option is "periodic" where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
# milliseconds.
commitlog_sync: periodic
commitlog_sync_period_in_ms: 10000

# The size of the individual commitlog file segments.  A commitlog
# segment may be archived, deleted, or recycled once all the data
# in it (potentially from each columnfamily in the system) has been
# flushed to sstables.
#
# The default size is 32, which is almost always fine, but if you are
# archiving commitlog segments (see commitlog_archiving.properties),
# then you probably want a finer granularity of archiving; 8 or 16 MB
# is reasonable.
commitlog_segment_size_in_mb: 32

# The size of the individual schema commitlog file segments.
#
# The default size is 128, which is 4 times larger than the default
# size of the data commitlog. It's because the segment size puts
# a limit on the mutation size that can be written at once, and some
# schema mutation writes are much larger than average.
schema_commitlog_segment_size_in_mb: 128

# seed_provider class_name is saved for future use.
# A seed address is mandatory.
seed_provider:
    # The addresses of hosts that will serve as contact points for the joining node.
    # It allows the node to discover the cluster ring topology on startup (when
    # joining the cluster).
    # Once the node has joined the cluster, the seed list has no function.
    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
      parameters:
          # In a new cluster, provide the address of the first node.
          # In an existing cluster, specify the address of at least one existing node.
          # If you specify addresses of more than one node, use a comma to separate them.
          # For example: "<IP1>,<IP2>,<IP3>"
          - seeds: "127.0.0.1"

# Address to bind to and tell other Scylla nodes to connect to.
# You _must_ change this if you want multiple nodes to be able to communicate!
#
# If you leave broadcast_address (below) empty, then setting listen_address
# to 0.0.0.0 is wrong as other nodes will not know how to reach this node.
# If you set broadcast_address, then you can set listen_address to 0.0.0.0.
listen_address: localhost

# Address to broadcast to other Scylla nodes
# Leaving this blank will set it to the same value as listen_address
# broadcast_address: 1.2.3.4


# When using multiple physical network interfaces, set this to true to listen on broadcast_address
# in addition to the listen_address, allowing nodes to communicate in both interfaces.
# Ignore this property if the network configuration automatically routes between the public and private networks such as EC2.
#
# listen_on_broadcast_address: false

# port for the CQL native transport to listen for clients on
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
# To disable the CQL native transport, remove this option and configure native_transport_port_ssl.
native_transport_port: 9042

# Like native_transport_port, but clients are forwarded to specific shards, based on the
# client-side port numbers.
native_shard_aware_transport_port: 19042

# Enabling native transport encryption in client_encryption_options allows you to either use
# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
# standard native_transport_port.
# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
# for native_transport_port. Setting native_transport_port_ssl to a different value
# from native_transport_port will use encryption for native_transport_port_ssl while
# keeping native_transport_port unencrypted.
#native_transport_port_ssl: 9142

# Like native_transport_port_ssl, but clients are forwarded to specific shards, based on the
# client-side port numbers.
#native_shard_aware_transport_port_ssl: 19142

# How long the coordinator should wait for read operations to complete
read_request_timeout_in_ms: 5000

# How long the coordinator should wait for writes to complete
write_request_timeout_in_ms: 2000
# how long a coordinator should continue to retry a CAS operation
# that contends with other proposals for the same row
cas_contention_timeout_in_ms: 1000

# phi value that must be reached for a host to be marked down.
# most users should never need to adjust this.
# phi_convict_threshold: 8

# IEndpointSnitch.  The snitch has two functions:
# - it teaches Scylla enough about your network topology to route
#   requests efficiently
# - it allows Scylla to spread replicas around your cluster to avoid
#   correlated failures. It does this by grouping machines into
#   "datacenters" and "racks."  Scylla will do its best not to have
#   more than one replica on the same "rack" (which may not actually
#   be a physical location)
#
# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
# ARE PLACED.
#
# Out of the box, Scylla provides
#  - SimpleSnitch:
#    Treats Strategy order as proximity. This can improve cache
#    locality when disabling read repair.  Only appropriate for
#    single-datacenter deployments.
#  - GossipingPropertyFileSnitch
#    This should be your go-to snitch for production use.  The rack
#    and datacenter for the local node are defined in
#    cassandra-rackdc.properties and propagated to other nodes via
#    gossip.  If cassandra-topology.properties exists, it is used as a
#    fallback, allowing migration from the PropertyFileSnitch.
#  - PropertyFileSnitch:
#    Proximity is determined by rack and data center, which are
#    explicitly configured in cassandra-topology.properties.
#  - Ec2Snitch:
#    Appropriate for EC2 deployments in a single Region. Loads Region
#    and Availability Zone information from the EC2 API. The Region is
#    treated as the datacenter, and the Availability Zone as the rack.
#    Only private IPs are used, so this will not work across multiple
#    Regions.
#  - Ec2MultiRegionSnitch:
#    Uses public IPs as broadcast_address to allow cross-region
#    connectivity.  (Thus, you should set seed addresses to the public
#    IP as well.) You will need to open the storage_port or
#    ssl_storage_port on the public IP firewall.  (For intra-Region
#    traffic, Scylla will switch to the private IP after
#    establishing a connection.)
#  - RackInferringSnitch:
#    Proximity is determined by rack and data center, which are
#    assumed to correspond to the 3rd and 2nd octet of each node's IP
#    address, respectively.  Unless this happens to match your
#    deployment conventions, this is best used as an example of
#    writing a custom Snitch class and is provided in that spirit.
#
# You can use a custom Snitch by setting this to the full class name
# of the snitch, which will be assumed to be on your classpath.
endpoint_snitch: SimpleSnitch

# The address or interface to bind the native transport server to.
#
# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
# to a single address, IP aliasing is not supported.
#
# Leaving rpc_address blank has the same effect as on listen_address
# (i.e. it will be based on the configured hostname of the node).
#
# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
# set broadcast_rpc_address to a value other than 0.0.0.0.
#
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
#
# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
rpc_address: localhost
# rpc_interface: eth1
# rpc_interface_prefer_ipv6: false

# port for REST API server
api_port: 10000

# IP for the REST API server
api_address: 127.0.0.1

# Log WARN on any batch size exceeding this value. 128 kiB per batch by default.
# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
batch_size_warn_threshold_in_kb: 128

# Fail any multiple-partition batch exceeding this value. 1 MiB (8x warn threshold) by default.
batch_size_fail_threshold_in_kb: 1024

# Authentication backend, identifying users
# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator,
# PasswordAuthenticator}.
#
# - AllowAllAuthenticator performs no checks - set it to disable authentication.
# - PasswordAuthenticator relies on username/password pairs to authenticate
#   users. It keeps usernames and hashed passwords in system_auth.credentials table.
#   Please increase system_auth keyspace replication factor if you use this authenticator.
# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
#   to authenticate in the same manner as PasswordAuthenticator, but improper credentials
#   result in being logged in as an anonymous user. Use for upgrading clusters' auth.
# - com.scylladb.auth.SaslauthdAuthenticator outsources authentication to a running saslauthd
#   daemon. When using this authenticator, you must set the saslauthd_socket_path property to the
#   Unix domain socket on which saslauthd is listening.
# authenticator: AllowAllAuthenticator
# saslauthd_socket_path: /var/state/saslauthd/mux

# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthorizer,
# CassandraAuthorizer}.
#
# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
#   increase system_auth keyspace replication factor if you use this authorizer.
# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
#   authorizing permission management. Otherwise, it allows all. Use for upgrading
#   clusters' auth.
# authorizer: AllowAllAuthorizer

# initial_token allows you to specify tokens manually.  While you can use # it with
# vnodes (num_tokens > 1, above) -- in which case you should provide a
# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
# that do not have vnodes enabled.
# initial_token:

# RPC address to broadcast to drivers and other Scylla nodes. This cannot
# be set to 0.0.0.0. If left blank, this will be set to the value of
# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
# be set.
# broadcast_rpc_address: 1.2.3.4

# Uncomment to enable experimental features
# experimental_features:
#     - udf
#     - alternator-streams
#     - broadcast-tables
#     - keyspace-storage-options

# The directory where hints files are stored if hinted handoff is enabled.
# hints_directory: /var/lib/scylla/hints

# The directory where hints files are stored for materialized-view updates
# view_hints_directory: /var/lib/scylla/view_hints

# See https://docs.scylladb.com/architecture/anti-entropy/hinted-handoff
# May either be "true" or "false" to enable globally, or contain a list
# of data centers to enable per-datacenter.
# hinted_handoff_enabled: DC1,DC2
# hinted_handoff_enabled: true

# this defines the maximum amount of time a dead host will have hints
# generated.  After it has been dead this long, new hints for it will not be
# created until it has been seen alive and gone down again.
# max_hint_window_in_ms: 10800000 # 3 hours


# Validity period for authorized statements cache. Defaults to 10000, set to 0 to disable.
# Will be disabled automatically for AllowAllAuthorizer.
# permissions_validity_in_ms: 10000

# Refresh interval for authorized statements cache.
# After this interval, cache entries become eligible for refresh. Upon next
# access, an async reload is scheduled and the old value returned until it
# completes. If permissions_validity_in_ms is non-zero, then this also must have
# a non-zero value. Defaults to 2000. It's recommended to set this value to
# be at least 3 times smaller than the permissions_validity_in_ms.
# permissions_update_interval_in_ms: 2000

# The partitioner is responsible for distributing groups of rows (by
# partition key) across nodes in the cluster.  You should leave this
# alone for new clusters.  The partitioner can NOT be changed without
# reloading all data, so when upgrading you should set this to the
# same partitioner you were already using.
#
# Murmur3Partitioner is currently the only supported partitioner,
#
partitioner: org.apache.cassandra.dht.Murmur3Partitioner

# Total space to use for commitlogs.
#
# If space gets above this value (it will round up to the next nearest
# segment multiple), Scylla will flush every dirty CF in the oldest
# segment and remove it.  So a small total commitlog space will tend
# to cause more flush activity on less-active columnfamilies.
#
# A value of -1 (default) will automatically equate it to the total amount of memory
# available for Scylla.
commitlog_total_space_in_mb: -1

# TCP port, for commands and data
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
# storage_port: 7000

# SSL port, for encrypted communication.  Unused unless enabled in
# encryption_options
# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
# ssl_storage_port: 7001

# listen_interface: eth0
# listen_interface_prefer_ipv6: false

# Whether to start the native transport server.
# Please note that the address on which the native transport is bound is the
# same as the rpc_address. The port however is different and specified below.
# start_native_transport: true

# The maximum size of allowed frame. Frame (requests) larger than this will
# be rejected as invalid. The default is 256MB.
# native_transport_max_frame_size_in_mb: 256

# enable or disable keepalive on rpc/native connections
# rpc_keepalive: true

# Set to true to have Scylla create a hard link to each sstable
# flushed or streamed locally in a backups/ subdirectory of the
# keyspace data.  Removing these links is the operator's
# responsibility.
# incremental_backups: false

# Whether or not to take a snapshot before each compaction.  Be
# careful using this option, since Scylla won't clean up the
# snapshots for you.  Mostly useful if you're paranoid when there
# is a data format change.
# snapshot_before_compaction: false

# Whether or not a snapshot is taken of the data before keyspace truncation
# or dropping of column families. The STRONGLY advised default of true
# should be used to provide data safety. If you set this flag to false, you will
# lose data on truncation or drop.
# auto_snapshot: true

# When executing a scan, within or across a partition, we need to keep the
# tombstones seen in memory so we can return them to the coordinator, which
# will use them to make sure other replicas also know about the deleted rows.
# With workloads that generate a lot of tombstones, this can cause performance
# problems and even exhaust the server heap.
# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
# Adjust the thresholds here if you understand the dangers and want to
# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
# using the StorageService mbean.
# tombstone_warn_threshold: 1000
# tombstone_failure_threshold: 100000

# Granularity of the collation index of rows within a partition.
# Increase if your rows are large, or if you have a very large
# number of rows per partition.  The competing goals are these:
#   1) a smaller granularity means more index entries are generated
#      and looking up rows within the partition by collation column
#      is faster
#   2) but, Scylla will keep the collation index in memory for hot
#      rows (as part of the key cache), so a larger granularity means
#      you can cache more hot rows
# column_index_size_in_kb: 64

# sstable format version for newly written sstables.
# Currently allowed values are `me` and `ms`.
# If not specified in the config, this defaults to `me`.
#
# The difference between `me` and `ms` are the data structures used
# in the primary index.
# In short, `ms` needs more CPU during sstable writes,
# but should behave better during reads,
# although it might behave worse for very long clustering keys.
#
# `ms` sstable format works even better with `column_index_size_in_kb` set to 1,
# so keep those two settings in sync (either both set, or both unset).
sstable_format: ms
column_index_size_in_kb: 1

# Auto-scaling of the promoted index prevents running out of memory
# when the promoted index grows too large (due to partitions with many rows
# vs. too small column_index_size_in_kb).  When the serialized representation
# of the promoted index grows by this threshold, the desired block size
# for this partition (initialized to column_index_size_in_kb)
# is doubled, to decrease the sampling resolution by half.
#
# To disable promoted index auto-scaling, set the threshold to 0.
# column_index_auto_scale_threshold_in_kb: 10240

# Log a warning when writing partitions larger than this value
# compaction_large_partition_warning_threshold_mb: 1000

# Log a warning when writing rows larger than this value
# compaction_large_row_warning_threshold_mb: 10

# Log a warning when writing cells larger than this value
# compaction_large_cell_warning_threshold_mb: 1

# Log a warning when row number is larger than this value
# compaction_rows_count_warning_threshold: 100000

# Log a warning when writing a collection containing more elements than this value
# compaction_collection_elements_count_warning_threshold: 10000

# How long the coordinator should wait for seq or index scans to complete
# range_request_timeout_in_ms: 10000
# How long the coordinator should wait for writes to complete
# counter_write_request_timeout_in_ms: 5000
# How long a coordinator should continue to retry a CAS operation
# that contends with other proposals for the same row
# cas_contention_timeout_in_ms: 1000
# How long the coordinator should wait for truncates to complete
# (This can be much longer, because unless auto_snapshot is disabled
# we need to flush first so we can snapshot before removing the data.)
# truncate_request_timeout_in_ms: 60000
# The default timeout for other, miscellaneous operations
# request_timeout_in_ms: 10000

# Enable or disable inter-node encryption.
# You must also generate keys and provide the appropriate key and trust store locations and passwords.
#
# The available internode options are : all, none, dc, rack
# If set to dc scylla  will encrypt the traffic between the DCs
# If set to rack scylla  will encrypt the traffic between the racks
#
# SSL/TLS algorithm and ciphers used can be controlled by
# the priority_string parameter. Info on priority string
# syntax and values is available at:
#   https://gnutls.org/manual/html_node/Priority-Strings.html
#
# The require_client_auth parameter allows you to
# restrict access to service based on certificate
# validation. Client must provide a certificate
# accepted by the used trust store to connect.
#
# server_encryption_options:
#    internode_encryption: none
#    certificate: conf/scylla.crt
#    keyfile: conf/scylla.key
#    truststore: <not set, use system trust>
#    certficate_revocation_list: <not set>
#    require_client_auth: False
#    priority_string: <not set, use default>

# enable or disable client/server encryption.
# client_encryption_options:
#    enabled: false
#    certificate: conf/scylla.crt
#    keyfile: conf/scylla.key
#    truststore: <not set, use system trust>
#    certficate_revocation_list: <not set>
#    require_client_auth: False
#    priority_string: <not set, use default>
#    enable_session_tickets: <default false>

# internode_compression controls whether traffic between nodes is
# compressed.
# can be:  all  - all traffic is compressed
#          dc   - traffic between different datacenters is compressed
#          rack - traffic between different racks is compressed
#          none - nothing is compressed.
# internode_compression: none

# Enables inter-node traffic compression metrics (`scylla_rpc_compression_...`)
# and enables a new implementation of inter-node traffic compressors,
# capable of using zstd (in addition to the default lz4)
# and shared dictionaries.
# (Those features must still be enabled by other settings).
# Has minor CPU cost.
#
# internode_compression_enable_advanced: false

# Enables training of shared compression dictionaries on inter-node traffic.
# New dictionaries are distributed throughout the cluster via Raft,
# and used to improve the effectiveness of inter-node traffic compression
# when `internode_compression_enable_advanced` is enabled.
#
# WARNING: this may leak unencrypted data to disk. The trained dictionaries
# contain randomly-selected pieces of data written to the cluster.
# When the Raft log is unencrypted, those pieces of data will be
# written to disk unencrypted. At the moment of writing, there is no
# way to encrypt the Raft log.
# This problem is tracked by https://github.com/scylladb/scylla-enterprise/issues/4717.
#
# Can be:  never       - Dictionaries aren't trained by this node.
#          when_leader - New dictionaries are trained by this node only if
#                        it's the current Raft leader.
#          always      - Dictionaries are trained by this node unconditionally.
#
# For efficiency reasons, training shouldn't be enabled on more than one node.
# To enable it on a single node, one can let the cluster pick the trainer
# by setting `when_leader` on all nodes, or specify one manually by setting `always`
# on one node and `never` on others.
#
# rpc_dict_training_when: never

# A number in range [0.0, 1.0] specifying the share of CPU which can be spent
# by this node on compressing inter-node traffic with zstd.
#
# Depending on the workload, enabling zstd might have a drastic negative
# effect on performance, so it shouldn't be done lightly.
#
# internode_compression_zstd_max_cpu_fraction: 0.0

# Enable or disable tcp_nodelay for inter-dc communication.
# Disabling it will result in larger (but fewer) network packets being sent,
# reducing overhead from the TCP protocol itself, at the cost of increasing
# latency if you block for cross-datacenter responses.
# inter_dc_tcp_nodelay: false

# Relaxation of environment checks.
#
# Scylla places certain requirements on its environment.  If these requirements are
# not met, performance and reliability can be degraded.
#
# These requirements include:
#    - A filesystem with good support for asynchronous I/O (AIO). Currently,
#      this means XFS.
#
# false: strict environment checks are in place; do not start if they are not met.
# true: relaxed environment checks; performance and reliability may degraade.
#
# developer_mode: false


# Idle-time background processing
#
# Scylla can perform certain jobs in the background while the system is otherwise idle,
# freeing processor resources when there is other work to be done.
#
# defragment_memory_on_idle: true
#
# prometheus port
# By default, Scylla opens prometheus API port on port 9180
# setting the port to 0 will disable the prometheus API.
# prometheus_port: 9180
#
# prometheus address
# Leaving this blank will set it to the same value as listen_address.
# This means that by default, Scylla listens to the prometheus API on the same
# listening address (and therefore network interface) used to listen for
# internal communication. If the monitoring node is not in this internal
# network, you can override prometheus_address explicitly - e.g., setting
# it to 0.0.0.0 to listen on all interfaces.
# prometheus_address: 1.2.3.4

# audit settings
# Table audit is enabled by default.
# 'audit' config option controls if and where to output audited events:
#   - "none": auditing is disabled
#   - "table": save audited events in audit.audit_log column family (default)
#   - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
audit: "table"
#
# List of statement categories that should be audited.
audit_categories: "DCL,DDL,AUTH,ADMIN"
#
# List of tables that should be audited.
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
#
# List of keyspaces that should be fully audited.
# All tables in those keyspaces will be audited
# audit_keyspaces: "<keyspace_name>,<keyspace_name>"
#
# Overrides the Unix socket path used to connect to syslog. If left unset, it'll
# use the default on the build system, which is usually "/dev/log"
# audit_unix_socket_path: "/dev/log"

# Distribution of data among cores (shards) within a node
#
# Scylla distributes data within a node among shards, using a round-robin
# strategy:
#  [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ...
#
# Scylla versions 1.6 and below used just one repetition of the pattern;
# this interfered with data placement among nodes (vnodes).
#
# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this
# provides for better data distribution.
#
# the value below is log (base 2) of the number of repetitions.
#
# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and
# below.
#
# Keep at 12 for new clusters.
murmur3_partitioner_ignore_msb_bits: 12

# Use on a new, parallel algorithm for performing aggregate queries.
# Set to `false` to fall-back to the old algorithm.
# enable_parallelized_aggregation: true

# Time for which task manager task started internally is kept in memory after it completes.
# task_ttl_in_seconds: 0

# Time for which task manager task started by user is kept in memory after it completes.
# user_task_ttl_in_seconds: 3600

# In materialized views, restrictions are allowed only on the view's primary key columns.
# In old versions Scylla mistakenly allowed IS NOT NULL restrictions on columns which were not part
# of the view's primary key. These invalid restrictions were ignored.
# This option controls the behavior when someone tries to create a view with such invalid IS NOT NULL restrictions.
#
# Can be true, false, or warn.
# * `true`: IS NOT NULL is allowed only on the view's primary key columns,
#           trying to use it on other columns will cause an error, as it should.
# * `false`: Scylla accepts IS NOT NULL restrictions on regular columns, but they're silently ignored.
#            It's useful for backwards compatibility.
# * `warn`: The same as false, but there's a warning about invalid view restrictions.
#
# To preserve backwards compatibility on old clusters, Scylla's default setting is `warn`.
# New clusters have this option set to `true` by scylla.yaml (which overrides the default `warn`)
# to make sure that trying to create an invalid view causes an error.
strict_is_not_null_in_views: true

# The Unix Domain Socket the node uses for maintenance socket.
# The possible options are:
# * ignore: the node will not open the maintenance socket,
# * workdir: the node will open the maintenance socket on the path <scylla's workdir>/cql.m,
#            where <scylla's workdir> is a path defined by the workdir configuration option,
# * <socket path>: the node will open the maintenance socket on the path <socket path>.
maintenance_socket: workdir

# If set to true, configuration parameters defined with LiveUpdate option can be updated in runtime with CQL
# by updating system.config virtual table. If we don't want any configuration parameter to be changed in runtime
# via CQL, this option should be set to false. This parameter doesn't impose any limits on other mechanisms updating
# configuration parameters in runtime, e.g. sending SIGHUP or using API. This option should be set to false
# e.g. for cloud users, for whom scylla's configuration should be changed only by support engineers.
# live_updatable_config_params_changeable_via_cql: true

#
# Guardrails options
#
# Guardrails to warn or fail when Replication Factor is smaller/greater than the threshold.
# Please note that the value of 0 is always allowed,
# which means that having no replication at all, i.e. RF = 0, is always valid.
# A guardrail value smaller than 0, e.g. -1, means that the guardrail is disabled.
# Commenting out a guardrail also means it is disabled.
# minimum_replication_factor_fail_threshold: -1
# minimum_replication_factor_warn_threshold:  3
# maximum_replication_factor_warn_threshold: -1
# maximum_replication_factor_fail_threshold: -1
#
# Guardrails to warn about or disallow creating a keyspace with specific replication strategy.
# Each of these 2 settings is a list storing replication strategies considered harmful.
# The replication strategies to choose from are:
# 1) SimpleStrategy,
# 2) NetworkTopologyStrategy,
# 3) LocalStrategy,
# 4) EverywhereStrategy
#
# replication_strategy_warn_list:
#  - SimpleStrategy
# replication_strategy_fail_list:
#
# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
# enable_create_table_with_compact_storage: false
#
# Guardrails to limit usage of selected consistency levels for writes.
# Adding a warning to a CQL query response can significantly increase network
# traffic and decrease overall throughput.
# write_consistency_levels_warned: []
# write_consistency_levels_disallowed: []

#
# System information encryption settings
#
# If enabled, system tables that may contain sensitive information (system.batchlog,
# system.paxos), hints files and commit logs are encrypted with the
# encryption settings below.
#
# When enabling system table encryption on a node with existing data, run
# `nodetool upgradesstables -a` on the listed tables to encrypt existing data.
#
# When tracing is enabled, sensitive info will be written into the tables in the
# system_traces keyspace. Those tables should be configured to encrypt their data
# on disk.
#
# It is recommended to use remote encryption keys from a KMIP server/KMS when using
# Transparent Data Encryption (TDE) features.
# Local key support is provided when a KMIP server/KMS is not available.
#
# See the scylla documentation for more info on available key providers and
# their properties.
#
# system_info_encryption:
#   enabled: true
#   cipher_algorithm: AES
#   secret_key_strength: 128
#   key_provider: LocalFileSystemKeyProviderFactory
#   secret_key_file: <key file>
#
# system_info_encryption:
#   enabled: true
#   cipher_algorithm: AES
#   secret_key_strength: 128
#   key_provider: KmipKeyProviderFactory
#   kmip_host: <kmip host group>
#   template_name: <kmip key template name> (optional)
#   key_namespace: <kmip key namespace> (optional)
#

#
# The directory where system keys are kept
# This directory should have 700 permissions and belong to the scylla user
#
# system_key_directory: /etc/scylla/conf/resources/system_keys
#

#
# KMIP host(s).
#
# The unique name of kmip host/cluster that can be referenced in table schema.
#
# host.yourdomain.com={ hosts=[<host1>, <host2>...], keyfile=/path/to/keyfile, truststore=/path/to/truststore.pem, key_cache_millis=<cache ms>, timeout=<timeout ms> }:...
#
# The KMIP connection management only supports failover, so all requests will go through a
# single KMIP server. There is no load balancing, as no KMIP servers (at the time of this writing)
# support read replication, or other strategies for availability.
#
# Hosts are tried in the order they appear here. Add them in the same sequence they'll fail over in.
#
# KMIP requests will fail over/retry 'max_command_retries' times (default 3)
#
# kmip_hosts:
#   <name>:
#       hosts: <address1[:port]> [, <address2[:port]>...]
#       certificate: <identifying certificate> (optional)
#       keyfile: <identifying key> (optional)
#       truststore: <truststore for SSL connection> (optional)
#       priority_string: <kmip tls priority string> (optional)
#       username: <login> (optional>
#       password: <password> (optional)
#       max_command_retries: <int> (optional; default 3)
#       key_cache_expiry: <key cache expiry period>
#       key_cache_refresh: <key cache refresh/prune period>
#   <name>:
#       ...
#

#
# KMS host(s).
#
# The unique name of kms host/account config that can be referenced in table schema.
#
# host.yourdomain.com={ endpoint=<http(s)://host[:port]>, aws_access_key_id=<AWS access id>, aws_secret_access_key=<AWS secret key>, aws_region=<AWS region>, master_key=<alias or id>, keyfile=/path/to/keyfile, truststore=/path/to/truststore.pem, key_cache_millis=<cache ms>, timeout=<timeout ms> }:...
#
# Actual connection can be either an explicit endpoint (<host>:<port>), or selected automatic via aws_region.
#
# Authentication can be explicit with aws_access_key_id and aws_secret_access_key. Either secret or both can be omitted
# in which case the provider will try to read them from AWS credentials in ~/.aws/credentials. If aws_profile is set, the
# credentials in this section is used.
#
# master_key is an AWS KMS key id or alias from which all keys used for actual encryption of scylla data will be derived.
# This key must be pre-created with access policy allowing the above AWS id Encrypt, Decrypt and GenerateDataKey operations.
#
# kms_hosts:
#   <name>:
#       endpoint: http(s)://<host>(:port) (optional)
#       aws_region: <aws region> (optional)
#       aws_access_key_id: <aws access key id> (optional)
#       aws_secret_access_key: <aws secret access key> (optional)
#       aws_profile: <aws credentials profile> (optional)
#       aws_use_ec2_credentials: <bool> (default false) If true, KMS queries will use the credentials provided by ec2 instance role metadata as initial access key.
#       aws_use_ec2_region: <bool> (default false) If true, KMS queries will use the AWS region indicated by ec2 instance metadata
#       aws_assume_role_arn: <aws role arn> (optional) If set, any KMS query will first attempt to assume this role.
#       master_key: <named KMS key for encrypting data keys> (required)
#       certificate: <identifying certificate> (optional)
#       keyfile: <identifying key> (optional)
#       truststore: <truststore for SSL connection> (optional)
#       priority_string: <kmip tls priority string> (optional)
#       key_cache_expiry: <key cache expiry period>
#       key_cache_refresh: <key cache refresh/prune period>
#   <name>:
#       ...
#

#
# Azure Key Vault host(s).
#
# The unique name of azure host/account config that can be referenced in table schema.
#
# host.yourdomain.com={ azure_tenant_id=<the tenant hosting your service principal>, azure_client_id=<ID of your service principal>, azure_client_secret=<secret of the service principal>, azure_client_certificate_path=<path to PEM-encoded certificate and private key of the service principal>, master_key=<vault name>/<keyname>, truststore=/path/to/truststore.pem, priority_string=<tls priority string>, key_cache_expiry=<cache expiry in ms>, key_cache_refersh=<cache refresh in ms>}:...
#
# Authentication can be explicit with Service Principal credentials. Either secret or certificate can be provided.
# If both are provided, the secret will be used. If no credentials are provided, the provider will try to detect them
# from the environment, the Azure CLI, and IMDS, in this specific order.
#
# master_key is a Vault key that will be used to wrap all keys used for actual encryption of scylla data.
# This key must be pre-created and the principal must have permissions for Wrapkey and Unwrapkey operations on this key.
#
# azure_hosts:
#    <name>:
#       azure_tenant_id: <the tenant hosting your service principal> (optional)
#       azure_client_id: <ID of your service principal> (optional)
#       azure_client_secret: <secret of the service principal> (optional)
#       azure_client_certificate_path: <path to PEM-encoded certificate and private key of the service principal> (optional)
#       master_key: <vault name>/<keyname> - named Vault key for key wrapping (optional)
#       truststore: <PEM file with CA certificates for TLS connection> (optional)
#       priority_string: <GnuTLS priority string for TLS handshake> (optional)
#       key_cache_expiry: <key cache expiry period (ms)> (optional)
#       key_cache_refresh: <key cache refresh/prune period (ms)> (optional)
#   <name>:
#       ...
#

#
# Server-global user information encryption settings
#
# If enabled, all user tables are encrypted with the
# encryption settings below, unless the table has local scylla_encryption_options
# specified.
#
# When enabling user table encryption on a node with existing data, run
# `nodetool upgradesstables -a` on all user tables to encrypt existing data.
#
# It is recommended to use remote encryption keys from a KMIP server or KMS when using
# Transparent Data Encryption (TDE) features.
# Local key support is provided when a KMIP server/KMS is not available.
#
# See the scylla documentation for more info on available key providers and
# their properties.
#
# user_info_encryption:
#   enabled: true
#   cipher_algorithm: AES
#   secret_key_strength: 128
#   key_provider: LocalFileSystemKeyProviderFactory
#   secret_key_file: <key file>
#
# user_info_encryption:
#   enabled: true
#   cipher_algorithm: AES
#   secret_key_strength: 128
#   key_provider: KmipKeyProviderFactory
#   kmip_host: <kmip host group>
#   template_name: <kmip key template name> (optional)
#   key_namespace: <kmip key namespace> (optional)
#

# Control tablets for new keyspaces.
# Can be set to: disabled|enabled|enforced
#
# When enabled, newly created keyspaces will have tablets enabled by default.
# That can be explicitly disabled in the CREATE KEYSPACE query
# by using the `tablets = {'enabled': false}` replication option.
#
# Correspondingly, when disabled, newly created keyspaces will use vnodes
# unless tablets are explicitly enabled in the CREATE KEYSPACE query
# by using the `tablets = {'enabled': true}` replication option.
#
# When set to `enforced`, newly created keyspaces will always have tablets enabled by default.
# This prevents explicitly disabling tablets in the CREATE KEYSPACE query
# using the `tablets = {'enabled': false}` replication option.
# It also mandates a replication strategy supporting tablets, like
# NetworkTopologyStrategy
#
# Note that creating keyspaces with tablets enabled or disabled is irreversible.
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
tablets_mode_for_new_keyspaces: enabled

# Require every tablet-enabled keyspace to be RF-rack-valid.
#
# A tablet-enabled keyspace is RF-rack-valid when, for each data center,
# its replication factor (RF) is 0, 1, or exactly equal to the number of
# racks in that data center. Setting the RF to the number of racks ensures
# that a single rack failure never results in data unavailability.
#
# When set to true, CREATE KEYSPACE and ALTER KEYSPACE statements that
# would produce an RF-rack-invalid keyspace are rejected.
# When set to false, such statements are allowed but emit a warning.
rf_rack_valid_keyspaces: false

#
# Alternator options
#
# Maximum number of items in single BatchWriteItem command. Default is 100.
# Note: DynamoDB has a hard-coded limit of 25.
# alternator_max_items_in_batch_write: 100

#
# Vector Store options
#
# HTTP and HTTPS schemes are supported. Port number is mandatory.
# If both `vector_store_primary_uri` and `vector_store_secondary_uri` are unset or empty, vector search is disabled.
#
# A comma-separated list of primary vector store node URIs. These nodes are preferred for vector search operations.
# vector_store_primary_uri: http://vector-store.dns.name:{port}
#
# A comma-separated list of secondary vector store node URIs. These nodes are used as a fallback when all primary nodes are unavailable, and are typically located in a different availability zone for high availability.
# vector_store_secondary_uri: http://vector-store.dns.name:{port}
#
# Options for encrypted connections to the vector store. These options are used for HTTPS URIs in vector_store_primary_uri and vector_store_secondary_uri.
# vector_store_encryption_options:
#    truststore: <not set, use system trust>

#
# Background IO rate limiting
# When setting this value to be non-zero scylla throttles disk throughput for
# background activities such as backup, repair, tablet migration and more.
# This limit is useful for user queries so the network interface does
# not get saturated by streaming activities.
# The recommended value is 75% of network bandwidth
# E.g for i4i.8xlarge (https://github.com/scylladb/scylla-machine-image/tree/next/common/aws_net_params.json):
# network: 18.75 GiB/s --> 18750 Mib/s --> 1875 MB/s (from network bits to network bytes: divide by 10, not 8)
# Converted to disk bytes: 1875 * 1000 / 1024 = 1831 MB/s (disk wise)
# 75% of disk bytes is: 0.75 * 1831 = 1373 megabytes/s
# maintenance_io_throughput_mb_per_sec: 1373
#