mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-22 01:20:39 +00:00
Alternator supports four ways in which write operations can use quorum writes or LWT or both, which we called "write isolation policies". Until this patch, Alternator defaulted to the most generally safe policy, "always_use_lwt". This default could have been overridden for each table separately, but there was no way to change this default for all tables. This patch adds a "--alternator-write-isolation" configuration option which allows changing the default. Moreover, @dorlaor asked that users must *explicitly* choose this default mode, and not get "always_use_lwt" without noticing. The previous default, "always_use_lwt", supports any workload correctly but because it uses LWT for all writes it may be disappointingly slow for users who run write-only workloads (including most benchmarks) - such users might find the slow writes so disappointing that they will drop Scylla. Conversely, a default of "forbid_rmw" will be faster and still correct, but will fail on workloads which need read-modify-write operations - and surprise users that need these operations. So Dor asked that *none* of the write modes be made the default, and users must make an informed choice between the different write modes, rather than being disappointed by a default choice they weren't aware of. So after this patch, Scylla refuses to boot if Alternator is enabled but a "--alternator-write-isolation" option is missing. The patch also modifies the relevant documentation, adds the same option to our docker image, and modifies the test-running script test/alternator/run to run Scylla with the old default mode (always_use_lwt), which we need because we want to test RMW operations as well. Fixes #6452 Signed-off-by: Nadav Har'El <nyh@scylladb.com> Message-Id: <20200524160338.108417-1-nyh@scylladb.com>
176 lines
6.2 KiB
Bash
Executable File
176 lines
6.2 KiB
Bash
Executable File
#!/bin/bash

# Exit if any one of the commands below fails
set -e

# Directory containing this script (with symbolic links resolved). The
# source tree's build/ and conf/ directories are looked up two levels
# above it.
script_path=$(dirname -- "$(readlink -e -- "$0")")
source_path=$script_path/../..

# By default, we take the latest build/*/scylla as the executable. Setting
# the SCYLLA environment variable overrides this choice.
SCYLLA=${SCYLLA-$(ls -t "$source_path/build/"*"/scylla" 2>/dev/null | head -1)}
if [ -z "$SCYLLA" ]
then
    # Without this check the problem would only surface as an obscure
    # readlink failure on an empty path.
    echo "Error: no Scylla executable found. Build Scylla or set the SCYLLA environment variable." >&2
    exit 1
fi
SCYLLA=$(readlink -f -- "$SCYLLA")

# Below, we need to use python3 and the Cassandra driver to set up the
# authentication credentials expected by some of the tests that check
# authentication. If they are not installed there isn't much point in
# even starting Scylla, so fail early with a clear message.
if ! python3 -c 'from cassandra.cluster import Cluster' >/dev/null 2>&1
then
    echo "Error: python3 and python3-cassandra-driver must be installed to configure Alternator authentication." >&2
    exit 1
fi

# Pick a loopback IP address for Scylla to run on, in an attempt not to
# collide with other concurrent runs of Scylla. CCM uses 127.0.0.<nodenum>,
# so we use 127.1.*.*, which cannot collide with it. Moreover, we take the
# last two bytes of the address from the low 16 bits of the current process
# id - so as to allow multiple concurrent runs of this code to use
# different addresses.
SCYLLA_IP=127.1.$(( ($$ >> 8) & 255 )).$(( $$ & 255 ))
echo "Running Scylla on $SCYLLA_IP"

# Per-run scratch directory, named after the current process id. mkdir
# (without -p) fails if the directory already exists.
tmp_dir=/tmp/alternator-test-$$
mkdir "$tmp_dir"

# We run the cleanup() function on exit for any reason - successful finish
# of the script, an error (since we have "set -e"), or a signal.
# It ensures that Scylla is killed and its temporary storage directory is
# deleted. It also shows Scylla's output log.
#
# Globals read: SCYLLA_PROCESS, tmp_dir, summary, code.
# "code" is the exit status cleanup() finishes the script with; the value
# assigned here is used unless it is reassigned before the script exits.
# "summary" is a final one-off message printed after the log.
code=17
summary=
cleanup() {
    local have_log=

    # SCYLLA_PROCESS is empty or unset if we exit before Scylla was started.
    if [ -n "${SCYLLA_PROCESS:-}" ]
    then
        kill -9 "$SCYLLA_PROCESS" 2>/dev/null || :
    fi
    echo
    echo "Scylla log:"
    echo
    # We want to cat "$tmp_dir/log", but this can take a long time,
    # especially if stdout is piped, and be interrupted. We don't want
    # the "rm" below to not happen, so we open the file first, delete the
    # directory, and only then cat the still-open file descriptor.
    # The log does not exist if we exit before Scylla was started; in that
    # case a failing "exec 3<" must not abort this function (we run under
    # "set -e"), otherwise the directory would be left behind.
    if [ -r "${tmp_dir:-}/log" ] && exec 3<"$tmp_dir/log"
    then
        have_log=yes
    fi
    if [ -n "${tmp_dir:-}" ]
    then
        rm -rf --preserve-root "$tmp_dir" || :
    fi
    if [ -n "$have_log" ]
    then
        cat <&3 || :
        exec 3<&-
    fi
    # Quoted, so that the summary is printed verbatim (no globbing or
    # whitespace collapsing of error text embedded in it).
    printf '%s\n' "$summary"
    exit "$code"
}
trap 'cleanup' EXIT

# Set up SSL certificates needed for "--alternator-https-port=8043"
# to work. We only need to do this if the "--https" option was explicitly
# passed - otherwise the test would not use HTTPS anyway.
# Without "--https" the defaults below (plain HTTP on port 8000) are kept.
alternator_port_option="--alternator-port=8000"
alternator_url="http://$SCYLLA_IP:8000"
for arg in "$@"
do
    if [ "$arg" = --https ]
    then
        # Generate a private key and a self-signed certificate in the
        # temporary directory.
        openssl genrsa 2048 > "$tmp_dir/scylla.key"
        openssl req -new -x509 -nodes -sha256 -days 365 -subj "/C=IL/ST=None/L=None/O=None/OU=None/CN=example.com" -key "$tmp_dir/scylla.key" -out "$tmp_dir/scylla.crt"
        alternator_port_option="--alternator-https-port=8043"
        alternator_url="https://$SCYLLA_IP:8043"
    fi
done

# To make things easier for users of "killall", "top", and similar, we want
# the Scylla executable which we run during the test to have a different name
# from manual runs of Scylla. Unfortunately, using "exec -a" to change just
# argv[0] isn't good enough - because killall inspects the actual executable
# filename in /proc/<pid>/stat. So we need to name the executable differently.
# Luckily, using a symbolic link is good enough.
SCYLLA_LINK=$tmp_dir/test_scylla
ln -s "$SCYLLA" "$SCYLLA_LINK"

# Start Scylla in the background with all of its addresses bound to
# $SCYLLA_IP and its working directory in $tmp_dir. Both stdout and stderr
# go to "$tmp_dir/log", and the process id is saved in SCYLLA_PROCESS.
"$SCYLLA_LINK" --options-file "$source_path/conf/scylla.yaml" \
    --alternator-address "$SCYLLA_IP" \
    "$alternator_port_option" \
    --alternator-enforce-authorization=1 \
    --alternator-write-isolation=always_use_lwt \
    --developer-mode=1 \
    --ring-delay-ms 0 --collectd 0 \
    --smp 2 -m 1G \
    --overprovisioned --unsafe-bypass-fsync 1 \
    --api-address "$SCYLLA_IP" \
    --rpc-address "$SCYLLA_IP" \
    --listen-address "$SCYLLA_IP" \
    --prometheus-address "$SCYLLA_IP" \
    --seed-provider-parameters seeds="$SCYLLA_IP" \
    --workdir "$tmp_dir" \
    --server-encryption-options keyfile="$tmp_dir/scylla.key" \
    --server-encryption-options certificate="$tmp_dir/scylla.crt" \
    --auto-snapshot 0 \
    --skip-wait-for-gossip-to-settle 0 \
    >"$tmp_dir/log" 2>&1 &
SCYLLA_PROCESS=$!

# Set up the proper authentication credentials needed by the Alternator
# test: insert the role 'alternator' with salted_hash 'secret_pass' into
# system_auth.roles. This requires connecting to Scylla at $SCYLLA_IP with
# CQL, which fails while Scylla is still booting - the caller is expected
# to retry until it works.
# Globals read: SCYLLA_IP.
# Returns: the exit status of python3; errors are written to stderr.
setup_authentication() {
    python3 -c "from cassandra.cluster import Cluster; Cluster([\"$SCYLLA_IP\"]).connect().execute(\"INSERT INTO system_auth.roles (role, salted_hash) VALUES ('alternator', 'secret_pass')\")"
}

# Test that Alternator is serving, by sending an HTTP GET request to
# "$alternator_url/". Alternator starts after CQL, so we use
# check_alternator() to verify that both are up.
# Globals read: alternator_url.
# Returns: the exit status of python3; errors (such as "Connection
# refused") are written to stderr.
check_alternator() {
    python3 -c "import requests; requests.get(\"$alternator_url/\")"
}

# Wait (for up to 200 seconds) for Scylla to boot: poll once a second until
# Alternator answers HTTP requests and the authentication credentials were
# successfully installed through CQL. "ok" is set to "yes" on success;
# otherwise "summary" describes what went wrong.
echo "Scylla is: $SCYLLA."
echo -n "Booting Scylla..."
ok=
# SECONDS is maintained by bash: after this assignment it counts the
# seconds elapsed since the assignment.
SECONDS=0
while ((SECONDS < 200))
do
    sleep 1
    echo -n .
    # Give up immediately if the Scylla process is no longer running.
    if ! kill -0 "$SCYLLA_PROCESS" 2>/dev/null
    then
        summary="Error: Scylla failed to boot after $SECONDS seconds."
        break
    fi
    case "$(check_alternator 2>&1)" in
    *"Connection refused"*)
        # This may indicate that Scylla is still booting, or that it failed
        # to boot and exited. Try again (and check again if Scylla exited).
        continue;;
    esac
    err=$(setup_authentication 2>&1) && ok=yes && break
    case "$err" in
    *NoHostAvailable:*)
        # This is what we expect while Scylla is still booting.
        ;;
    *ImportError:*|*"command not found"*)
        summary="Error: need python3 and python3-cassandra-driver to configure Alternator authentication."
        echo
        echo "$summary"
        break;;
    *)
        # The error text is arbitrary output and may contain glob
        # characters and newlines, so it must be printed quoted.
        summary="Unknown error trying to set authentication credentials: '$err'"
        echo
        echo "$summary"
        break;;
    esac
done
if test -n "$ok" && kill -0 "$SCYLLA_PROCESS" 2>/dev/null
then
    echo "Done ($SECONDS seconds)"
else
    echo
    # An empty summary here means that the loop above timed out, or that
    # Scylla exited right after the credentials were set.
    if test -z "$summary"
    then
        summary="Error: Scylla failed to boot after $SECONDS seconds."
        echo "$summary"
    fi
    exit 1
fi

# Run the tests from the script's directory, passing all of this script's
# arguments on to pytest.
cd "$script_path"
# From here on a failing command must not abort the script: we need to
# record pytest's exit status in "code" and set the final summary.
set +e
pytest --url "$alternator_url" "$@"
code=$?
case $code in
0) summary="Alternator tests pass";;
*) summary="Alternator tests failure";;
esac

# Note that the cleanup() function runs now, just like on any exit from
# any reason in this script. It will delete the temporary files and
# announce the failure or success of the test.