Files
scylladb/test/alternator/run
Nadav Har'El 92e36c5df5 test/alternator: increase timeout on Scylla boot
The Alternator test boots Scylla to test against it. We set an arbitrary
timeout for this boot to succeed: 100 seconds. This 100 seconds is
significantly more than the 25 seconds it takes on my laptop, and I thought
we'll never reach it. But it turns out that in some setups - running the
very slow debug build on slow and overcommitted nodes - 100 seconds is
not enough.

So this patch doubles the timeout to 200 seconds.

Note that this "200 seconds" is just a timeout, and doesn't affect normal
runs: Both a successful boot and a failed boot are recognized as soon as
they happen, and we never unnecessarily wait the entire 200 seconds.

Fixes #6271.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200422193920.17079-1-nyh@scylladb.com>
2020-04-23 07:47:21 +02:00

153 lines
5.1 KiB
Bash
Executable File

#!/bin/bash
# Boots a private Scylla instance with Alternator enabled and runs the
# Alternator pytest suite against it. Set SCYLLA in the environment to choose
# the executable to test; extra command-line arguments are passed to pytest.

# Exit if any one of the commands below fails
set -e

# Directory containing this script (symlinks resolved) and the root of the
# source tree two levels above it. The two steps are separate assignments so
# that a readlink failure aborts the script (an assignment's status is that of
# its command substitution), and everything is quoted so that a checkout path
# containing spaces works.
script_file=$(readlink -e -- "$0")
script_path=$(dirname -- "$script_file")
source_path=$script_path/../..

# By default, we take the most recently modified build/*/scylla as the
# executable. This only happens when SCYLLA is unset; a value supplied by the
# caller is used as-is.
if [ -z "${SCYLLA+set}" ]; then
    SCYLLA=
    for candidate in "$source_path"/build/*/scylla; do
        # An unmatched glob is left as the literal pattern - skip it.
        [ -e "$candidate" ] || continue
        if [ -z "$SCYLLA" ] || [ "$candidate" -nt "$SCYLLA" ]; then
            SCYLLA=$candidate
        fi
    done
fi
if [ -z "$SCYLLA" ]; then
    # Without this check the script would die on "readlink -f ''" below
    # without telling the user what went wrong.
    echo "Error: no Scylla executable found in $source_path/build/*/scylla. Build Scylla or set the SCYLLA environment variable." >&2
    exit 1
fi
SCYLLA=$(readlink -f -- "$SCYLLA")

# Below, we need to use python3 and the Cassandra driver to set up the
# authentication credentials expected by some of the tests that check
# authentication. If they are not installed there isn't much point in
# even starting Scylla.
if ! python3 -c 'from cassandra.cluster import Cluster' >/dev/null 2>&1
then
    echo "Error: python3 and python3-cassandra-driver must be installed to configure Alternator authentication." >&2
    exit 1
fi

# Pick a loopback IP address for Scylla to run on, in an attempt not to
# collide with other concurrent runs of Scylla. CCM uses 127.0.0.<nodenum>,
# so we use 127.1.*.* which cannot collide with it. Moreover, we take the
# last two bytes of the address from the low 16 bits of the current process
# ID - so as to allow multiple concurrent runs of this script to use
# different addresses.
SCYLLA_IP=127.1.$(($$ >> 8 & 255)).$(($$ & 255))
echo "Running Scylla on $SCYLLA_IP"

# Scylla's working directory and log live in a fresh private directory.
# mktemp guarantees the directory is newly created, so a stale directory left
# behind by an earlier run that happened to have the same PID cannot make
# this run fail.
tmp_dir=$(mktemp -d "/tmp/alternator-test-$$.XXXXXX")
# We run the cleanup() function on exit for any reason - successful finish
# of the script, an error (since we have "set -e"), or a signal.
# It ensures that Scylla is killed and its temporary storage directory is
# deleted. It also shows Scylla's output log.
#
# Globals read:
#   SCYLLA_PROCESS - PID of the background Scylla process (may be unset if
#                    the script failed before starting it)
#   tmp_dir        - temporary directory holding Scylla's data and "log"
#   summary        - final one-line verdict, printed after the log
#   code           - exit status the script finishes with; it stays 17 unless
#                    it is overwritten before cleanup() runs
code=17
summary=
cleanup() {
    # Best effort: the process may already be gone or may never have started.
    if [ -n "${SCYLLA_PROCESS-}" ]; then
        kill -9 "$SCYLLA_PROCESS" 2>/dev/null || :
    fi
    echo
    echo "Scylla log:"
    echo
    # We want to cat "$tmp_dir/log", but this can take a long time,
    # especially if stdout is piped, and be interrupted. We don't want
    # the "rm" below to not happen, so we open the file first, delete the
    # directory, and only then cat the still-open file descriptor.
    #
    # The log does not exist if the script failed before Scylla was launched.
    # Opening it unconditionally would fail and, under "set -e", abort this
    # function before the directory is removed and before the summary and
    # exit code are delivered.
    local have_log=
    if [ -r "$tmp_dir/log" ]; then
        exec 3<"$tmp_dir/log"
        have_log=yes
    fi
    rm -rf --preserve-root -- "$tmp_dir"
    if [ -n "$have_log" ]; then
        # A failed or interrupted cat must not swallow the summary below.
        cat <&3 || :
        exec 3<&-
    else
        echo "(no log file was written)"
    fi
    # Quoted, so that error text embedded in the summary is printed verbatim
    # instead of being word-split and glob-expanded.
    printf '%s\n' "$summary"
    exit "$code"
}
# From here on, run cleanup() whenever the script exits.
trap cleanup EXIT

# By default Alternator is served over plain HTTP on port 8000. If "--https"
# appears among the command-line arguments, generate a self-signed key and
# certificate in the temporary directory and serve HTTPS on port 8043
# instead. The certificate is only generated when "--https" was explicitly
# passed - otherwise the test would not use HTTPS anyway.
alternator_port_option="--alternator-port=8000"
alternator_url="http://$SCYLLA_IP:8000"
for arg in "$@"; do
    case "$arg" in
    --https)
        openssl genrsa 2048 > "$tmp_dir/scylla.key"
        openssl req -new -x509 -nodes -sha256 -days 365 -subj "/C=IL/ST=None/L=None/O=None/OU=None/CN=example.com" -key "$tmp_dir/scylla.key" -out "$tmp_dir/scylla.crt"
        alternator_port_option="--alternator-https-port=8043"
        alternator_url="https://$SCYLLA_IP:8043"
        ;;
    esac
done

# Command-line options for Scylla, one logical option per line. All of its
# listening addresses are bound to $SCYLLA_IP and all of its files live under
# $tmp_dir.
scylla_args=(
    --options-file "$source_path/conf/scylla.yaml"
    --alternator-address "$SCYLLA_IP"
    "$alternator_port_option"
    --alternator-enforce-authorization=1
    --developer-mode=1
    --ring-delay-ms 0 --collectd 0
    --smp 2 -m 1G
    --overprovisioned --unsafe-bypass-fsync 1
    --api-address "$SCYLLA_IP"
    --rpc-address "$SCYLLA_IP"
    --listen-address "$SCYLLA_IP"
    --prometheus-address "$SCYLLA_IP"
    --seed-provider-parameters "seeds=$SCYLLA_IP"
    --workdir "$tmp_dir"
    --server-encryption-options "keyfile=$tmp_dir/scylla.key"
    --server-encryption-options "certificate=$tmp_dir/scylla.crt"
    --auto-snapshot 0
)

# Start Scylla in the background with stdout and stderr collected in
# "$tmp_dir/log", and remember its PID.
"$SCYLLA" "${scylla_args[@]}" >"$tmp_dir/log" 2>&1 &
SCYLLA_PROCESS=$!
# Set up the authentication credentials needed by the Alternator tests.
# This connects to Scylla at $SCYLLA_IP with CQL, using the Python Cassandra
# driver, and inserts a role named "alternator" whose salted_hash is
# "secret_pass" into system_auth.roles.
# The return status is that of the python3 command. The connection cannot
# succeed until Scylla has finished booting, so the boot loop below calls
# this function repeatedly until it succeeds.
setup_authentication() {
    local insert_role="INSERT INTO system_auth.roles (role, salted_hash) VALUES ('alternator', 'secret_pass')"
    local py_snippet="from cassandra.cluster import Cluster; Cluster([\"$SCYLLA_IP\"]).connect().execute(\"$insert_role\")"
    python3 -c "$py_snippet"
}
echo "Scylla is: $SCYLLA."
echo -n "Booting Scylla..."

# Wait for Scylla to boot by retrying setup_authentication every 2 seconds.
# SECONDS is bash's built-in timer: after being assigned 0 it counts the
# seconds elapsed since the assignment. The 200 seconds is only an upper
# bound - the loop ends as soon as authentication is set up (ok=yes), the
# Scylla process disappears, or an unexpected error is seen.
ok=
SECONDS=0
while ((SECONDS < 200))
do
    sleep 2
    echo -n .
    # "kill -0" sends no signal; it only checks that the process still exists.
    if ! kill -0 "$SCYLLA_PROCESS" 2>/dev/null
    then
        summary="Error: Scylla failed to boot after $SECONDS seconds."
        break
    fi
    err=$(setup_authentication 2>&1) && ok=yes && break
    case "$err" in
    *NoHostAvailable:*)
        # This is what we expect while Scylla is still booting.
        ;;
    *ImportError:*|*"command not found"*)
        summary="Error: need python3 and python3-cassandra-driver to configure Alternator authentication."
        echo
        printf '%s\n' "$summary"
        break;;
    *)
        # $err is arbitrary output from python3, so the summary is printed
        # quoted to keep it verbatim (no word splitting, no glob expansion).
        summary="Unknown error trying to set authentication credentials: '$err'"
        echo
        printf '%s\n' "$summary"
        break;;
    esac
done

if test -n "$ok" && kill -0 "$SCYLLA_PROCESS" 2>/dev/null
then
    echo "Done ($SECONDS seconds)"
else
    echo
    # An empty summary here means the loop ran out of time without hitting
    # any of the specific failures above.
    if test -z "$summary"
    then
        summary="Error: Scylla failed to boot after $SECONDS seconds."
        printf '%s\n' "$summary"
    fi
    exit 1
fi

cd "$script_path"
# From here on a failing command must not abort the script: we want to
# record pytest's exit status in $code rather than exit immediately.
set +e
pytest --url "$alternator_url" "$@"
code=$?
case "$code" in
0) summary="Alternator tests pass";;
*) summary="Alternator tests failure";;
esac
# Note that the cleanup() function runs now, just like on any exit from
# any reason in this script. It will delete the temporary files and
# announce the failure or success of the test.