From 59bd5e485bcb382599e6cfdc60792c8bac058f24 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 3 Aug 2016 10:58:55 +0300 Subject: [PATCH 1/5] dist/docker: Use supervisord to manage multiple processes Switch to supervisord to manage the two processes we have: Scylla server and Scylla JMX proxy. We need this to make the Docker image run under Kubernetes, which currently fails as follows when we try to start the systemd init process: Couldn't find an alternative telinit implementation to spawn. I have not seen other people hitting the issue, except for the GitLab Docker image: https://gitlab.com/gitlab-org/gitlab-ce/issues/18612 which "solved" the problem by not running init... https://gitlab.com/gitlab-org/omnibus-gitlab/merge_requests/838/diffs Furthermore, the "supervisord" approach seems to be what people actually use in Docker land: http://blog.kunicki.org/blog/2016/02/12/multiple-entrypoints-in-docker/ The only downside is that we now sort of duplicate functionality that's already in the systemd configuration files. However, we should work towards Scylla figuring out its configuration rather than composing a long list of command line arguments. Once we do that, the duplication in Docker supervisord scripts disappears. 
--- dist/docker/redhat/Dockerfile | 12 +++++++++--- .../{start-scylla => docker-entrypoint.py} | 5 ++--- dist/docker/redhat/etc/supervisord.conf | 16 ++++++++++++++++ dist/docker/redhat/scylla-jmx-service.sh | 5 +++++ dist/docker/redhat/scylla-service.sh | 7 +++++++ dist/docker/redhat/scyllasetup.py | 4 ---- 6 files changed, 39 insertions(+), 10 deletions(-) rename dist/docker/redhat/{start-scylla => docker-entrypoint.py} (63%) create mode 100644 dist/docker/redhat/etc/supervisord.conf create mode 100755 dist/docker/redhat/scylla-jmx-service.sh create mode 100755 dist/docker/redhat/scylla-service.sh diff --git a/dist/docker/redhat/Dockerfile b/dist/docker/redhat/Dockerfile index 0c34804aae..05d43251a0 100644 --- a/dist/docker/redhat/Dockerfile +++ b/dist/docker/redhat/Dockerfile @@ -12,7 +12,7 @@ RUN yum -y install epel-release RUN yum -y clean expire-cache RUN yum -y update RUN yum -y remove boost-thread boost-system -RUN yum -y install scylla hostname +RUN yum -y install scylla hostname supervisor RUN yum clean all #install python3 for our main script @@ -21,10 +21,16 @@ RUN yum -y install python34 python34-PyYAML ADD scylla_bashrc /scylla_bashrc RUN cat /scylla_bashrc >> /etc/bashrc +# Supervisord configuration: +ADD etc/supervisord.conf /etc/supervisord.conf +RUN mkdir -p /var/log/scylla +ADD scylla-service.sh /scylla-service.sh +ADD scylla-jmx-service.sh /scylla-jmx-service.sh + ADD scyllasetup.py /scyllasetup.py ADD commandlineparser.py /commandlineparser.py -ADD start-scylla /start-scylla -ENTRYPOINT ["/start-scylla"] +ADD docker-entrypoint.py /docker-entrypoint.py +ENTRYPOINT ["/docker-entrypoint.py"] EXPOSE 10000 9042 9160 7000 7001 VOLUME [ "/var/lib/scylla" ] diff --git a/dist/docker/redhat/start-scylla b/dist/docker/redhat/docker-entrypoint.py similarity index 63% rename from dist/docker/redhat/start-scylla rename to dist/docker/redhat/docker-entrypoint.py index ec2069ff5f..8de8709019 100755 --- a/dist/docker/redhat/start-scylla +++ 
b/dist/docker/redhat/docker-entrypoint.py @@ -4,7 +4,7 @@ import scyllasetup import logging import commandlineparser -logging.basicConfig(filename="/start-scylla.log", level=logging.DEBUG, format="%(message)s") +logging.basicConfig(filename="/var/log/scylla/docker-entrypoint.log", level=logging.DEBUG, format="%(message)s") try: arguments = commandlineparser.parse() @@ -13,7 +13,6 @@ try: setup.cpuSet() setup.io() setup.scyllaYAML() - setup.enableServices() - os.execl("/usr/sbin/init", "/usr/sbin/init") + os.system("/usr/bin/supervisord -c /etc/supervisord.conf") except: logging.exception('failed!') diff --git a/dist/docker/redhat/etc/supervisord.conf b/dist/docker/redhat/etc/supervisord.conf new file mode 100644 index 0000000000..eff86ad3f1 --- /dev/null +++ b/dist/docker/redhat/etc/supervisord.conf @@ -0,0 +1,16 @@ +[supervisord] +nodaemon=true + +[program:scylla] +command=/scylla-service.sh +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:scylla-jmx] +command=/scylla-jmx-service.sh +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/dist/docker/redhat/scylla-jmx-service.sh b/dist/docker/redhat/scylla-jmx-service.sh new file mode 100755 index 0000000000..8ea90e447e --- /dev/null +++ b/dist/docker/redhat/scylla-jmx-service.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +source /etc/sysconfig/scylla-jmx + +/usr/lib/scylla/jmx/scylla-jmx -l /usr/lib/scylla/jmx diff --git a/dist/docker/redhat/scylla-service.sh b/dist/docker/redhat/scylla-service.sh new file mode 100755 index 0000000000..c89eca0423 --- /dev/null +++ b/dist/docker/redhat/scylla-service.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +. 
/usr/lib/scylla/scylla_prepare + +export SCYLLA_HOME SCYLLA_CONF + +/usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET diff --git a/dist/docker/redhat/scyllasetup.py b/dist/docker/redhat/scyllasetup.py index ffe225a4c0..772c9fda47 100644 --- a/dist/docker/redhat/scyllasetup.py +++ b/dist/docker/redhat/scyllasetup.py @@ -40,7 +40,3 @@ class ScyllaSetup: configuration['broadcast_address'] = self._broadcastAddress with open('/etc/scylla/scylla.yaml', 'w') as file: yaml.dump(configuration, file) - - def enableServices(self): - self._run('systemctl enable scylla-server', shell=True) - self._run('systemctl enable scylla-jmx', shell=True) From d0aeb53e7c2ec8081100410ed9a0e9abe5f457e1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 4 Aug 2016 09:46:26 +0300 Subject: [PATCH 2/5] dist/docker: Log to stdout instead of syslog We don't have systemd running on the image so "journalctl" is useless. Log to stdout instead which has the nice benefit of making "docker logs" produce meaningful output on the host. 
--- dist/docker/redhat/Dockerfile | 3 ++ .../docker/redhat/etc/sysconfig/scylla-server | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 dist/docker/redhat/etc/sysconfig/scylla-server diff --git a/dist/docker/redhat/Dockerfile b/dist/docker/redhat/Dockerfile index 05d43251a0..0660d02ac8 100644 --- a/dist/docker/redhat/Dockerfile +++ b/dist/docker/redhat/Dockerfile @@ -21,6 +21,9 @@ RUN yum -y install python34 python34-PyYAML ADD scylla_bashrc /scylla_bashrc RUN cat /scylla_bashrc >> /etc/bashrc +# Scylla configuration: +ADD etc/sysconfig/scylla-server /etc/sysconfig/scylla-server + # Supervisord configuration: ADD etc/supervisord.conf /etc/supervisord.conf RUN mkdir -p /var/log/scylla diff --git a/dist/docker/redhat/etc/sysconfig/scylla-server b/dist/docker/redhat/etc/sysconfig/scylla-server new file mode 100644 index 0000000000..bda23df9a3 --- /dev/null +++ b/dist/docker/redhat/etc/sysconfig/scylla-server @@ -0,0 +1,41 @@ +# choose following mode: virtio, dpdk, posix +NETWORK_MODE=posix + +# tap device name(virtio) +TAP=tap0 + +# bridge device name (virtio) +BRIDGE=virbr0 + +# ethernet device name +IFNAME=eth0 + +# setup NIC's interrupts, RPS, XPS (posix) +SET_NIC=no + +# ethernet device driver (dpdk) +ETHDRV= + +# ethernet device PCI ID (dpdk) +ETHPCIID= + +# number of hugepages +NR_HUGEPAGES=64 + +# user for process (must be root for dpdk) +USER=scylla + +# group for process +GROUP=scylla + +# scylla home dir +SCYLLA_HOME=/var/lib/scylla + +# scylla config dir +SCYLLA_CONF=/etc/scylla + +# scylla arguments +SCYLLA_ARGS="--log-to-syslog 0 --log-to-stdout 1 --default-log-level info --collectd-address=127.0.0.1:25826 --collectd=1 --collectd-poll-period 3000 --network-stack posix" + +# setup as AMI instance +AMI=no From 6c8c60a5fc4b17118c7194d62b8e0e802f374606 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 4 Aug 2016 09:55:32 +0300 Subject: [PATCH 3/5] dist/docker: Setup hostname in cqlshrc We configure the hostname in the 
"CQLSH_HOST" environment variable but that is only picked up if we first start the shell. Setup the hostname in $HOME/.cqlshrc file instead so that we can start "cqlsh" directly: docker exec -it scylla cqlsh --- dist/docker/redhat/docker-entrypoint.py | 1 + dist/docker/redhat/scylla_bashrc | 2 -- dist/docker/redhat/scyllasetup.py | 8 +++++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dist/docker/redhat/docker-entrypoint.py b/dist/docker/redhat/docker-entrypoint.py index 8de8709019..0f1d41d4b1 100755 --- a/dist/docker/redhat/docker-entrypoint.py +++ b/dist/docker/redhat/docker-entrypoint.py @@ -13,6 +13,7 @@ try: setup.cpuSet() setup.io() setup.scyllaYAML() + setup.cqlshrc() os.system("/usr/bin/supervisord -c /etc/supervisord.conf") except: logging.exception('failed!') diff --git a/dist/docker/redhat/scylla_bashrc b/dist/docker/redhat/scylla_bashrc index 61ad785701..204178e663 100644 --- a/dist/docker/redhat/scylla_bashrc +++ b/dist/docker/redhat/scylla_bashrc @@ -16,5 +16,3 @@ echo ' cqlsh' > /dev/stderr echo 'More documentation available at: ' > /dev/stderr echo ' http://www.scylladb.com/doc/' > /dev/stderr echo > /dev/stderr - -export CQLSH_HOST=$(hostname -i) diff --git a/dist/docker/redhat/scyllasetup.py b/dist/docker/redhat/scyllasetup.py index 772c9fda47..3c03bcaf91 100644 --- a/dist/docker/redhat/scyllasetup.py +++ b/dist/docker/redhat/scyllasetup.py @@ -1,7 +1,7 @@ import subprocess import logging import yaml - +import os class ScyllaSetup: def __init__(self, arguments): @@ -40,3 +40,9 @@ class ScyllaSetup: configuration['broadcast_address'] = self._broadcastAddress with open('/etc/scylla/scylla.yaml', 'w') as file: yaml.dump(configuration, file) + + def cqlshrc(self): + home = os.environ['HOME'] + hostname = subprocess.check_output(['hostname', '-i']).decode('ascii').strip() + with open("%s/.cqlshrc" % home, "w") as cqlshrc: + cqlshrc.write("[connection]\nhostname = %s\n" % hostname) From 7deddbe17a9b874283803ed352d59f5ea200a7a5 Mon Sep 
17 00:00:00 2001 From: Pekka Enberg Date: Thu, 4 Aug 2016 10:05:08 +0300 Subject: [PATCH 4/5] dist/docker: Fix Docker Hub documentation Fix Docker Hub documentation to match what we have right now. More work is needed in the following areas: * How to make a cluster * How to configure Docker image for production use --- docs/docker-hub.md | 151 ++++++++++----------------------------------- 1 file changed, 34 insertions(+), 117 deletions(-) diff --git a/docs/docker-hub.md b/docs/docker-hub.md index 7fd5085c86..8b7eb185b1 100644 --- a/docs/docker-hub.md +++ b/docs/docker-hub.md @@ -1,11 +1,3 @@ -# Supported tags and respective `Dockerfile` links - -- [`1.3.0`, `1.3`, `1` (*1.3/Dockerfile*)](https://github.com/docker-library/scylladb/blob/?????/1.3/Dockerfile) - -For more information about this image and its history, please see [the relevant manifest file (`library/scylladb`)](https://github.com/docker-library/official-images/blob/master/library/scylladb). This image is updated via [pull requests to the `docker-library/official-images` GitHub repo](https://github.com/docker-library/official-images/pulls?q=label%3Alibrary%2Fscylladb). - -For detailed information about the virtual/transfer sizes and individual layers of each of the above supported tags, please see [the `scylladb/tag-details.md` file](https://github.com/docker-library/docs/blob/master/scylladb/tag-details.md) in [the `docker-library/docs` GitHub repo](https://github.com/docker-library/docs). - # What is ScyllaDB ? ScyllaDB is a high-performance Cassandra implementation written in C++14. Classified as a NoSQL database, ScyllaDB deliver a high number of transactions per seconds making it one of the fastest database on the planet. ScyllaDB is released under the GNU Affero General Public License version 3 and the Apache License, ScyllaDB is free and open-source software. @@ -14,128 +6,53 @@ ScyllaDB is a high-performance Cassandra implementation written in C++14. 
Classi ![logo](http://www.scylladb.com/img/logo.svg) -## Testing with docker +# How to use this image -To launch a Scylla instance, run: - -``` -docker pull scylladb/scylla - -docker run -p 127.0.0.1:9042:9042 -i -t scylladb/scylla +## Start a `scylla` server instance +```console +$ docker run --name scylla -d scylladb/scylla ``` -## Docker for production usage +## Run `nodetool` utility -First disable SELinux if present. - -ScyllaDB needs XFS to perform well. Get a kernel with XFS patches. On Ubuntu see the kernel section of [Getting Started with Scylla on Red Hat Enterprise, CentOS, and Fedora](/doc/getting-started-rpm/). - -### Install xfsprogs on Ubuntu - -```sh -apt-get install xfsprogs +``` +$ docker exec -it scylla nodetool status +Datacenter: datacenter1 +======================= +Status=Up/Down +|/ State=Normal/Leaving/Joining/Moving +-- Address Load Tokens Owns (effective) Host ID Rack +UN 172.17.0.2 125.51 KB 256 100.0% c9155121-786d-44f8-8667-a8b915b95665 rack1 ``` -### Install xfsprogs on Centos +## Run `cqlsh` utility -```sh -yum install xfsprogs +``` +$ docker exec -it scylla cqlsh +Connected to Test Cluster at 172.17.0.2:9042. +[cqlsh 5.0.1 | Cassandra 2.1.8 | CQL spec 3.2.1 | Native protocol v3] +Use HELP for help. +cqlsh> ``` -### Format and prepare the XFS volume +## Check `scylla` logs -Remember the volume device file you want to use as `$VOLUME`. 
- -As root on the host (`sudo su -`) do: - -```sh -mkfs.xfs /dev/$VOLUME -echo "/dev/$VOLUME /var/lib/scylla xfs defaults 0 2" >> /etc/fstab -mkdir /var/lib/scylla -mount /var/lib/scylla -ln -s /etc/scylla /var/lib/scylla/conf -mkdir /var/lib/scylla/data/ -mkdir /var/lib/scylla/commitlog -chown -R 997.1000 /var/lib/scylla/ +``` +$ docker logs scylla | tail +INFO 2016-08-04 06:57:40,836 [shard 5] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,836 [shard 3] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,836 [shard 1] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,836 [shard 2] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,836 [shard 4] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,836 [shard 7] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,837 [shard 6] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy +INFO 2016-08-04 06:57:40,839 [shard 0] database - Schema version changed to fea14d93-9c5a-34f5-9d0e-2e49dcfa747e +INFO 2016-08-04 06:57:40,839 [shard 0] storage_service - Starting listening for CQL clients on 172.17.0.2:9042... +INFO 2016-08-04 06:57:40,840 [shard 0] storage_service - Thrift server listening on 172.17.0.2:9160 ... 
``` -Prepare the `rc.local` script - -```sh -echo '#!/bin/sh -e' > /etc/rc.local -echo "chown -R 997.1000 /var/lib/scylla/" >> /etc/rc.local -echo "exit 0" >> /etc/rc.local -chmod +x /etc/rc.local -``` - -Read the `--cpuset` reference in [Docker run reference](https://docs.docker.com/engine/reference/run/) and determine and set the `CPUSET` variable corresponding to your needs. - -Then to launch a Scylla instance, run: - -```sh -docker pull scylladb/scylla - -docker run -e "SCYLLA_PRODUCTION=true" -e "SCYLLA_CPU_SET=$CPUSET" -v /var/lib/scylla:/var/lib/scylla --cpuset-cpus="$CPUSET" -p 127.0.0.1:9042:9042 -i -t scylladb/scylla - -./tools/bin/cassandra-stress write -mode cql3 native -``` - -## Docker clustering on the same physical machine - -```sh -docker pull scylladb/scylla - -docker run -p 127.0.0.1:9042:9042 -d --name scylla_seed_node -t scylladb/scylla -docker run -p 127.0.0.1:9043:9042 -e SCYLLA_SEEDS="$(docker inspect --format='{{ .NetworkSettings.IPAddress }}' scylla_seed_node)" -d --name scylla_node -t scylladb/scylla -``` - -## Docker clustering with multiple physical machines - -Given two machine with first machine having ip IP1 and -second having ip IP2 you can do. 
- - -On the first machine - -```sh -docker pull scylladb/scylla - -docker run -p $IP1:9042:9042 -e SCYLLA_SEEDS=$IP1 -e SCYLLA_BROADCAST_ADDRESS=$IP1 -p 7000:7000 -d --name scylla_seed_node -t scylladb/scylla -``` - -And on the second -```sh -docker pull scylladb/scylla - -docker run -p $IP2:9042:9042 -e SCYLLA_SEEDS=$IP1 -e SCYLLA_BROADCAST_ADDRESS=$IP2 -p 7000:7000 -d --name scylla_node -t scylladb/scylla -``` - -## List of ports that are nice to forward from the host to inside the container - -7199: JMX (for using nodetool) -7000: Internode communication (The cluster communicate with it) -9042: CQL native transport port (How the client application access the database) - -## ScyllaDB Docker special variables - -### SCYLLA_BROADCAST_ADDRESS - -Ip address to communicate to other node of the cluster. - - -### SCYLLA_CPU_SET - -Cpu set to pass to Scylla as exposed in the [Docker run reference](https://docs.docker.com/engine/reference/run/). - -### SCYLLA_PRODUCTION - -Can be set to true in order to desactivate developer mode. This must be combined with a bind mount of an XFS volume -in the /var/lib/scylla destination. - -### SCYLLA_SEEDS - -List of seed nodes IPs. Seed nodes are used to discover the Scylla cluster topology at startup. +# User Feedback ## Issues From 394c8f8c4f3aafbe45708cc80f37321e58f4860e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 4 Aug 2016 12:17:02 +0300 Subject: [PATCH 5/5] dist/docker: Document Scylla cluster setup Add instructions on how to make a cluster of two Scylla nodes. --- docs/docker-hub.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/docker-hub.md b/docs/docker-hub.md index 8b7eb185b1..2670134eab 100644 --- a/docs/docker-hub.md +++ b/docs/docker-hub.md @@ -11,13 +11,13 @@ ScyllaDB is a high-performance Cassandra implementation written in C++14. 
Classi ## Start a `scylla` server instance ```console -$ docker run --name scylla -d scylladb/scylla +$ docker run --name some-scylla -d scylladb/scylla ``` ## Run `nodetool` utility ``` -$ docker exec -it scylla nodetool status +$ docker exec -it some-scylla nodetool status Datacenter: datacenter1 ======================= Status=Up/Down @@ -29,17 +29,23 @@ UN 172.17.0.2 125.51 KB 256 100.0% c9155121-786d-44f8-8667-a8b ## Run `cqlsh` utility ``` -$ docker exec -it scylla cqlsh +$ docker exec -it some-scylla cqlsh Connected to Test Cluster at 172.17.0.2:9042. [cqlsh 5.0.1 | Cassandra 2.1.8 | CQL spec 3.2.1 | Native protocol v3] Use HELP for help. cqlsh> ``` +## Make a cluster + +``` +$ docker run --name some-scylla2 -d scylladb/scylla --seeds="$(docker inspect --format='{{ .NetworkSettings.IPAddress }}' some-scylla)" +``` + ## Check `scylla` logs ``` -$ docker logs scylla | tail +$ docker logs some-scylla | tail INFO 2016-08-04 06:57:40,836 [shard 5] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy INFO 2016-08-04 06:57:40,836 [shard 3] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy INFO 2016-08-04 06:57:40,836 [shard 1] database - Setting compaction strategy of system_traces.events to SizeTieredCompactionStrategy