Currently, we use --sig-proxy to forward signals to the container. However, this
requires the container's co-operation, which usually doesn't exist. For example,
docker run --sig-proxy fedora:29 bash -c "sleep 5"
does not respond to Ctrl-C.
This is a problem for continuous integration. If a build is aborted, Jenkins will
first attempt to gracefully terminate the processes (SIGINT/SIGTERM) and then give
up and use SIGKILL. If the graceful termination doesn't work, we end up with an
orphan container running on the node, which can then consume enough memory and CPU
to harm the following jobs.
To fix this, trap signals and handle them by killing the container. Also trap
shell exit, and additionally kill the container unconditionally once "docker wait"
returns, since if Jenkins happens to kill the "docker wait" process the regular
paths will not be taken.
Message-Id: <20190415084040.12352-1-avi@scylladb.com>
62 lines
1.3 KiB
Bash
Executable File
62 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Absolute path of the directory containing this script. The inner command
# substitution is quoted so a path containing whitespace reaches realpath as
# a single argument.
here="$(realpath "$(dirname "$0")")"

# Two directory levels above this script; bind-mounted into the container by
# the "docker run" invocation.
toplevel="$(realpath "$here/../..")"

# "--group-add <gid>" flags for "docker run".
group_args=()

# Extra "docker run" flags supplied by the caller ahead of "--".
docker_args=()
|
|
|
|
# Build one "--group-add <gid>" flag per group id reported by "id -G"; the
# flags are handed to "docker run" through group_args.
read -r -a host_gids <<< "$(id -G)"
for gid in "${host_gids[@]}"; do
  group_args+=(--group-add "$gid")
done
|
|
|
|
# Optional leading docker flags: when the first argument starts with "-",
# every argument up to (but not including) a mandatory "--" is collected in
# docker_args for "docker run". Whatever follows "--" is left in "$@".
if [[ "$1" = -* ]]; then
  while (( $# > 0 )) && [[ "$1" != "--" ]]; do
    docker_args+=("$1")
    shift
  done
  # The loop ends either on "--" or when the arguments run out; the latter
  # means the terminator is missing.
  if [[ "$1" != "--" ]]; then
    # Diagnostics go to stderr so they are never mistaken for regular output.
    echo "Expected '--' to terminate docker flag list" >&2
    exit 1
  fi
  shift  # drop the "--" terminator itself
fi
|
|
|
|
# Start the container detached and remember the id that "docker run" prints,
# so it can be waited on and killed later.
run_args=(
  --detach=true
  --rm
  --network host
  # Run as the invoking user and group, plus the collected --group-add flags.
  -u "$(id -u):$(id -g)"
  "${group_args[@]}"
  --cap-add SYS_PTRACE
  # Mount the working directory, the top-level directory and /tmp at the same
  # paths they have on the host.
  -v "$PWD:$PWD:z"
  -v "$toplevel:$toplevel:z"
  -v /tmp:/tmp:z
  # Host account databases and timezone, read-only.
  -v /etc/passwd:/etc/passwd:ro
  -v /etc/group:/etc/group:ro
  -v /etc/localtime:/etc/localtime:ro
  -w "$PWD"
  # Caller-supplied flags come last so they are appended to the defaults.
  "${docker_args[@]}"
)

# The image name is read from the "image" file next to this script; the
# remaining script arguments are the command to run inside the container.
# Stop here if the container could not be started: without an id there is
# nothing to wait for or kill.
if ! container=$(docker run "${run_args[@]}" "$(<"$here/image")" "$@") ||
    [[ -z "$container" ]]; then
  echo "Failed to start container" >&2
  # An id may have been printed before the failure; remove any leftover
  # container on a best-effort basis (errors here are expected and ignored).
  [[ -n "$container" ]] && docker rm -f "$container" > /dev/null 2>&1
  exit 1
fi
|
|
|
|
#######################################
# Kill the container started by this script, if there is one.
# Safe to call repeatedly (from the signal/EXIT trap and explicitly): once a
# kill has succeeded the id is cleared and later calls do nothing. A failed
# kill (for example because the container is already gone) is ignored and
# leaves the id in place so a later call can retry.
# Globals:   container (read; cleared after a successful kill)
# Arguments: none
# Returns:   always 0
#######################################
kill_it() {
  # No container id: either nothing was started or it was already killed.
  [[ -n "$container" ]] || return 0
  if docker kill "$container" > /dev/null; then
    container=
  fi
  return 0
}
|
|
|
|
# From here on the container must not outlive this script: kill it on the
# usual termination signals and on any shell exit.
trap kill_it EXIT SIGHUP SIGINT SIGTERM

# Block until the container stops; "docker wait" prints its exit status.
exitcode=$(docker wait "$container")

# "docker wait" itself may have been interrupted, leaving the container
# running. Kill it unconditionally to be certain it is gone.
kill_it

# Cleanup is complete; restore default handling so kill_it is not run again.
trap - EXIT SIGHUP SIGINT SIGTERM

# After "docker kill", "docker wait" prints nothing; report failure (1) then.
exit "${exitcode:-1}"
|