diff --git a/dist/redhat/build_relocatable_python3_rpm.sh b/dist/redhat/build_relocatable_python3_rpm.sh
new file mode 100755
index 0000000000..4389345f95
--- /dev/null
+++ b/dist/redhat/build_relocatable_python3_rpm.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/bash
+#
+# Copyright (C) 2019 ScyllaDB
+#
+
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# Builds the scylla-python3 RPM out of a relocatable python3 tarball: renders
+# relocatable_python.spec.mustache with pystache and runs rpmbuild on the result.
+
+# Stop at the first failing step instead of carrying on with half-initialized state.
+set -euo pipefail
+
+# Prints the command line help and exits with status 1.
+print_usage() {
+    echo "${0} --reloc-pkg build/release/scylla-python3.tar.gz"
+    echo " --reloc-pkg specify relocatable package path"
+    echo " --rpmbuild specify directory to use for building rpms"
+    exit 1
+}
+
+RELOC_PKG=
+RPMBUILD=
+while [ $# -gt 0 ]; do
+    case "$1" in
+        "--reloc-pkg")
+            # "shift 2" fails without consuming anything when the value is missing,
+            # which would make this loop spin forever.
+            [ $# -ge 2 ] || print_usage
+            RELOC_PKG="$2"
+            shift 2
+            ;;
+        "--rpmbuild")
+            [ $# -ge 2 ] || print_usage
+            RPMBUILD="$2"
+            shift 2
+            ;;
+        *)
+            print_usage
+            ;;
+    esac
+done
+
+# Both options are mandatory.
+if [ -z "$RELOC_PKG" ] || [ -z "$RPMBUILD" ]; then
+    print_usage
+fi
+
+if [ ! -f "$RELOC_PKG" ]; then
+    echo "${RELOC_PKG} not found." >&2
+    exit 1
+fi
+RELOC_PKG_BASENAME=$(basename "$RELOC_PKG")
+
+RPMBUILD=$(readlink -f "$RPMBUILD")
+# Directory holding this script; the spec template lives next to it.
+SPEC=$(dirname "$(readlink -f "$0")")
+
+mkdir -p "$RPMBUILD"/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS}
+# The package version is the version of the python3 found in PATH.
+PYVER=$(python3 -V | cut -d' ' -f2)
+
+ln -fv "$RELOC_PKG" "$RPMBUILD"/SOURCES/
+pystache "$SPEC"/relocatable_python.spec.mustache "{ \"version\": \"${PYVER}\", \"reloc_pkg\": \"${RELOC_PKG_BASENAME}\", \"name\": \"scylla-python3\", \"target\": \"/opt/scylladb/python3\" }" > "$RPMBUILD"/SPECS/relocatable_python.spec
+rpmbuild --nodebuginfo -ba --define "_build_id_links none" --define "_topdir ${RPMBUILD}" --define "dist .el7" "$RPMBUILD"/SPECS/relocatable_python.spec
diff --git a/dist/redhat/relocatable_python.spec.mustache b/dist/redhat/relocatable_python.spec.mustache
new file mode 100644
index 0000000000..600c6d313c
--- /dev/null
+++ b/dist/redhat/relocatable_python.spec.mustache
@@ -0,0 +1,37 @@
+Name: {{name}}
+Version: {{version}}
+Release: 1%{?dist}
+Summary: A standalone python3 interpreter that can be moved around different Linux machines
+AutoReqProv: no
+Provides: {{name}}
+
+License: Python
+Source0: {{reloc_pkg}}
+
+%global __brp_python_bytecompile %{nil}
+%global __brp_mangle_shebangs %{nil}
+%global __brp_ldconfig %{nil}
+%global __brp_strip %{nil}
+%global __brp_strip_comment_note %{nil}
+%global __brp_strip_static_archive %{nil}
+
+%description
+This is a self-contained python interpreter that can be moved around
+different Linux machines as long as they run a new enough kernel (where
+new enough is defined by whichever Python module uses any kernel
+functionality). All shared libraries needed for the interpreter to
+operate are shipped with it.
+
+%prep
+%setup -q -c
+
+%install
+mkdir -p %{buildroot}/{{target}}
+cp -a ./ %{buildroot}/{{target}}
+
+%files
+%dir {{target}}
+{{target}}/*
+
+%changelog
+
diff --git a/dist/redhat/scylla.spec.mustache b/dist/redhat/scylla.spec.mustache
index 6124f5d068..0965e6bc53 100644
--- a/dist/redhat/scylla.spec.mustache
+++ b/dist/redhat/scylla.spec.mustache
@@ -49,7 +49,7 @@ BuildRequires: pystache
 {{#centos7}}
 Requires: kernel >= 3.10.0-514
 {{/centos7}}
-Requires: {{product}}-conf systemd-libs hwloc curl util-linux pciutils mdadm xfsprogs file python3 python3-PyYAML python3-urwid python3-pyparsing python3-requests python3-pyudev python3-setuptools
+Requires: {{product}}-conf systemd-libs hwloc curl util-linux pciutils mdadm xfsprogs file scylla-python3
 Conflicts: abrt
 AutoReqProv: no
 
diff --git a/reloc/build_python3.sh b/reloc/build_python3.sh
new file mode 100755
index 0000000000..21ca16272b
--- /dev/null
+++ b/reloc/build_python3.sh
@@ -0,0 +1,30 @@
+#!/bin/bash -e
+#
+# Copyright (C) 2019 ScyllaDB
+#
+
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
+#
+
+TARGET=build/release/scylla-python3.tar.gz
+
+if [ -f "$TARGET" ]; then
+    rm "$TARGET"
+fi
+
+PACKAGES="python3-PyYAML python3-urwid python3-pyparsing python3-requests python3-pyudev python3-setuptools"
+./scripts/create-relocatable-python.py --output "$TARGET" $PACKAGES
diff --git a/scripts/create-relocatable-python.py b/scripts/create-relocatable-python.py
new file mode 100755
index 0000000000..d95d35d479
--- /dev/null
+++ b/scripts/create-relocatable-python.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2019 ScyllaDB
+#
+
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import argparse
+import io
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+import tarfile
+from tempfile import mkstemp
+
+def should_copy(f):
+    '''Given a file, returns whether or not we are interested in copying this file.
+    We want the actual python interpreter, and the files in /lib(64) and /usr/lib(64)
+    All the stuff in /var and other paths is not useful for the relocatable package.
+    The locale files take a lot of space and we won't use them, so we get rid of them as well.
+    The build_id files will be symlinks to binaries and shared libraries, that we don't want to keep.
+    '''
+
+    if f == "": # package with no files
+        return False
+
+    if f.startswith("/usr/bin/python3."):
+        return f[-1] != "m" # python ships with two binaries, one of them with a specialized malloc (python 3.xm). No need.
+
+    if f.startswith("/lib64/ld-linux"): # the interpreter is copied by the binary fixup process
+        return False
+
+    parts = list(pathlib.PurePath(f).parts)
+    el = parts.pop(0)
+    if el != "/":
+        raise RuntimeError("unexpected path: not absolute! {}".format(f))
+
+    if len(parts) > 0 and parts[0] == "usr":
+        parts.pop(0)
+
+    if not parts:
+        return False
+
+    if parts[0] != "lib" and parts[0] != "lib64":
+        return False
+    parts.pop(0)
+
+    if len(parts) > 0 and (parts[0] == "locale" or parts[0] == ".build-id"):
+        return False
+    return True
+
+def fix_binary(ar, path, libpath):
+    '''Makes one binary or shared library relocatable: copies it to a temporary file and uses patchelf to
+    point the copy's run-time library search path at libpath, so libraries come from the relocatable
+    directory and not from the system.
+
+    Returns the path of the patched temporary copy. The caller owns that file and must delete it once it
+    has been added to the archive. The ar parameter is unused; it is kept for interface compatibility.
+    '''
+    # it's a pity patchelf has to patch an actual file.
+    fd, patched_elf = mkstemp()
+    # mkstemp() returns an open descriptor as well; only the name is needed, so close it instead of leaking it.
+    os.close(fd)
+    try:
+        shutil.copy2(path, patched_elf)
+        subprocess.check_call(['patchelf',
+                               '--set-rpath',
+                               libpath,
+                               patched_elf])
+    except BaseException:
+        # don't leave the temporary copy behind when patching fails
+        os.unlink(patched_elf)
+        raise
+    return patched_elf
+
+def fix_python_binary(ar, binpath):
+    '''Makes the python binary relocatable. The patched binary looks for its libraries in $ORIGIN/../lib64/
+    and is stored as libexec/<name>.bin. The ELF interpreter (dynamic loader) it requests is copied next to
+    it as libexec/ld.so, so the thunk in bin/ can launch the binary with it later.
+    '''
+    pyname = os.path.basename(binpath)
+    patched_binary = fix_binary(ar, binpath, '$ORIGIN/../lib64/')
+    try:
+        interpreter = subprocess.check_output(['patchelf',
+                                               '--print-interpreter',
+                                               patched_binary], universal_newlines=True).splitlines()[0]
+        ar.add(os.path.realpath(interpreter), arcname=os.path.join("libexec", "ld.so"))
+        ar.add(patched_binary, arcname=os.path.join("libexec", pyname + ".bin"))
+    finally:
+        # ar.add() has already copied the contents into the archive, the temporary file is no longer needed
+        os.unlink(patched_binary)
+
+def fix_dynload(ar, binpath, targetpath):
+    '''Patches one extension module from lib-dynload to look for libraries in $ORIGIN/../../ and stores it
+    in the archive as targetpath.
+    '''
+    patched_binary = fix_binary(ar, binpath, '$ORIGIN/../../')
+    try:
+        ar.add(patched_binary, arcname=targetpath, recursive=False)
+    finally:
+        os.unlink(patched_binary)
+
+def gen_python_thunk(ar, pybin):
+    '''Adds bin/<pybin>, a shell script that runs libexec/<pybin>.bin through the bundled libexec/ld.so,
+    plus a bin/python3 symlink pointing at it.
+    '''
+    thunk=b'''\
+#!/bin/bash
+x="$(readlink -f "$0")"
+b="$(basename "$x")"
+d="$(dirname "$x")/.."
+ldso="$d/libexec/ld.so"
+realexe="$d/libexec/$b.bin"
+exec -a "$0" "$ldso" "$realexe" "$@"
+'''
+
+    ti = tarfile.TarInfo(name=os.path.join("bin", pybin))
+    ti.size = len(thunk)
+    ti.mode = 0o755
+    ar.addfile(ti, fileobj=io.BytesIO(thunk))
+
+    ti = tarfile.TarInfo(name=os.path.join("bin", "python3"))
+    ti.type = tarfile.SYMTYPE
+    ti.linkname = pybin
+    ar.addfile(ti)
+
+def copy_file_to_python_env(ar, f):
+    '''Adds one file from the system to the archive. The python binary gets a launcher thunk and is patched;
+    everything else is placed under lib64/, with extension modules in lib-dynload patched on the way.
+    '''
+    if f.startswith("/usr/bin/python"):
+        gen_python_thunk(ar, os.path.basename(f))
+        fix_python_binary(ar, f)
+    else:
+        libfile = f
+        # python tends to install in both /usr/lib and /usr/lib64, which doesn't mean it is
+        # a package for the wrong arch. So we need to handle both /lib and /lib64. Copying files
+        # blindly from /lib could be a problem, but we filtered out all the i686 packages during
+        # the dependency generation.
+        if libfile.startswith("/usr/"):
+            libfile = libfile.replace("/usr/", "/", 1)
+        if libfile.startswith("/lib/"):
+            libfile = libfile.replace("/lib/", "lib64/", 1)
+        elif libfile.startswith("/lib64/"):
+            libfile = libfile.replace("/lib64/", "lib64/", 1)
+        else:
+            raise RuntimeError("unexpected path: don't know what to do with {}".format(f))
+
+        # copy file instead of link unless we link to the current directory.
+        # links to the current directory are usually safe, but because we are manipulating
+        # the directory structure, very likely links that traverse paths will break.
+        if os.path.islink(f) and os.readlink(f) != os.path.basename(os.readlink(f)):
+            ar.add(os.path.realpath(f), arcname=libfile)
+        elif os.path.dirname(f).endswith("lib-dynload"):
+            fix_dynload(ar, f, libfile)
+        else:
+            # in case this is a directory that is listed, we don't want to include everything that is in that directory
+            # for instance, the python3 package will own site-packages, but other packages that we are not packaging could have
+            # filled it with stuff.
+            ar.add(f, arcname=libfile, recursive=False)
+
+def filter_basic_packages(package):
+    '''Returns true if this package should be filtered out. We filter out packages that are too basic like the Fedora repos,
+    or contains no files'''
+    # The packages below are way too basic and are listed just because repoquery will, correctly, list
+    # everything. We make our lives easier by filtering them out.
+    too_basic_packages = ("filesystem",
+                          "tzdata",
+                          "chkconfig",
+                          "basesystem",
+                          "coreutils",
+                          "fedora-release",
+                          "fedora-repos",
+                          "fedora-gpg-keys",
+                          "glibc-minimal-langpack",
+                          "glibc-all-langpacks")
+    # str.startswith() accepts a tuple of prefixes and returns a plain bool
+    return package.startswith(too_basic_packages)
+
+def dependencies(package_list):
+    '''Generates a list of RPM dependencies for the python interpreter and its modules'''
+    output = subprocess.check_output(['repoquery',
+                                      # Some architectures like x86_64 also carry packages for
+                                      # their 32-bit versions. In these cases, we won't want
+                                      # to mix them since we will only install lib64/
+                                      '--archlist=noarch,{machine}'.format(machine=os.uname().machine),
+                                      # Work only from the local metadata cache (no refresh). Guarantees consistent builds
+                                      '--cacheonly',
+                                      '--installed',
+                                      '--resolve',
+                                      '--requires',
+                                      '--recursive'] + package_list,
+                                     universal_newlines=True).splitlines()
+
+    output = [x for x in output if not filter_basic_packages(x)]
+    return output + package_list
+
+def generate_file_list(executables):
+    '''Given the RPM files that we want to scan in this run, returns a list of all files in those packages that are of interest to us'''
+
+    exclusions = []
+    for exe in executables:
+        exclusions += subprocess.check_output(['rpm', '-qd', exe], universal_newlines=True).splitlines()
+
+    # we don't want to use --list the first time: For one, we want to be able to filter out some packages with files we don't want to copy
+    # Second, repoquery --list do not include the actual package files when used with --resolve and --recursive (only its dependencies').
+    # So we need a separate step in which all packages are added together.
+    candidates = subprocess.check_output(['repoquery',
+                                          '--installed',
+                                          '--cacheonly',
+                                          '--list' ] + executables, universal_newlines=True).splitlines()
+
+    # sorted so that the archive layout does not depend on set iteration order
+    return sorted(x for x in set(candidates) - set(exclusions) if should_copy(x))
+
+def main():
+    '''Parses the command line, resolves the packages to ship and writes the relocatable archive.'''
+    ap = argparse.ArgumentParser(description='Create a relocatable python3 interpreter.')
+    ap.add_argument('--output', required=True,
+                    help='Destination file (tar format)')
+    ap.add_argument('modules', nargs='*', help='list of python modules to add, separated by spaces')
+
+    args = ap.parse_args()
+    packages = ["python3"] + args.modules
+
+    file_list = generate_file_list(dependencies(packages))
+    # the context manager closes the archive even when copying one of the files fails
+    with tarfile.open(args.output, mode='w|gz') as ar:
+        for f in file_list:
+            copy_file_to_python_env(ar, f)
+
+if __name__ == '__main__':
+    main()