Discussion:
[gentoo-portage-dev] [PATCH v3 1/3] Add FEATURES=mount-sandbox to take advantage of mount ns
Michał Górny
2018-11-18 11:42:50 UTC
Permalink
Support FEATURES=mount-sandbox that unshares the ebuild processes
into a new mount namespace and makes all the mounts private by default.

Signed-off-by: Michał Górny <***@gentoo.org>
---
lib/portage/const.py | 1 +
lib/portage/package/ebuild/doebuild.py | 7 +++++-
lib/portage/process.py | 34 +++++++++++++++++++++-----
man/make.conf.5 | 5 ++++
4 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/lib/portage/const.py b/lib/portage/const.py
index 602caeb34..e0f93f7cc 100644
--- a/lib/portage/const.py
+++ b/lib/portage/const.py
@@ -160,6 +160,7 @@ SUPPORTED_FEATURES = frozenset([
"merge-sync",
"metadata-transfer",
"mirror",
+ "mount-sandbox",
"multilib-strict",
"network-sandbox",
"network-sandbox-proxy",
diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py
index d0e96f34c..e84a618d2 100644
--- a/lib/portage/package/ebuild/doebuild.py
+++ b/lib/portage/package/ebuild/doebuild.py
@@ -148,6 +148,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, **kwargs):

kwargs['ipc'] = 'ipc-sandbox' not in settings.features or \
phase in _ipc_phases
+ kwargs['mountns'] = 'mount-sandbox' in settings.features
kwargs['networked'] = 'network-sandbox' not in settings.features or \
phase in _networked_phases or \
'network-sandbox' in settings['PORTAGE_RESTRICT'].split()
@@ -1480,7 +1481,8 @@ def _validate_deps(mysettings, myroot, mydo, mydbapi):
# XXX This would be to replace getstatusoutput completely.
# XXX Issue: cannot block execution. Deadlock condition.
def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
- sesandbox=False, fakeroot=False, networked=True, ipc=True, **keywords):
+ sesandbox=False, fakeroot=False, networked=True, ipc=True,
+ mountns=False, **keywords):
"""
Spawn a subprocess with extra portage-specific options.
Optiosn include:
@@ -1514,6 +1516,8 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
@type networked: Boolean
@param ipc: Run this command with host IPC access enabled
@type ipc: Boolean
+ @param mountns: Run this command inside mount namespace
+ @type mountns: Boolean
@param keywords: Extra options encoded as a dict, to be passed to spawn
@type keywords: Dictionary
@rtype: Integer
@@ -1546,6 +1550,7 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
if uid == 0 and platform.system() == 'Linux':
keywords['unshare_net'] = not networked
keywords['unshare_ipc'] = not ipc
+ keywords['unshare_mount'] = mountns

if not networked and mysettings.get("EBUILD_PHASE") != "nofetch" and \
("network-sandbox-proxy" in features or "distcc" in features):
diff --git a/lib/portage/process.py b/lib/portage/process.py
index fd326731a..46868f442 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -1,5 +1,5 @@
# portage.py -- core Portage functionality
-# Copyright 1998-2014 Gentoo Foundation
+# Copyright 1998-2018 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2


@@ -10,6 +10,7 @@ import platform
import signal
import socket
import struct
+import subprocess
import sys
import traceback
import os as _os
@@ -222,7 +223,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
uid=None, gid=None, groups=None, umask=None, logfile=None,
path_lookup=True, pre_exec=None,
close_fds=(sys.version_info < (3, 4)), unshare_net=False,
- unshare_ipc=False, cgroup=None):
+ unshare_ipc=False, unshare_mount=False, cgroup=None):
"""
Spawns a given command.

@@ -260,6 +261,9 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
@type unshare_net: Boolean
@param unshare_ipc: If True, IPC will be unshared from the spawned process
@type unshare_ipc: Boolean
+ @param unshare_mount: If True, mount namespace will be unshared and mounts will
+ be private to the namespace
+ @type unshare_mount: Boolean
@param cgroup: CGroup path to bind the process to
@type cgroup: String

@@ -328,7 +332,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
# This caches the libc library lookup in the current
# process, so that it's only done once rather than
# for each child process.
- if unshare_net or unshare_ipc:
+ if unshare_net or unshare_ipc or unshare_mount:
find_library("c")

# Force instantiation of portage.data.userpriv_groups before the
@@ -344,7 +348,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
try:
_exec(binary, mycommand, opt_name, fd_pipes,
env, gid, groups, uid, umask, pre_exec, close_fds,
- unshare_net, unshare_ipc, cgroup)
+ unshare_net, unshare_ipc, unshare_mount, cgroup)
except SystemExit:
raise
except Exception as e:
@@ -414,7 +418,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
return 0

def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
- pre_exec, close_fds, unshare_net, unshare_ipc, cgroup):
+ pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, cgroup):

"""
Execute a given binary with options
@@ -443,6 +447,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
@type unshare_net: Boolean
@param unshare_ipc: If True, IPC will be unshared from the spawned process
@type unshare_ipc: Boolean
+ @param unshare_mount: If True, mount namespace will be unshared and mounts will
+ be private to the namespace
+ @type unshare_mount: Boolean
@param cgroup: CGroup path to bind the process to
@type cgroup: String
@rtype: None
@@ -499,11 +506,13 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
f.write('%d\n' % os.getpid())

# Unshare (while still uid==0)
- if unshare_net or unshare_ipc:
+ if unshare_net or unshare_ipc or unshare_mount:
filename = find_library("c")
if filename is not None:
libc = LoadLibrary(filename)
if libc is not None:
+ # from /usr/include/bits/sched.h
+ CLONE_NEWNS = 0x00020000
CLONE_NEWIPC = 0x08000000
CLONE_NEWNET = 0x40000000

@@ -512,6 +521,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
flags |= CLONE_NEWNET
if unshare_ipc:
flags |= CLONE_NEWIPC
+ if unshare_mount:
+ # NEWNS = mount namespace
+ flags |= CLONE_NEWNS

try:
if libc.unshare(flags) != 0:
@@ -519,6 +531,16 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
errno.errorcode.get(ctypes.get_errno(), '?')),
noiselevel=-1)
else:
+ if unshare_mount:
+ # mark the whole filesystem as slave to avoid
+ # mounts escaping the namespace
+ s = subprocess.Popen(['mount',
+ '--make-rslave', '/'])
+ mount_ret = s.wait()
+ if mount_ret != 0:
+ # TODO: should it be fatal maybe?
+ writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,),
+ noiselevel=-1)
if unshare_net:
# 'up' the loopback
IFF_UP = 0x1
diff --git a/man/make.conf.5 b/man/make.conf.5
index f69afd015..7cb5741ad 100644
--- a/man/make.conf.5
+++ b/man/make.conf.5
@@ -494,6 +494,11 @@ ${repository_location}/metadata/md5\-cache/ directory will be used directly
Fetch everything in \fBSRC_URI\fR regardless of \fBUSE\fR settings,
except do not fetch anything when \fImirror\fR is in \fBRESTRICT\fR.
.TP
+.B mount\-sandbox
+Isolate the ebuild phase functions from host mount namespace. This makes
+it possible for ebuild to alter mountpoints without affecting the host
+system. Supported only on Linux. Requires mount namespace support in kernel.
+.TP
.B multilib\-strict
Many Makefiles assume that their libraries should go to /usr/lib, or
$(prefix)/lib. This assumption can cause a serious mess if /usr/lib
--
2.19.1
Michał Górny
2018-11-18 11:42:52 UTC
Permalink
Signed-off-by: Michał Górny <***@gentoo.org>
---
bin/pid-ns-init | 30 ++++++++++++++++++++++++++++++
lib/portage/process.py | 11 ++++++-----
2 files changed, 36 insertions(+), 5 deletions(-)
create mode 100644 bin/pid-ns-init

New in v3: pid-ns-init handles exit-by-signal properly.

diff --git a/bin/pid-ns-init b/bin/pid-ns-init
new file mode 100644
index 000000000..843257b70
--- /dev/null
+++ b/bin/pid-ns-init
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# Copyright 2018 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import os
+import sys
+
+
+def main(argv):
+ if len(argv) < 2:
+ return 'Usage: {} <main-child-pid>'.format(argv[0])
+ main_child_pid = int(argv[1])
+
+ # wait for child processes
+ while True:
+ pid, status = os.wait()
+ if pid == main_child_pid:
+ if os.WIFEXITED(status):
+ return os.WEXITSTATUS(status)
+ elif os.WIFSIGNALED(status):
+ os.kill(os.getpid(), os.WTERMSIG(status))
+ # go to the unreachable place
+ break
+
+ # this should never be reached
+ return 127
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/lib/portage/process.py b/lib/portage/process.py
index dee126c3c..75ec299f0 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -544,13 +544,14 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
else:
if unshare_pid:
# pid namespace requires us to become init
- # TODO: do init-ty stuff
- # therefore, fork() ASAP
fork_ret = os.fork()
if fork_ret != 0:
- pid, status = os.waitpid(fork_ret, 0)
- assert pid == fork_ret
- os._exit(status)
+ os.execv(portage._python_interpreter, [
+ portage._python_interpreter,
+ os.path.join(portage._bin_path,
+ 'pid-ns-init'),
+ '%s' % fork_ret,
+ ])
if unshare_mount:
# mark the whole filesystem as slave to avoid
# mounts escaping the namespace
--
2.19.1
Zac Medico
2018-11-18 12:00:52 UTC
Permalink
Post by Michał Górny
---
bin/pid-ns-init | 30 ++++++++++++++++++++++++++++++
lib/portage/process.py | 11 ++++++-----
2 files changed, 36 insertions(+), 5 deletions(-)
create mode 100644 bin/pid-ns-init
New in v3: pid-ns-init handles exit-by-signal properly.
The whole series looks good now. Please merge.
--
Thanks,
Zac
Michał Górny
2018-11-18 11:42:51 UTC
Permalink
Support using a PID namespace in order to isolate the ebuild processes
from host system, and make it possible to kill them all easily
(similarly to cgroups but easier to use).

Bug: https://bugs.gentoo.org/659582
Signed-off-by: Michał Górny <***@gentoo.org>
---
lib/portage/const.py | 1 +
lib/portage/package/ebuild/doebuild.py | 8 +++--
lib/portage/process.py | 48 +++++++++++++++++++++++---
man/make.conf.5 | 7 ++++
4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/lib/portage/const.py b/lib/portage/const.py
index e0f93f7cc..ca66bc46e 100644
--- a/lib/portage/const.py
+++ b/lib/portage/const.py
@@ -174,6 +174,7 @@ SUPPORTED_FEATURES = frozenset([
"notitles",
"parallel-fetch",
"parallel-install",
+ "pid-sandbox",
"prelink-checksums",
"preserve-libs",
"protect-owned",
diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py
index e84a618d2..9917ac82c 100644
--- a/lib/portage/package/ebuild/doebuild.py
+++ b/lib/portage/package/ebuild/doebuild.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2018 Gentoo Foundation
+# Copyright 2010-2018 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

from __future__ import unicode_literals
@@ -152,6 +152,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, **kwargs):
kwargs['networked'] = 'network-sandbox' not in settings.features or \
phase in _networked_phases or \
'network-sandbox' in settings['PORTAGE_RESTRICT'].split()
+ kwargs['pidns'] = 'pid-sandbox' in settings.features

if phase == 'depend':
kwargs['droppriv'] = 'userpriv' in settings.features
@@ -1482,7 +1483,7 @@ def _validate_deps(mysettings, myroot, mydo, mydbapi):
# XXX Issue: cannot block execution. Deadlock condition.
def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
sesandbox=False, fakeroot=False, networked=True, ipc=True,
- mountns=False, **keywords):
+ mountns=False, pidns=False, **keywords):
"""
Spawn a subprocess with extra portage-specific options.
Optiosn include:
@@ -1518,6 +1519,8 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
@type ipc: Boolean
@param mountns: Run this command inside mount namespace
@type mountns: Boolean
+ @param pidns: Run this command in isolated PID namespace
+ @type pidns: Boolean
@param keywords: Extra options encoded as a dict, to be passed to spawn
@type keywords: Dictionary
@rtype: Integer
@@ -1551,6 +1554,7 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
keywords['unshare_net'] = not networked
keywords['unshare_ipc'] = not ipc
keywords['unshare_mount'] = mountns
+ keywords['unshare_pid'] = pidns

if not networked and mysettings.get("EBUILD_PHASE") != "nofetch" and \
("network-sandbox-proxy" in features or "distcc" in features):
diff --git a/lib/portage/process.py b/lib/portage/process.py
index 46868f442..dee126c3c 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -223,7 +223,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
uid=None, gid=None, groups=None, umask=None, logfile=None,
path_lookup=True, pre_exec=None,
close_fds=(sys.version_info < (3, 4)), unshare_net=False,
- unshare_ipc=False, unshare_mount=False, cgroup=None):
+ unshare_ipc=False, unshare_mount=False, unshare_pid=False,
+ cgroup=None):
"""
Spawns a given command.

@@ -264,6 +265,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
@param unshare_mount: If True, mount namespace will be unshared and mounts will
be private to the namespace
@type unshare_mount: Boolean
+ @param unshare_pid: If True, PID ns will be unshared from the spawned process
+ @type unshare_pid: Boolean
@param cgroup: CGroup path to bind the process to
@type cgroup: String

@@ -332,7 +335,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
# This caches the libc library lookup in the current
# process, so that it's only done once rather than
# for each child process.
- if unshare_net or unshare_ipc or unshare_mount:
+ if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
find_library("c")

# Force instantiation of portage.data.userpriv_groups before the
@@ -348,7 +351,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
try:
_exec(binary, mycommand, opt_name, fd_pipes,
env, gid, groups, uid, umask, pre_exec, close_fds,
- unshare_net, unshare_ipc, unshare_mount, cgroup)
+ unshare_net, unshare_ipc, unshare_mount, unshare_pid,
+ cgroup)
except SystemExit:
raise
except Exception as e:
@@ -418,7 +422,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False,
return 0

def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
- pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, cgroup):
+ pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, unshare_pid,
+ cgroup):

"""
Execute a given binary with options
@@ -450,6 +455,8 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
@param unshare_mount: If True, mount namespace will be unshared and mounts will
be private to the namespace
@type unshare_mount: Boolean
+ @param unshare_pid: If True, PID ns will be unshared from the spawned process
+ @type unshare_pid: Boolean
@param cgroup: CGroup path to bind the process to
@type cgroup: String
@rtype: None
@@ -506,7 +513,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
f.write('%d\n' % os.getpid())

# Unshare (while still uid==0)
- if unshare_net or unshare_ipc or unshare_mount:
+ if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
filename = find_library("c")
if filename is not None:
libc = LoadLibrary(filename)
@@ -514,6 +521,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
# from /usr/include/bits/sched.h
CLONE_NEWNS = 0x00020000
CLONE_NEWIPC = 0x08000000
+ CLONE_NEWPID = 0x20000000
CLONE_NEWNET = 0x40000000

flags = 0
@@ -524,6 +532,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
if unshare_mount:
# NEWNS = mount namespace
flags |= CLONE_NEWNS
+ if unshare_pid:
+ # we also need mount namespace for slave /proc
+ flags |= CLONE_NEWPID | CLONE_NEWNS

try:
if libc.unshare(flags) != 0:
@@ -531,6 +542,15 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
errno.errorcode.get(ctypes.get_errno(), '?')),
noiselevel=-1)
else:
+ if unshare_pid:
+ # pid namespace requires us to become init
+ # TODO: do init-ty stuff
+ # therefore, fork() ASAP
+ fork_ret = os.fork()
+ if fork_ret != 0:
+ pid, status = os.waitpid(fork_ret, 0)
+ assert pid == fork_ret
+ os._exit(status)
if unshare_mount:
# mark the whole filesystem as slave to avoid
# mounts escaping the namespace
@@ -541,6 +561,24 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
# TODO: should it be fatal maybe?
writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,),
noiselevel=-1)
+ if unshare_pid:
+ # we need at least /proc being slave
+ s = subprocess.Popen(['mount',
+ '--make-slave', '/proc'])
+ mount_ret = s.wait()
+ if mount_ret != 0:
+ # can't proceed with shared /proc
+ writemsg("Unable to mark /proc slave: %d\n" % (mount_ret,),
+ noiselevel=-1)
+ os._exit(1)
+ # mount new /proc for our namespace
+ s = subprocess.Popen(['mount',
+ '-t', 'proc', 'proc', '/proc'])
+ mount_ret = s.wait()
+ if mount_ret != 0:
+ writemsg("Unable to mount new /proc: %d\n" % (mount_ret,),
+ noiselevel=-1)
+ os._exit(1)
if unshare_net:
# 'up' the loopback
IFF_UP = 0x1
diff --git a/man/make.conf.5 b/man/make.conf.5
index 7cb5741ad..de04e5e34 100644
--- a/man/make.conf.5
+++ b/man/make.conf.5
@@ -558,6 +558,13 @@ Use finer\-grained locks when installing packages, allowing for greater
parallelization. For additional parallelization, disable
\fIebuild\-locks\fR.
.TP
+.B pid\-sandbox
+Isolate the process space for the ebuild processes. This makes it
+possible to cleanly kill all processes spawned by the ebuild.
+Supported only on Linux. Requires PID and mount namespace support
+in kernel. /proc is remounted inside the mount namespace to account
+for new PID namespace.
+.TP
.B prelink\-checksums
If \fBprelink\fR(8) is installed then use it to undo any prelinks on files
before computing checksums for merge and unmerge. This feature is
--
2.19.1
Loading...