Quoting Andrew Morgan (morgan@kernel.org):
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Serge E. Hallyn wrote:
> > Andrew, this version follows all of your suggestions. Definitely nicer
> > userspace interface. thanks
> [...]
> >
> > /* Allow ioperm/iopl access */
> > @@ -314,6 +314,10 @@ typedef struct kernel_cap_struct {
> >
> > #define CAP_SETFCAP 31
> >
> > +#define CAP_NUM_CAPS 32
> > +
> > +#define cap_valid(x) ((x) >= 0 && (x) < CAP_NUM_CAPS)
> > +
>
> Could you change the name of CAP_NUM_CAPS? There is some libcap building
> code that does the following to automatically build the "cap_*" names
> for libcap, and this new define above messes that up! :-(
>
> sed -ne '/^#define[ \t]CAP[_A-Z]\+[ \t]\+[0-9]\+/{s/^#define \([^
> \t]*\)[ \t]*\([^ \t]*\)/ \{ \2, \"\1\"
> \},/;y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;p;}' <
> $(KERNEL_HEADERS)/linux/capability.h | fgrep -v 0x > cap_names.sed
>
> Something like:
>
> #define CAP_NUM_CAPS (CAP_SETFCAP+1)
>
> will save me some hassle. :-)
>
> [...]
>
> > /*
> > * Bit location of each capability (used by user-space library and kernel)
> > */
> > @@ -350,6 +354,17 @@ typedef struct kernel_cap_struct {
> >
> > #define CAP_INIT_INH_SET CAP_EMPTY_SET
> >
>
> It's kind of a pity to put a kernel config ifdef in a header file. Could
> you put the ifdef code in the c-files that uses these definitions?
>
> > +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
>
> In my experience when headers define things differently based on
> configuration #defines, other users of header files (apps, kernel
> modules etc.), never quite know what the current define is. If we can
> avoid conditional code like this in this header file, I'd be happier.
>
> > +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
>
> ditto.
>
> [...]
> > +extern long cap_prctl_drop(unsigned long cap);
> > +#else
> > +#include <linux/errno.h>
> > +static inline long cap_prctl_drop(unsigned long cap)
> > +{ return -EINVAL; }
> > +#endif
> > +
> > +long cap_prctl_drop(unsigned long cap)
> > +{
> > + if (!capable(CAP_SETPCAP))
> > + return -EPERM;
> > + if (!cap_valid(cap))
> > + return -EINVAL;
> > + cap_lower(current->cap_bset, cap);
>
> I think the following lines are overkill. Basically, the next exec()
> will perform the pP/pE clipping, and cap_bset should only interact with
> fP (and not fI).
>
> We already have a mechanism to manipulate pI, which in turn gates fI.
> And this same mechanism (libcap) can clip pE, pP if it is needed pre-exec().
>
> So, if you want to drop a capability irrevocably, you drop it in bset,
> and separately in pI. The current process may continue to have the
> capability, but post-exec the working process tree has lost it. For
> things like login, this is desirable.
>
> This also makes it possible for you to allow pI to have a capability
> otherwise banned in cap_bset which is useful with limited role accounts.
>
> > + current->cap_effective = cap_intersect(current->cap_effective,
> > + current->cap_bset);
> > + current->cap_permitted = cap_intersect(current->cap_permitted,
> > + current->cap_bset);
> > + current->cap_inheritable = cap_intersect(current->cap_inheritable,
> > + current->cap_bset);
>
> You might want to replace the above three lines with a restriction
> elsewhere on what CAP_SETPCAP can newly set in
> commoncap.c:cap_capset_check().
>
> That is, CAP_SETPCAP permits the current process to raise 'any' pI
> capability. I suspect that you'll want to prevent raising any bits not
> masked by this:
>
> pI' & ~(pI | (pP & cap_bset)).
>
> Cheers
>
> Andrew
How about the following?
thanks,
-serge
>From 16d76d11d27f32487366a7cec6a52f6ec4fb1cbb Mon Sep 17 00:00:00 2001
From: Serge E. Hallyn <serue@us.ibm.com>
Date: Mon, 19 Nov 2007 13:54:05 -0500
Subject: [PATCH 1/1] capabilities: introduce per-process capability bounding set (v9)
The capability bounding set is a set beyond which capabilities
cannot grow. Currently cap_bset is per-system. It can be
manipulated through sysctl, but only init can add capabilities.
Root can remove capabilities. By default it includes all caps
except CAP_SETPCAP.
This patch makes the bounding set per-process when file
capabilities are enabled. It is inherited at fork from parent.
No one can add elements; CAP_SETPCAP is required to remove them.
One example use of this is to start a safer container. For
instance, until device namespaces or per-container device
whitelists are introduced, it is best to take CAP_MKNOD away
from a container.
The bounding set will not affect pP and pE immediately. It will
only affect pP' and pE' after subsequent exec()s. It also does
not affect pI, and exec() does not constrain pI'. So to really
start a shell with no way of regaining CAP_MKNOD, you would do
prctl(PR_CAPBSET_DROP, CAP_MKNOD);
cap_t cap = cap_get_proc();
cap_value_t caparray[1];
caparray[0] = CAP_MKNOD;
cap_set_flag(cap, CAP_INHERITABLE, 1, caparray, CAP_CLEAR);
cap_set_proc(cap);
cap_free(cap);
The following test program will get and set the bounding
set. For instance
gcc -o capbound capbound.c -lcap
./capbound get
(lists capabilities in bset)
./capbound drop cap_net_raw
(starts shell with new bset)
(use capset, setuid binary, or binary with
file capabilities to try to increase caps)
************************************************************
capbound.c
************************************************************
#include <sys/prctl.h>
#include <sys/capability.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef PR_CAPBSET_READ
#define PR_CAPBSET_READ 23
#endif
#ifndef PR_CAPBSET_DROP
#define PR_CAPBSET_DROP 24
#endif
/*
 * Print the two supported invocations of the program to stdout.
 *
 * me: program name to show in the usage text (typically argv[0]).
 *
 * Always returns 1 so callers can write "return usage(argv[0]);" to
 * exit with a failure status.
 */
int usage(char *me)
{
	fprintf(stdout, "Usage: %s get\n", me);
	fprintf(stdout, " %s drop <capability>\n", me);

	return 1;
}
/* Number of capability names this program knows about. */
#define numcaps 32

/*
 * Capability names indexed by capability number: captable[n] is the name
 * for the value n that this program passes to prctl(PR_CAPBSET_READ, n)
 * and prctl(PR_CAPBSET_DROP, n).
 */
char *captable[numcaps] = {
	[0]  = "cap_chown",
	[1]  = "cap_dac_override",
	[2]  = "cap_dac_read_search",
	[3]  = "cap_fowner",
	[4]  = "cap_fsetid",
	[5]  = "cap_kill",
	[6]  = "cap_setgid",
	[7]  = "cap_setuid",
	[8]  = "cap_setpcap",
	[9]  = "cap_linux_immutable",
	[10] = "cap_net_bind_service",
	[11] = "cap_net_broadcast",
	[12] = "cap_net_admin",
	[13] = "cap_net_raw",
	[14] = "cap_ipc_lock",
	[15] = "cap_ipc_owner",
	[16] = "cap_sys_module",
	[17] = "cap_sys_rawio",
	[18] = "cap_sys_chroot",
	[19] = "cap_sys_ptrace",
	[20] = "cap_sys_pacct",
	[21] = "cap_sys_admin",
	[22] = "cap_sys_boot",
	[23] = "cap_sys_nice",
	[24] = "cap_sys_resource",
	[25] = "cap_sys_time",
	[26] = "cap_sys_tty_config",
	[27] = "cap_mknod",
	[28] = "cap_lease",
	[29] = "cap_audit_write",
	[30] = "cap_audit_control",
	[31] = "cap_setfcap",
};
/*
 * List the capabilities currently present in the bounding set.
 *
 * Queries every capability number in [0, numcaps) with
 * prctl(PR_CAPBSET_READ) and prints, comma separated on one line, the
 * names of those for which the call returns 1.  A failing query is
 * reported with perror() and otherwise skipped.
 *
 * Always returns 0.
 */
int getbcap(void)
{
	unsigned long cap;
	int printed = 0;

	printf("i know of %d capabilities\n", numcaps);
	printf("capability bounding set:");

	for (cap = 0; cap < numcaps; cap++) {
		int status = prctl(PR_CAPBSET_READ, cap);

		if (status < 0) {
			perror("prctl");
			continue;
		}
		if (status != 1)
			continue;

		/* First name is preceded by a space, later ones by ", ". */
		printf("%s%s", printed ? ", " : " ", captable[cap]);
		printed++;
	}

	printf("\n");
	return 0;
}
/*
 * Drop one capability from the calling process' bounding set and from
 * its inheritable set.
 *
 * str: capability name to drop; must match an entry of captable exactly.
 *
 * Returns 0 on success.  Returns 1 if the name is not in captable, if
 * prctl(PR_CAPBSET_DROP) fails, or if any libcap call needed to clear the
 * inheritable bit fails; except for the unknown-name case the failing
 * call is reported with perror().
 */
int capdrop(char *str)
{
	unsigned long i;
	int found = 0;
	int ret = 1;
	cap_t cap;
	cap_value_t caparray[1];

	for (i = 0; i < numcaps; i++) {
		if (strcmp(captable[i], str) == 0) {
			found = 1;
			break;
		}
	}
	if (!found)
		return 1;

	if (prctl(PR_CAPBSET_DROP, i)) {
		perror("prctl");
		return 1;
	}

	/*
	 * Also clear the capability from the inheritable set.  Failures here
	 * must not be ignored: the caller would otherwise go on as if the
	 * capability had been fully dropped.
	 */
	cap = cap_get_proc();
	if (!cap) {
		perror("cap_get_proc");
		return 1;
	}

	caparray[0] = i;
	if (cap_set_flag(cap, CAP_INHERITABLE, 1, caparray, CAP_CLEAR)) {
		perror("cap_set_flag");
		goto out;
	}
	if (cap_set_proc(cap)) {
		perror("cap_set_proc");
		goto out;
	}
	ret = 0;

out:
	cap_free(cap);
	return ret;
}
/*
 * Entry point.
 *
 *   <prog> get                 print the current bounding set
 *   <prog> drop <capability>   drop the capability, then exec /bin/bash
 *
 * Any other invocation prints the usage text.  Returns the result of
 * usage()/getbcap() for those paths, and 1 if dropping the capability or
 * executing the shell fails.
 */
int main(int argc, char *argv[])
{
	if (argc < 2)
		return usage(argv[0]);

	if (strcmp(argv[1], "get") == 0)
		return getbcap();

	if (strcmp(argv[1], "drop") != 0 || argc < 3)
		return usage(argv[0]);

	/* Note: this message is printed for every capdrop() failure. */
	if (capdrop(argv[2])) {
		printf("unknown capability\n");
		return 1;
	}

	/*
	 * The argument list terminator of a variadic exec call must be a
	 * null pointer of type char *, hence the explicit cast.
	 */
	execl("/bin/bash", "/bin/bash", (char *)NULL);

	/* execl() only returns on failure. */
	perror("execl");
	return 1;
}
************************************************************
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
include/linux/capability.h | 8 +++++++-
include/linux/init_task.h | 12 ++++++++++++
include/linux/prctl.h | 4 ++++
include/linux/sched.h | 2 +-
include/linux/security.h | 5 -----
include/linux/sysctl.h | 3 ---
kernel/fork.c | 1 +
kernel/sys.c | 9 ++++++++-
kernel/sysctl.c | 35 -----------------------------------
kernel/sysctl_check.c | 7 -------
security/commoncap.c | 44 +++++++++++++++++++++++++++++---------------
11 files changed, 62 insertions(+), 68 deletions(-)
diff --git a/include/linux/capability.h b/include/linux/capability.h
index a1d93da..e8aa972 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -153,6 +153,7 @@ typedef struct kernel_cap_struct {
* remove any capability in your permitted set from any pid
* With VFS support for capabilities (neither of above, but)
* Add any capability to the current process' inheritable set
+ * Allow taking bits out of capability bounding set
*/
#define CAP_SETPCAP 8
@@ -202,7 +203,6 @@ typedef struct kernel_cap_struct {
#define CAP_IPC_OWNER 15
/* Insert and remove kerne
...