OpenVZ Forum


Home » Mailing lists » Devel » [RFC PATCH 1/2] capabilities: define CONFIG_COMMONCAP
Re: [PATCH 2/2] capabilities: introduce per-process capability bounding set (v7) [message #23445 is a reply to message #23365] Fri, 16 November 2007 17:18 Go to previous messageGo to previous message
Andrew Morgan is currently offline  Andrew Morgan
Messages: 9
Registered: September 2007
Junior Member
Serge,

I've been thinking a lot about this one. As an alternative
implementation, have you considered changing one bounding capability bit
per system call? Something like this:

    prctl(PR_CAPBSET_READ, CAPVERSION, CAP_NET_RAW);
        returns -> 1(allowed) or 0(blocked)
    prctl(PR_CAPBSET_DROP, CAPVERSION, CAP_NET_RAW)
        returns -> 0(success) or -EPERM;

I also think we should use CAP_SETPCAP for the privilege of manipulating
the bounding set. In many ways irrevocably removing a permission
requires the same level of due care as adding one (to pI).

This has scalability designed in, at the expense of more system calls to
get the same (rare) work done.

Cheers

Andrew

Serge E. Hallyn wrote:
>>From 9ba95f1dbf88a512ffd423f6ccd627dc0460b052 Mon Sep 17 00:00:00 2001
> From: Serge E. Hallyn <serue@us.ibm.com>
> Date: Mon, 12 Nov 2007 16:50:04 -0500
> Subject: [PATCH 2/2] capabilities: introduce per-process capability bounding set (v7)
> 
> The capability bounding set is a set beyond which capabilities
> cannot grow.  Currently cap_bset is per-system.  It can be
> manipulated through sysctl, but only init can add capabilities.
> Root can remove capabilities.  By default it includes all caps
> except CAP_SETPCAP.
> 
> This patch makes the bounding set per-process.  It is inherited
> at fork from parent.  No one can add elements; CAP_SYS_ADMIN is
> required to remove them.  Perhaps a new capability should be
> introduced to control the ability to remove capabilities, in
> order to help prevent running a privileged app with enough
> privs to be dangerous but not enough to be successful.
> 
> One example use of this is to start a safer container.  For
> instance, until device namespaces or per-container device
> whitelists are introduced, it is best to take CAP_MKNOD away
> from a container.
> 
> Two questions:
> 
> 	1.  I set CAP_FULL_SET and CAP_INIT_EFF_SET to contain
> only valid capabilities.  Does that seem like a future maintenance
> headache?  We only want the capability bounding set returned from kernel
> to contain valid capabilities, so having CAP_FULL_SET contain all
> capabilities would mean that on every cap_prctl_getbset() we'd have to
> either manually clear invalid bits or let userspace sort it out.
> 
> 	2. Would getting and setting the bounding sets be
> better done through syscall?  That better mirrors the capset+capget,
> but using prctl better mirrors the keep_capabilities setting.
> 
> The following test program will get and set the bounding
> set.  For instance
> 
> 	./bset get
> 		(lists capabilities in bset)
> 	./bset set cap_sys_admin
> 		(starts shell with new bset)
> 		(use capset, setuid binary, or binary with
> 		file capabilities to try to increase caps)
> 
> ===========================================================
> bset.c:
> ===========================================================
>  #include <sys/prctl.h>
>  #include <linux/capability.h>
>  #include <sys/types.h>
>  #include <unistd.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> 
>  /*
>   * Fallback values for the two prctl() options used by this program, for
>   * builds against headers that do not define them.
>   * NOTE(review): presumably these match the values the patch adds to
>   * include/linux/prctl.h -- confirm against the patched header.
>   */
>  #ifndef PR_GET_CAPBSET
>  #define PR_GET_CAPBSET 23
>  #endif
> 
>  #ifndef PR_SET_CAPBSET
>  #define PR_SET_CAPBSET 24
>  #endif
> 
>  /* CAPVERSION is the value this program passes as the second prctl() argument. */
>  #define _LINUX_CAPABILITY_VERSION_1  0x19980330
>  #define _LINUX_CAPABILITY_VERSION_2  0x20071026
>  #define CAPVERSION _LINUX_CAPABILITY_VERSION_2
> 
>  #define NUMCAPS 31
> 
> /*
>  * Print the command synopsis to stdout, using 'me' as the program name.
>  * Always returns 1 so the result can be used directly as an exit status.
>  */
> int usage(char *me)
> {
> 	printf("Usage: %s get\n"
> 	       "       %s set capability_string\n", me, me);
> 	fputs("         capability_string is for instance:\n"
> 	      "         cap_sys_admin,cap_mknod,cap_dac_override\n", stdout);
> 	return 1;
> }
> 
> char *captable[] = {
> 	"cap_dac_override",
> 	"cap_dac_read_search",
> 	"cap_fowner",
> 	"cap_fsetid",
> 	"cap_kill",
> 	"cap_setgid",
> 	"cap_setuid",
> 	"cap_setpcap",
> 	"cap_linux_immutable",
> 	"cap_net_bind_service",
> 	"cap_net_broadcast",
> 	"cap_net_admin",
> 	"cap_net_raw",
> 	"cap_ipc_lock",
> 	"cap_ipc_owner",
> 	"cap_sys_module",
> 	"cap_sys_rawio",
> 	"cap_sys_chroot",
> 	"cap_sys_ptrace",
> 	"cap_sys_pacct",
> 	"cap_sys_admin",
> 	"cap_sys_boot",
> 	"cap_sys_nice",
> 	"cap_sys_resource",
> 	"cap_sys_time",
> 	"cap_sys_tty_config",
> 	"cap_mknod",
> 	"cap_lease",
> 	"cap_audit_write",
> 	"cap_audit_control",
> 	"cap_setfcap"
> };
> 
> char *bittostr(unsigned int i, unsigned int j)
> {
> 	if (i!=0 || j>31)
> 		return "invalid";
> 	return captable[j];
> }
> 
> void print_capset(unsigned int *bset)
> {
> 	unsigned int i, j, comma=0;
> 	printf("Capability bounding set: ");
> 	for (i=0; i<2; i++) {
> 		for (j=0; j<31; j++)
> 			if (bset[i] & (1 << (j+1)))
> 				printf("%s%s", comma++?",":"",bittostr(i, j));
> 	}
> 	printf("\n");
> }
> 
> int getbcap(void)
> {
> 	unsigned int bset[2];
> 	if (prctl(PR_GET_CAPBSET, CAPVERSION, &bset)) {
> 		perror("prctl");
> 		return 1;
> 	}
> 	print_capset(bset);
> 	return 0;
> }
> 
> int captoint(char *cap)
> {
> 	int i;
> 	for (i=0; i<NUMCAPS; i++)
> 		if (strcmp(captable[i], cap) == 0)
> 			return i+1;
> 	return -1;
> }
> 
> /*
>  * Parse the comma-separated capability names in 'str' and install the
>  * resulting set with prctl(PR_SET_CAPBSET).  'str' is modified in place
>  * by strtok().  Returns 0 on success, or 1 after printing a message if
>  * a name is not recognised or prctl() returns non-zero.
>  */
> int setbcap(char *str)
> {
> 	unsigned int bset[2];
> 	char *token = strtok(str, ",");
> 
> 	bset[0] = bset[1] = 0;
> 	while (token) {
> 		int bit = captoint(token);
> 		if (bit < 0) {
> 			printf("invalid cap: %s\n", token);
> 			return 1;
> 		}
> 		/* 1U: bit 31 does not fit in a signed int, so 1 << 31 is undefined */
> 		bset[bit/32] |= 1U << (bit%32);
> 		token = strtok(NULL, ",");
> 
> 	}
> 	if (prctl(PR_SET_CAPBSET, CAPVERSION, &bset)) {
> 		perror("prctl");
> 		return 1;
> 	}
> 	return 0;
> }
> 
> /*
>  * "get" prints the current bounding set.  "set <caps>" installs a new
>  * bounding set and then replaces this process with /bin/bash so the
>  * shell runs under it.  Any other invocation prints the usage text.
>  * Returns 0 after a successful "get" and 1 on any error.
>  */
> int main(int argc, char *argv[])
> {
> 	if (argc<2)
> 		return usage(argv[0]);
> 	if (strcmp(argv[1], "get")==0)
> 		return getbcap();
> 	if (strcmp(argv[1], "set")!=0 || argc<3)
> 		return usage(argv[0]);
> 	if (setbcap(argv[2]))
> 		return 1;
> 	/* The variadic argument list must end with a null *pointer*. */
> 	execl("/bin/bash", "/bin/bash", (char *)NULL);
> 	/* execl() only returns when the exec failed. */
> 	perror("execl");
> 	return 1;
> }
> ===========================================================
> 
> Changelog:
> 	Enforce current-> capabilities are subsets of the
> 	new bounding set.
> 
> 	As suggested by Andrew Morgan, send the capability
> 	version along with the bset for prctl(PR_SET_CAPBSET)
> 	and prctl(PR_GET_CAPBSET)
> 
> 	Adapt to 64-bit capabilities.
> 
> 	Update CAP_FULL_SET and CAP_INIT_EFF_SET to only
> 	contain valid capabilities.
> 
> Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
>  include/linux/capability.h |   34 +++++++++++++++++++++++++--
>  include/linux/init_task.h  |    1 +
>  include/linux/prctl.h      |    4 +++
>  include/linux/sched.h      |    2 +-
>  include/linux/security.h   |    5 ----
>  include/linux/sysctl.h     |    3 --
>  kernel/fork.c              |    1 +
>  kernel/sys.c               |   53 ++++++++++++++++++++++++++++++++++++++++++++
>  kernel/sysctl.c            |   35 -----------------------------
>  kernel/sysctl_check.c      |    7 -----
>  security/commoncap.c       |   37 +++++++++++++++++++++++++++---
>  11 files changed, 124 insertions(+), 58 deletions(-)
> 
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index a1d93da..64e668a 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -202,7 +202,6 @@ typedef struct kernel_cap_struct {
>  #define CAP_IPC_OWNER        15
>  
>  /* Insert and remove kernel modules - modify kernel without limit */
> -/* Modify cap_bset */
>  #define CAP_SYS_MODULE       16
>  
>  /* Allow ioperm/iopl access */
> @@ -259,6 +258,7 @@ typedef struct kernel_cap_struct {
>     arbitrary SCSI commands */
>  /* Allow setting encryption key on loopback filesystem */
>  /* Allow setting zone reclaim policy */
> +/* Allow taking bits out of capability bounding set */
>  
>  #define CAP_SYS_ADMIN        21
>  
> @@ -315,6 +315,12 @@ typedef struct kernel_cap_struct {
>  #define CAP_SETFCAP	     31
>  
>  /*
> + * XXX
> + * When adding a capability, please update the definitions of
> + * CAP_FULL_SET and CAP_INIT_EFF_SET below
> + */
> +
> +/*
>   * Bit location of each capability (used by user-space library and kernel)
>   */
>  
> @@ -341,8 +347,8 @@ typedef struct kernel_cap_struct {
>  #else /* HAND-CODED capability initializers */
>  
>  # define CAP_EMPTY_SET    {{ 0, 0 }}
> -# define CAP_FULL_SET     {{ ~0, ~0 }}
> -# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}
> +# define CAP_FULL_SET     {{ ~0, 0 }}
> +# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), 0 }}
>  # define CAP_FS_SET       {{ CAP_FS_MASK_B0, 0 }}
>  # define CAP_NFSD_SET     {{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), 0 }}
>  
> @@ -350,6 +356,17 @@ typedef struct kernel_cap_struct {
>  
>  #define CAP_INIT_INH_SET    CAP_EMPTY_SET
>  
> +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: netns refcounting
Next Topic: cleanup in workq and dst_destroy
Goto Forum:
  


Current Time: Sat Aug 02 10:44:39 GMT 2025

Total time taken to generate the page: 0.92109 seconds