OpenVZ Forum


Home » Mailing lists » Devel » [patch -mm 00/17] new namespaces and related syscalls
[patch -mm 10/17] nsproxy: add unshare_ns and bind_ns syscalls [message #16812 is a reply to message #16802] Tue, 05 December 2006 10:28 Go to previous messageGo to previous message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
From: Cedric Le Goater <clg@fr.ibm.com>

The following patch defines 2 new syscalls specific to nsproxy and
namespaces :

* unshare_ns :

	enables a process to unshare one or more namespaces. For the
	moment this duplicates the unshare syscall, but we expect the
	two to diverge as the number of namespaces increases.

* bind_ns :
	
	allows a process to either
	1 - bind its own nsproxy to some identifier, or
	2 - bind itself to another nsproxy, designated by an identifier
	    or by -pid (the negated pid of a task using that nsproxy)

Here's a sample user space program that uses them.

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <libgen.h>

#include <linux/unistd.h>

/*
 * Syscall numbers used for unshare_ns and bind_ns on each supported
 * architecture. Building for any other architecture stops with #error.
 */
#if __i386__
#    define __NR_unshare_ns	324
#    define __NR_bind_ns 	325
#elif __ia64__
#    define __NR_unshare_ns	1303
#    define __NR_bind_ns	1304
#elif __powerpc__
#    define __NR_unshare_ns	303
#    define __NR_bind_ns	304
#elif __s390x__
#    define __NR_unshare_ns	313
#    define __NR_bind_ns	314
#elif __x86_64__
#    define __NR_unshare_ns	284
#    define __NR_bind_ns	285
#else
#    error "Architecture not supported"
#endif

/*
 * User-space stubs: unshare_ns(flags) and bind_ns(id, flags), both
 * returning int.
 * NOTE(review): _syscall1/_syscall2 are presumably the stub-generating
 * macros made available through <linux/unistd.h>; confirm that the
 * installed kernel headers still export them.
 */
static inline _syscall1 (int,  unshare_ns, unsigned long, flags)
static inline _syscall2 (int,  bind_ns, int, id, unsigned long, flags)

/*
 * Namespace selection bits, one per namespace type. main() ORs them
 * together into the flags argument passed to both syscalls.
 * NOTE(review): presumably these must match the kernel-side NS_*
 * values; verify against the kernel headers.
 */
#define NS_MNT		0x00000001
#define NS_UTS		0x00000002
#define NS_IPC		0x00000004
#define NS_PID		0x00000008
#define NS_NET		0x00000010
#define NS_USER		0x00000020

/* Basename of argv[0]; set in main() and used to select the mode. */
static const char* procname;

/*
 * Print the command synopsis and the option summary on stdout, then
 * terminate the process with exit status 1.
 *
 * @name: program name shown in the synopsis line
 */
static void usage(const char *name)
{
	/* Everything that follows the synopsis line, in output order. */
	static const char *const help_lines[] = {
		"\n",
		"  -h		this message\n",
		"\n",
		"  -I <id>	bind process to nsproxy <id>\n",
		"  -m		mount namespace\n",
		"  -u		utsname namespace\n",
		"  -i		ipc namespace\n",
		"  -U		user namespace\n",
		"  -n		net namespace\n",
		"  -p		pid namespace\n",
		"\n",
		"(C) Copyright IBM Corp. 2006\n",
		"\n",
	};
	const size_t count = sizeof(help_lines) / sizeof(help_lines[0]);
	size_t idx;

	printf("usage: %s [-h] [-I id] [-muiUnp] [command [arg ...]]\n", name);

	for (idx = 0; idx < count; idx++)
		fputs(help_lines[idx], stdout);

	exit(1);
}

/*
 * Convert the argument of -I into an int.
 *
 * @arg: option argument as delivered by getopt() (may be NULL)
 * @id:  receives the parsed value on success; untouched on failure
 *
 * The whole string must be a decimal number that fits in an int.
 * Negative values are accepted.
 *
 * Returns 0 on success, -1 if @arg is missing, malformed or out of
 * range.
 */
static int parse_id(const char *arg, int *id)
{
	char *end;
	long val;

	if (!arg)
		return -1;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE)
		return -1;

	/* reject values that do not survive the narrowing to int */
	if ((long)(int)val != val)
		return -1;

	*id = (int)val;
	return 0;
}

/*
 * Parse the command line, optionally unshare the selected namespaces
 * (only when the program is invoked as "unsharens"), call bind_ns()
 * with the requested id and flags, and finally exec the remaining
 * arguments as a command, if any were given.
 *
 * Returns 0 on success and 1 on any failure; a successful exec does
 * not return.
 */
int main(int argc, char *argv[])
{
	/* POSIX leaves the declaration of environ to the program */
	extern char **environ;
	int c;
	unsigned long flags = 0;
	int id = -1;

	procname = basename(argv[0]);

	/*
	 * The leading '+' makes GNU getopt stop at the first non-option,
	 * so options meant for the command are left alone.
	 */
	while ((c = getopt(argc, argv, "+muiUnphI:")) != EOF) {
		switch (c) {
		case 'I':
			/* atoi() would silently map garbage to id 0 */
			if (parse_id(optarg, &id) != 0) {
				fprintf(stderr, "%s: invalid nsproxy id '%s'\n",
					procname, optarg ? optarg : "");
				return 1;
			}
			break;

		case 'm': flags |= NS_MNT;  break;
		case 'u': flags |= NS_UTS;  break;
		case 'i': flags |= NS_IPC;  break;
		case 'U': flags |= NS_USER; break;
		case 'n': flags |= NS_NET;  break;
		case 'p': flags |= NS_PID;  break;
		case 'h':
		default:
			usage(procname);	/* does not return */
		}
	}

	/* what is left after the options is the command to run */
	argv = &argv[optind];
	argc = argc - optind;

	if (!strcmp(procname, "unsharens")) {
		if (unshare_ns(flags) == -1) {
			perror("unshare_ns");
			return 1;
		}
	}

	/*
	 * NOTE(review): bind_ns() is attempted even when no -I was given,
	 * in which case id is still -1; confirm this is intended.
	 */
	if (bind_ns(id, flags) == -1) {
		perror("bind_ns");
		return 1;
	}

	if (argc) {
		execve(argv[0], argv, environ);
		/* only reached when the exec failed */
		perror("execve");
		return 1;
	}

	return 0;
}

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>

---
 arch/i386/kernel/syscall_table.S  |    2 
 arch/ia64/kernel/entry.S          |    2 
 arch/s390/kernel/compat_wrapper.S |   11 +
 arch/s390/kernel/syscalls.S       |    2 
 arch/x86_64/ia32/ia32entry.S      |    2 
 include/asm-i386/unistd.h         |    4 
 include/asm-ia64/unistd.h         |    4 
 include/asm-powerpc/systbl.h      |    2 
 include/asm-powerpc/unistd.h      |    4 
 include/asm-s390/unistd.h         |    4 
 include/asm-x86_64/unistd.h       |    6 
 include/linux/syscalls.h          |    3 
 kernel/nsproxy.c                  |  325 +++++++++++++++++++++++++++++++++++++-
 kernel/sys_ni.c                   |    4 
 14 files changed, 366 insertions(+), 9 deletions(-)

Index: 2.6.19-rc6-mm2/include/linux/syscalls.h
===================================================================
--- 2.6.19-rc6-mm2.orig/include/linux/syscalls.h
+++ 2.6.19-rc6-mm2/include/linux/syscalls.h
@@ -605,6 +605,9 @@ asmlinkage long sys_set_robust_list(stru
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 
+asmlinkage long sys_unshare_ns(unsigned long unshare_flags);
+asmlinkage long sys_bind_ns(int id, unsigned long unshare_flags);
+
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage long sys_kevent_get_events(int ctl_fd, unsigned int min, unsigned int max,
Index: 2.6.19-rc6-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.19-rc6-mm2.orig/kernel/nsproxy.c
+++ 2.6.19-rc6-mm2/kernel/nsproxy.c
@@ -22,7 +22,11 @@
 #include <linux/pid_namespace.h>
 #include <linux/net_namespace.h>
 
-#define NS_HASH_BITS 		3 /* this might need some configuration */
+/*
+ * nsproxies are stored in a hash but a rbtree might be more
+ * appropriate.
+ */
+#define NS_HASH_BITS 		3
 #define NS_HASH_SIZE		(1 << NS_HASH_BITS)
 #define NS_HASH_MASK		(NS_HASH_SIZE - 1)
 #define ns_hashfn(id)		(((id >> NS_HASH_BITS) + id) & NS_HASH_MASK)
@@ -193,11 +197,26 @@ static void free_nsproxy(struct nsproxy 
 	kfree(ns);
 }
 
+/*
+ * put_nsproxy() is similar to free_uid() in kernel/user.c
+ *
+ * the lock can be taken from a tasklet context (task getting freed by
+ * RCU) which requires to be irq safe.
+ */
 void put_nsproxy(struct nsproxy *ns)
 {
-	if (atomic_dec_and_test(&ns->count)) {
-		free_nsproxy(ns);
-	}
+ 	unsigned long flags;
+
+ 	local_irq_save(flags);
+ 	if (atomic_dec_and_lock(&ns->count, &ns_hash_lock)) {
+ 		BUG_ON(!ns->id);
+ 		if (ns->id != -1)
+ 			hlist_del(&ns->ns_hash_node);
+ 		spin_unlock_irqrestore(&ns_hash_lock, flags);
+ 		free_nsproxy(ns);
+ 	} else {
+ 		local_irq_restore(flags);
+ 	}
 }
 
 /*
@@ -218,6 +237,304 @@ static inline struct nsproxy *ns_hash_fi
 	return NULL;
 }
 
+static int bind_ns(int id, struct nsproxy *ns)
+{
+ 	struct nsproxy *prev;
+	int ret = 0;
+
+ 	if (id < 0)
+ 		return -EINVAL;
+
+ 	spin_lock_irq(&ns_hash_lock);
+ 	prev = ns_hash_find(id);
+ 	if (!prev) {
+		ns->id = id;
+		hlist_add_head(&ns->ns_hash_node, ns_hash_head(ns->id));
+	}
+	spin_unlock_irq(&ns_hash_lock);
+
+	if (prev) {
+		ret = -EBUSY;
+		put_nsproxy(prev);
+	}
+ 	return ret;
+}
+
+static int switch_ns(int id, unsigned long flags)
+{
+	int err = 0;
+	struct nsproxy *ns = NULL, *old_ns = NULL, *new_ns = NULL;
+
+	if (flags & ~NS_ALL)
+		return -EINVAL;
+
+	/* Let 0 be a default value ? */
+	if (!flags)
+		flags = NS_ALL;
+
+	if (id < 0) {
+		struct task_struct *p;
+
+		err = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(-id);
+		if (p) {
+			task_lock(p);
+			get_nsproxy(p->nsproxy);
+			ns = p->nsproxy;
+			task_unlock(p);
+		}
+		read_unlock(&tasklist_lock);
+	} else {
+		err = -ENOENT;
+		spin_lock_irq(&ns_hash_lock);
+		ns = ns_hash_find(id);
+		spin_unlock_irq(&ns_hash_lock);
+	}
+
+	if (!ns)
+		goto out;
+
+	new_ns = ns;
+
+	/*
+	 * clone current nsproxy and populate it with the namespaces
+	 * chosen by flags.
+	 */
+	if (flags != NS_ALL) {
+		new_ns = dup_namespaces(current->nsproxy);
+		if (!new_ns) {
+			err = -ENOMEM;
+			goto out_ns;
+		}
+
+		if (flags & NS_MNT) {
+			put_mnt_ns(new_ns->mnt_ns);
+			get_mnt_ns(ns->mnt_ns);
+			new_ns->mnt_ns = ns->mnt_ns;
+		}
+
+		if (flags & NS_UTS) {
+			put_uts_ns(new_ns->uts_ns);
+			get_uts_ns(ns->uts_ns);
+			new_ns->uts_ns = ns->uts_ns;
+		}
+
+		if (flags & NS_IPC) {
+			put_ipc_ns(new_ns->ipc_ns);
+			new_ns->ipc_ns = get_ipc_ns(ns->ipc_ns);
+		}
+	out_ns:
+		put_nsproxy(ns);
+	}
+
+	task_lock(current);
+	if (new_ns) {
+		old_ns = current->nsproxy;
+		current->nsproxy = new_ns;
+	}
+	task_unlock(current);
+
+	if (old_ns)
+		put_nsproxy(old_ns);
+
+	err = 0;
+out:
+	return err;
+}
+
+
+/*
+ * bind_ns - bind the nsproxy of a task to an id or bind a task to a
+ *           identified nsproxy
+ *
+ * @id: nsproxy identifier if positive or pid if negative
+ * @flags: identifies the namespaces to bind to
+ *
+ * bind_ns serves 2 purposes.
+ *
+ * The first is to bind the nsproxy of the current task to the
+ * identifier @id. If the identifier is already used, -EBUSY is
+ * returned. If the nsproxy is already bound, -EACCES is returned.
+ * flags is not used in that case.
+ *
+ * The second use is to bind the current task to a subset of
+ * namespaces of an identified nsproxy. If positive, @id is considered
+ * being an nsproxy identifier previously used to bind the nsproxy to
+ * @id. If negative, @id is the pid of a task which is another way to
+ * identify a nsproxy. Switching nsproxy is restricted to tasks within
+ * nsproxy 0, the default nsproxy. If unknown, -ENOENT is returned.
+ * @flags is used to bind the task to the selected namespaces.
+ *
+ * Both uses may return -EINVAL for invalid arguments and -EPERM for
+ * insufficient privileges.
+ *
+ * Returns 0 on success.
+ */
+asmlinkage long sys_bind_ns(int id, unsigned long flags)
+{
+	struct nsproxy *ns = current->nsproxy;
+	int ret = 0;
+
+	/*
+	 * ns is being changed by switch_ns(), protect it
+	 */
+	get_nsproxy(ns);
+
+	/*
+	 * protect ns->id
+	 */
+	spin_lock(&ns->nslock);
+	switch (ns->id) {
+	case -1:
+		/*
+		 * only an unbound nsproxy can be bound to an id.
+		 */
+		ret = bind_ns(id, ns);
+		break;
+
+	case 0:
+		if (!capable(CAP_SYS_ADMIN)) {
+			ret = -EPERM;
+			goto unlock;
+		}
+
+		/*
+		 * only nsproxy 0 can switch nsproxy. if target id is
+		 * 0, this is a nop.
+		 */
+		if (id)
+			ret = switch_ns(id, flags);
+		break;
+
+	default:
+		/*
+		 * current nsproxy is already bound. forbid any
+		 * switch.
+		 */
+		ret = -EACCES;
+	}
+unlock:
+	spin_unlock(&ns->nslock);
+	put_nsproxy(ns);
+	return ret;
+}
+
+/*
+ * sys_unshare_ns - unshare one or mor
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: seems to be a flaw in cfq
Next Topic: [PATCH] compat offsets size change
Goto Forum:
  


Current Time: Sun Oct 06 16:33:08 GMT 2024

Total time taken to generate the page: 0.04644 seconds