OpenVZ Forum


Home » Mailing lists » Devel » - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18997 is a reply to message #18993] Mon, 18 June 2007 16:54 Go to previous messageGo to previous message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
[ ... ]

>> It fixes the leak for me. I've run the LTP tests we 
>> have on namespace unsharing and I could see no 
>> leaks in /proc/slabinfo.
> 
>> Badari,
>>
>> That extra get_nsproxy() seemed to be a superfluous remnant 
>> from 2.6.20. 
>> Do you see any issues with it ?
>>
>> If we're all happy with these fixes, i'll send them on 
>> lkml@ for review. 
> 
> I'm not terribly happy with the current nsproxy
> framework, although it improved somewhat ...
> 
> I'm still missing some mechanism to 'mix' two
> proxies according to a flagmask (which is required
> to enter a guest 'partially') ...

We have the bind_ns() syscall, which does that. I sent it last year 
but it didn't have much success. We still use it, and there may be room
for improvement to make it more generally useful.  

Here's the patch on 2.6.21-mm2. If it's of any interest, I can 
refresh it on the latest -mm.

Thanks,

C.

The following patch defines the new bind_ns syscall specific to
nsproxy and namespaces, which allows a process to bind :

	1 - its nsproxy to some identifier
	2 - to another nsproxy using an identifier or -pid

Here's a sample user space program to use it. 

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <libgen.h>
#include <sys/syscall.h>

/*
 * Fallback syscall numbers for unshare(), compiled in only when the
 * build does not define HAVE_UNSHARE.  One value is selected per
 * supported architecture; any other architecture fails the build.
 */
#ifndef HAVE_UNSHARE

#if __i386__
#    define __NR_unshare 310
#elif __x86_64__
#    define __NR_unshare 272
#elif __ia64__
#    define __NR_unshare 1296
#elif __s390x__
#    define __NR_unshare 303
#elif __powerpc__
#    define __NR_unshare 282
#else
#    error "Architecture not supported"
#endif

#endif /* HAVE_UNSHARE */

/*
 * Per-architecture syscall number for bind_ns(); unsupported
 * architectures fail the build.
 * NOTE(review): presumably these match the slots the accompanying
 * kernel patch adds to each architecture's unistd.h -- verify against
 * the patched kernel headers before use.
 */
#if __i386__
#    define __NR_bind_ns 	326
#elif __ia64__
#    define __NR_bind_ns	1305
#elif __powerpc__
#    define __NR_bind_ns	304
#elif __s390x__
#    define __NR_bind_ns	314
#elif __x86_64__
#    define __NR_bind_ns	284
#else
#    error "Architecture not supported"
#endif

/*
 * Fallback definitions for namespace clone flags, each used only when
 * the included headers have not already defined it.  The values are
 * OR-ed into the flag mask passed to unshare() and bind_ns().
 */

/* utsname namespace (-u) */
#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS		0x04000000
#endif

/* ipc namespace (-i) */
#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC		0x08000000
#endif

/* user namespace (-U) */
#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER		0x10000000
#endif

/* net namespace level 2 (-n) */
#ifndef CLONE_NEWNET2
#define CLONE_NEWNET2		0x20000000
#endif

/* net namespace level 3 (-N) */
#ifndef CLONE_NEWNET3
#define CLONE_NEWNET3		0x40000000
#endif

/* pid namespace (-p) */
#ifndef CLONE_NEWPID
#define CLONE_NEWPID		0x80000000
#endif


static inline _syscall1 (int,  unshare, unsigned long, flags)
static inline _syscall2 (int,  bind_ns, int, id, unsigned long, flags)

static const char* procname;

/*
 * Print the command-line help to stdout, using @name as the program
 * name in the synopsis line, then terminate the process with exit
 * status 1.  This function does not return.
 */
static void usage(const char *name)
{
	/* Everything after the synopsis line, emitted verbatim in order. */
	static const char *const help_lines[] = {
		"\n",
		"  -h		this message\n",
		"\n",
		"  -I <id>	bind process to nsproxy <id>\n",
		"  -m		mount namespace\n",
		"  -u		utsname namespace\n",
		"  -i		ipc namespace\n",
		"  -U		user namespace\n",
		"  -n		net namespace level 2\n",
		"  -N		net namespace level 3\n",
		"  -p		pid namespace\n",
		"\n",
		"(C) Copyright IBM Corp. 2006\n",
		"\n",
	};
	size_t idx;

	printf("usage: %s [-h] [-I id] [-muiUnNp] [command [arg ...]]\n", name);

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
		fputs(help_lines[idx], stdout);

	exit(1);
}

/*
 * Entry point.
 *
 * Parses the command-line switches into a CLONE_NEW* flag mask and an
 * optional nsproxy identifier (-I), then:
 *   1. if the program was invoked under the name "unsharens", calls
 *      unshare() with the collected flags;
 *   2. calls bind_ns() with the identifier and flags;
 *   3. if a command follows the options, replaces the process image
 *      with it via execve().
 *
 * Returns 0 on success (when no command is given) and 1 on any failure.
 *
 * NOTE(review): when -I is not given, id stays at -1 and bind_ns() is
 * still called with it -- confirm that this default is intended.
 */
int main(int argc, char *argv[])
{
	/* POSIX requires the program to declare environ itself. */
	extern char **environ;
	unsigned long flags = 0;
	int id = -1;
	int c;

	procname = basename(argv[0]);

	/* Leading '+' stops option parsing at the first non-option word. */
	while ((c = getopt(argc, argv, "+muiUnNphI:")) != -1) {
		switch (c) {
		case 'I': {
			/*
			 * getopt() guarantees optarg is set for "I:".  Parse
			 * strictly: atoi() would silently accept garbage and
			 * has undefined behavior on overflow.  Negative
			 * values stay valid.
			 */
			char *end;
			long val;

			errno = 0;
			val = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' || errno == ERANGE ||
			    val < INT_MIN || val > INT_MAX) {
				fprintf(stderr, "%s: invalid nsproxy id '%s'\n",
					procname, optarg);
				return 1;
			}
			id = (int)val;
			break;
		}

		case 'm': flags |= CLONE_NEWNS;  break;
		case 'u': flags |= CLONE_NEWUTS;  break;
		case 'i': flags |= CLONE_NEWIPC;  break;
		case 'U': flags |= CLONE_NEWUSER; break;
		case 'n': flags |= CLONE_NEWNET2;  break;
		case 'N': flags |= CLONE_NEWNET3;  break;
		case 'p': flags |= CLONE_NEWPID;  break;
		case 'h':
		default:
			usage(procname);	/* does not return */
		}
	}

	/* Skip past the parsed options; what remains is the command. */
	argv = &argv[optind];
	argc = argc - optind;

	/* Unsharing happens only when invoked under the "unsharens" name. */
	if (!strcmp(procname, "unsharens")) {
		if (unshare(flags) == -1) {
			perror("unshare");
			return 1;
		}
	}

	if (bind_ns(id, flags) == -1) {
		perror("bind_ns");
		return 1;
	}

	if (argc) {
		execve(argv[0], argv, environ);
		/* Reached only when execve() failed. */
		perror("execve");
		return 1;
	}

	return 0;
}

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>

---
 arch/i386/kernel/syscall_table.S  |    1 
 arch/ia64/kernel/entry.S          |    1 
 arch/s390/kernel/compat_wrapper.S |    6 
 arch/s390/kernel/syscalls.S       |    1 
 arch/x86_64/ia32/ia32entry.S      |    1 
 include/asm-i386/unistd.h         |    3 
 include/asm-ia64/unistd.h         |    3 
 include/asm-powerpc/systbl.h      |    1 
 include/asm-powerpc/unistd.h      |    3 
 include/asm-s390/unistd.h         |    3 
 include/asm-x86_64/unistd.h       |    2 
 include/linux/nsproxy.h           |   10 -
 include/linux/sched.h             |    2 
 include/linux/syscalls.h          |    2 
 kernel/nsproxy.c                  |  264 +++++++++++++++++++++++++++++++++++++-
 kernel/sys_ni.c                   |    2 
 16 files changed, 290 insertions(+), 15 deletions(-)

Index: 2.6.21-mm2/include/linux/syscalls.h
===================================================================
--- 2.6.21-mm2.orig/include/linux/syscalls.h
+++ 2.6.21-mm2/include/linux/syscalls.h
@@ -609,6 +609,8 @@ asmlinkage long sys_timerfd(int ufd, int
 			    const struct itimerspec __user *utmr);
 asmlinkage long sys_eventfd(unsigned int count);
 
+asmlinkage long sys_bind_ns(int id, unsigned long unshare_flags);
+
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage long sys_revokeat(int dfd, const char __user *filename);
Index: 2.6.21-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.21-mm2.orig/kernel/nsproxy.c
+++ 2.6.21-mm2/kernel/nsproxy.c
@@ -22,7 +22,11 @@
 
 static struct kmem_cache *nsproxy_cachep;
 
-#define NS_HASH_BITS 		3 /* this might need some configuration */
+/*
+ * nsproxies are stored in a hash but a rbtree might be more
+ * appropriate.
+ */
+#define NS_HASH_BITS 		3
 #define NS_HASH_SIZE		(1 << NS_HASH_BITS)
 #define NS_HASH_MASK		(NS_HASH_SIZE - 1)
 #define ns_hashfn(id)		(((id >> NS_HASH_BITS) + id) & NS_HASH_MASK)
@@ -63,6 +67,30 @@ static inline struct nsproxy *clone_nspr
 }
 
 /*
+ * copies the nsproxy, setting refcount to 1, and grabbing a
+ * reference to all contained namespaces.  Called from
+ * sys_unshare()
+ */
+static struct nsproxy *dup_namespaces(struct nsproxy *orig)
+{
+	struct nsproxy *ns = clone_nsproxy(orig);
+
+	if (ns) {
+		if (ns->mnt_ns)
+			get_mnt_ns(ns->mnt_ns);
+		if (ns->uts_ns)
+			get_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			get_ipc_ns(ns->ipc_ns);
+		if (ns->pid_ns)
+			get_pid_ns(ns->pid_ns);
+		if (ns->user_ns)
+			get_user_ns(ns->user_ns);
+	}
+
+	return ns;
+}
+/*
  * Create new nsproxy and all of its the associated namespaces.
  * Return the newly created nsproxy.  Do not attach this to the task,
  * leave it to the caller to do proper locking and attach it to task.
@@ -123,7 +151,7 @@ int copy_namespaces(int flags, struct ta
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
+	if (!(flags & NS_ALL))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -143,7 +171,7 @@ out:
 	return err;
 }
 
-void free_nsproxy(struct nsproxy *ns)
+static void free_nsproxy(struct nsproxy *ns)
 {
 	if (ns->mnt_ns)
 		put_mnt_ns(ns->mnt_ns);
@@ -157,6 +185,29 @@ void free_nsproxy(struct nsproxy *ns)
 }
 
 /*
+ * put_nsproxy() is similar to free_uid() in kernel/user.c
+ *
+ * the lock can be taken from a tasklet context (task getting freed by
+ * RCU) which requires to be irq safe.
+ */
+void put_nsproxy(struct nsproxy *ns)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (atomic_dec_and_lock(&ns->count, &ns_hash_lock)) {
+		BUG_ON(!ns->id);
+		if (ns->id != -1)
+			hlist_del(&ns->ns_hash_node);
+		spin_unlock_irqrestore(&ns_hash_lock, flags);
+		free_nsproxy(ns);
+	} else {
+		local_irq_restore(flags);
+	}
+}
+EXPORT_SYMBOL_GPL(put_nsproxy);
+
+/*
  * Called from unshare. Unshare all the namespaces part of nsproxy.
  * On sucess, returns the new nsproxy and a reference to old nsproxy
  * to make sure it stays around.
@@ -211,6 +262,212 @@ static inline struct nsproxy *ns_hash_fi
 	return NULL;
 }
 
+struct nsproxy *find_nsproxy_by_id(int id)
+{
+ 	struct nsproxy *ns;
+
+ 	if (id < 0)
+ 		return NULL;
+
+ 	spin_lock_irq(&ns_hash_lock);
+ 	ns = ns_hash_find(id);
+	spin_unlock_irq(&ns_hash_lock);
+
+ 	return ns;
+}
+
+EXPORT_SYMBOL_GPL(find_nsproxy_by_id);
+
+static int bind_ns(int id, struct nsproxy *ns)
+{
+ 	struct nsproxy *prev;
+	int ret = 0;
+
+ 	if (id < 0)
+ 		return -EINVAL;
+
+ 	spin_lock_irq(&ns_hash_lock);
+ 	prev = ns_hash_find(id);
+ 	if (!prev) {
+		ns->id = id;
+		hlist_add_head(&ns->ns_hash_node, ns_hash_head(ns->id));
+	}
+	spin_unlock_irq(&ns_hash_lock);
+
+	if (prev) {
+		ret = -EBUSY;
+		put_nsproxy(prev);
+	}
+ 	return ret;
+}
+
+static int switch_ns(int id, unsigned long flags)
+{
+	int err = 0;
+	struct nsproxy *ns = NULL, *old_ns = NULL, *new_ns = NULL;
+
+	if (flags & ~NS_ALL)
+		return -EINVAL;
+
+	/* Let 0 be a default value ? */
+	if (!flags)
+		flags = NS_ALL;
+
+	if (id < 0) {
+		struct task_struct *p;
+
+		err = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(-id);
+		if (p) {
+			task_lock(p);
+			get_nsproxy(p->nsproxy);
+			ns = p->nsproxy;
+			task_unlock(p);
+		}
+		read_unlock(&tasklist_lock);
+	} else {
+		err = -ENOENT;
+		spin_lock_irq(&ns_hash_lock);
+		ns = ns_hash_find(id);
+		spin_unlock_irq(&ns_hash_lock);
+	}
+
+	if (!ns)
+		goto out;
+
+	new_ns = ns;
+
+	/*
+	 * clone current nsproxy and populate it with 
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: Re: New pid namespaces patches testing
Next Topic: Re: [PATCH] create_new_namespaces: fix improper return of NULL
Goto Forum:
  


Current Time: Thu Dec 12 17:37:44 GMT 2024

Total time taken to generate the page: 0.02735 seconds