OpenVZ Forum


Home » Mailing lists » Devel » - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree
- merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18584] Wed, 09 May 2007 02:45 Go to next message
akpm is currently offline  akpm
Messages: 224
Registered: March 2007
Senior Member
The patch titled
     Merge sys_clone()/sys_unshare() nsproxy and namespace handling
has been removed from the -mm tree.  Its filename was
     merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch

This patch was dropped because it was merged into mainline or a subsystem tree

------------------------------------------------------
Subject: Merge sys_clone()/sys_unshare() nsproxy and namespace handling
From: Badari Pulavarty <pbadari@us.ibm.com>

sys_clone() and sys_unshare() both makes copies of nsproxy and its associated
namespaces.  But they have different code paths.

This patch merges all the nsproxy and its associated namespace copy/clone
handling (as much as possible).  Posted on container list earlier for
feedback.


- Create a new nsproxy and its associated namespaces and pass it back to
  caller to attach it to right process.

- Changed all copy_*_ns() routines to return a new copy of namespace
  instead of attaching it to task->nsproxy.

- Moved the CAP_SYS_ADMIN checks out of copy_*_ns() routines.

- Removed unnessary !ns checks from copy_*_ns() and added BUG_ON()
  just incase.

- Get rid of all individual unshare_*_ns() routines and make use of
  copy_*_ns() instead.

[akpm@osdl.org: cleanups, warning fix]
[clg@fr.ibm.com: remove dup_namespaces() declaration]
[serue@us.ibm.com: fix CONFIG_IPC_NS=n, clone(CLONE_NEWIPC) retval]
[akpm@linux-foundation.org: fix build with CONFIG_SYSVIPC=n]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: <containers@lists.osdl.org>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/namespace.c                |   30 +-----
 include/linux/ipc.h           |   11 +-
 include/linux/mnt_namespace.h |    5 -
 include/linux/nsproxy.h       |    3 
 include/linux/pid_namespace.h |    2 
 include/linux/utsname.h       |   19 ----
 ipc/util.c                    |   53 ++----------
 kernel/fork.c                 |   85 +------------------
 kernel/nsproxy.c              |  139 +++++++++++++++++++-------------
 kernel/pid.c                  |   11 --
 kernel/utsname.c              |   41 ---------
 11 files changed, 131 insertions(+), 268 deletions(-)

diff -puN fs/namespace.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace fs/namespace.c
--- a/fs/namespace.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/fs/namespace.c
@@ -1441,10 +1441,9 @@ dput_out:
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
  */
-struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
+static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		struct fs_struct *fs)
 {
-	struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
 	struct vfsmount *p, *q;
@@ -1509,36 +1508,21 @@ struct mnt_namespace *dup_mnt_ns(struct 
 	return new_ns;
 }
 
-int copy_mnt_ns(int flags, struct task_struct *tsk)
+struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
+		struct fs_struct *new_fs)
 {
-	struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
 	struct mnt_namespace *new_ns;
-	int err = 0;
-
-	if (!ns)
-		return 0;
 
+	BUG_ON(!ns);
 	get_mnt_ns(ns);
 
 	if (!(flags & CLONE_NEWNS))
-		return 0;
+		return ns;
 
-	if (!capable(CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
-	}
+	new_ns = dup_mnt_ns(ns, new_fs);
 
-	new_ns = dup_mnt_ns(tsk, tsk->fs);
-	if (!new_ns) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	tsk->nsproxy->mnt_ns = new_ns;
-
-out:
 	put_mnt_ns(ns);
-	return err;
+	return new_ns;
 }
 
 asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
diff -puN include/linux/ipc.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace include/linux/ipc.h
--- a/include/linux/ipc.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/include/linux/ipc.h
@@ -92,16 +92,19 @@ extern struct ipc_namespace init_ipc_ns;
 
 #ifdef CONFIG_SYSVIPC
 #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
-extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
+extern struct ipc_namespace *copy_ipcs(unsigned long flags,
+						struct ipc_namespace *ns);
 #else
 #define INIT_IPC_NS(ns)
-static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
-{ return 0; }
+static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+						struct ipc_namespace *ns)
+{
+	return ns;
+}
 #endif
 
 #ifdef CONFIG_IPC_NS
 extern void free_ipc_ns(struct kref *kref);
-extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
 #endif
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff -puN include/linux/mnt_namespace.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace include/linux/mnt_namespace.h
--- a/include/linux/mnt_namespace.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/include/linux/mnt_namespace.h
@@ -14,10 +14,9 @@ struct mnt_namespace {
 	int event;
 };
 
-extern int copy_mnt_ns(int, struct task_struct *);
-extern void __put_mnt_ns(struct mnt_namespace *ns);
-extern struct mnt_namespace *dup_mnt_ns(struct task_struct *,
+extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *,
 		struct fs_struct *);
+extern void __put_mnt_ns(struct mnt_namespace *ns);
 
 static inline void put_mnt_ns(struct mnt_namespace *ns)
 {
diff -puN include/linux/nsproxy.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace include/linux/nsproxy.h
--- a/include/linux/nsproxy.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/include/linux/nsproxy.h
@@ -31,10 +31,11 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
-struct nsproxy *dup_namespaces(struct nsproxy *orig);
 int copy_namespaces(int flags, struct task_struct *tsk);
 void get_task_namespaces(struct task_struct *tsk);
 void free_nsproxy(struct nsproxy *ns);
+int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
+	struct fs_struct *);
 
 static inline void put_nsproxy(struct nsproxy *ns)
 {
diff -puN include/linux/pid_namespace.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace include/linux/pid_namespace.h
--- a/include/linux/pid_namespace.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/include/linux/pid_namespace.h
@@ -29,7 +29,7 @@ static inline void get_pid_ns(struct pid
 	kref_get(&ns->kref);
 }
 
-extern int copy_pid_ns(int flags, struct task_struct *tsk);
+extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns);
 extern void free_pid_ns(struct kref *kref);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
diff -puN include/linux/utsname.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace include/linux/utsname.h
--- a/include/linux/utsname.h~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/include/linux/utsname.h
@@ -49,9 +49,7 @@ static inline void get_uts_ns(struct uts
 }
 
 #ifdef CONFIG_UTS_NS
-extern int unshare_utsname(unsigned long unshare_flags,
-				struct uts_namespace **new_uts);
-extern int copy_utsname(int flags, struct task_struct *tsk);
+extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns);
 extern void free_uts_ns(struct kref *kref);
 
 static inline void put_uts_ns(struct uts_namespace *ns)
@@ -59,21 +57,12 @@ static inline void put_uts_ns(struct uts
 	kref_put(&ns->kref, free_uts_ns);
 }
 #else
-static inline int unshare_utsname(unsigned long unshare_flags,
-			struct uts_namespace **new_uts)
+static inline struct uts_namespace *copy_utsname(int flags,
+						struct uts_namespace *ns)
 {
-	if (unshare_flags & CLONE_NEWUTS)
-		return -EINVAL;
-
-	return 0;
+	return ns;
 }
 
-static inline int copy_utsname(int flags, struct task_struct *tsk)
-{
-	if (flags & CLONE_NEWUTS)
-		return -EINVAL;
-	return 0;
-}
 static inline void put_uts_ns(struct uts_namespace *ns)
 {
 }
diff -puN ipc/util.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace ipc/util.c
--- a/ipc/util.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/ipc/util.c
@@ -85,53 +85,20 @@ err_mem:
 	return ERR_PTR(err);
 }
 
-int unshare_ipcs(unsigned long unshare_flags, struct ipc_namespace **new_ipc)
+struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
 {
-	struct ipc_namespace *new;
-
-	if (unshare_flags & CLONE_NEWIPC) {
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		new = clone_ipc_ns(current->nsproxy->ipc_ns);
-		if (IS_ERR(new))
-			return PTR_ERR(new);
-
-		*new_ipc = new;
-	}
-
-	return 0;
-}
-
-int copy_ipcs(unsigned long flags, struct task_struct *tsk)
-{
-	struct ipc_namespace *old_ns = tsk->nsproxy->ipc_ns;
 	struct ipc_namespace *new_ns;
-	int err = 0;
 
-	if (!old_ns)
-		return 0;
-
-	get_ipc_ns(old_ns);
+	BUG_ON(!ns);
+	get_ipc_ns(ns);
 
 	if (!(flags & CLONE_NEWIPC))
-		return 0;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
-	}
+		return ns;
 
-	new_ns = clone_ipc_ns(old_ns);
-	if (!new_ns) {
-		err = -ENOMEM;
-		goto out;
-	}
+	new_ns = clone_ipc_ns(ns);
 
-	tsk->nsproxy->ipc_ns = new_ns;
-out:
-	put_ipc_ns(old_ns);
-	return err;
+	put_ipc_ns(ns);
+	return new_ns;
 }
 
 void free_ipc_ns(struct kref *kref)
@@ -145,11 +112,11 @@ void free_ipc_ns(struct kref *kref)
 	kfree(ns);
 }
 #else
-int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
 {
 	if (flags & CLONE_NEWIPC)
-		return -EINVAL;
-	return 0;
+		return ERR_PTR(-EINVAL);
+	return ns;
 }
 #endif
 
diff -puN kernel/fork.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace kernel/fork.c
--- a/kernel/fork.c~merge-sys_clone-sys_unshare-nsproxy-and-namespace
+++ a/kernel/fork.c
@@ -1516,26 +1516,6 @@ static int unshare
...

Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18974 is a reply to message #18584] Sat, 16 June 2007 19:17 Go to previous messageGo to next message
Herbert Poetzl is currently offline  Herbert Poetzl
Messages: 239
Registered: February 2006
Senior Member
On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> 
> The patch titled
>      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> has been removed from the -mm tree.  Its filename was
>      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> 
> This patch was dropped because it was merged into mainline or a subsystem tree
> 
> ------------------------------------------------------
> Subject: Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> From: Badari Pulavarty <pbadari@us.ibm.com>
> 
> sys_clone() and sys_unshare() both makes copies of nsproxy and its associated
> namespaces.  But they have different code paths.
> 
> This patch merges all the nsproxy and its associated namespace copy/clone
> handling (as much as possible).  Posted on container list earlier for
> feedback.
> 
> 
> - Create a new nsproxy and its associated namespaces and pass it back to
>   caller to attach it to right process.
> 
> - Changed all copy_*_ns() routines to return a new copy of namespace
>   instead of attaching it to task->nsproxy.
> 
> - Moved the CAP_SYS_ADMIN checks out of copy_*_ns() routines.
> 
> - Removed unnessary !ns checks from copy_*_ns() and added BUG_ON()
>   just incase.
> 
> - Get rid of all individual unshare_*_ns() routines and make use of
>   copy_*_ns() instead.
> 

.. [zapped] ...

> + * Called from unshare. Unshare all the namespaces part of nsproxy.
> + * On sucess, returns the new nsproxy and a reference to old nsproxy
> + * to make sure it stays around.
> + */
> +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> +{

this makes sys_unshare leak and nsproxy (reference)

can be tested with the following command sequence:
   vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10

(and some nsproxy accounting/debugging as used in
 Linux-VServer)

we probably want to drop the reference to the old
nsproxy in sys_unshare() but I do not see a good reason
to take the reference in the first place (at least not
with the code in mainline 2.6.22-rc4)

HTH,
Herbert

PS: vcmd can be found here: 
    http://vserver.13thfloor.at/Experimental/TOOLS/vcmd-0.09.tar.bz2

... [more zapped] ...
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18975 is a reply to message #18974] Sun, 17 June 2007 14:38 Go to previous messageGo to next message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
On 06/16, Herbert Poetzl wrote:
> 
> On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> > 
> > The patch titled
> >      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> > has been removed from the -mm tree.  Its filename was
> >      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> > 
> > This patch was dropped because it was merged into mainline or a subsystem tree
> > 
> 
> .. [zapped] ...
> 
> > + * Called from unshare. Unshare all the namespaces part of nsproxy.
> > + * On sucess, returns the new nsproxy and a reference to old nsproxy
> > + * to make sure it stays around.
> > + */
> > +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> > +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> > +{
> 
> this makes sys_unshare leak and nsproxy (reference)
> 
> can be tested with the following command sequence:
>    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10

I know almost nothing about this stuff, could you please explain in brief
what this command does and how do you detect a leak?

> (and some nsproxy accounting/debugging as used in
>  Linux-VServer)
> 
> we probably want to drop the reference to the old
> nsproxy in sys_unshare() but I do not see a good reason
> to take the reference in the first place (at least not
> with the code in mainline 2.6.22-rc4)

At first glance, sys_unshare() drops the reference to the old nsproxy,

		old_nsproxy = current->nsproxy;
		current->nsproxy = new_nsproxy;
		new_nsproxy = old_nsproxy;

	...

	if (new_nsproxy)
		put_nsproxy(new_nsproxy);


However, nsproxy's code is full of strange unneeded get/put calls, for
example:

	struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
	{
		struct uts_namespace *new_ns;

		BUG_ON(!old_ns);
		get_uts_ns(old_ns);

		if (!(flags & CLONE_NEWUTS))
			return old_ns;

		new_ns = clone_uts_ns(old_ns);

		put_uts_ns(old_ns);
		return new_ns;
	}

I think it would be better to do

	struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
	{
		struct uts_namespace *new_ns;

		BUG_ON(!old_ns);

		if (!(flags & CLONE_NEWUTS)) {
			get_uts_ns(old_ns);
			return old_ns;
		}

		new_ns = clone_uts_ns(old_ns);
		return new_ns;
	}

Not only this looks better (imho), this is more robust.

Let's look at copy_namespaces(), it does the same "get_xxx() in advance", but
-EPERM forgets to do put_nsproxy(), so we definitely have a leak in copy_process().

So, if the command above does clone() which fails, perhaps this can explain the
problem.

Oleg.

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18976 is a reply to message #18975] Sun, 17 June 2007 14:49 Go to previous messageGo to next message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
On 06/17, Oleg Nesterov wrote:
>
> Let's look at copy_namespaces(), it does the same "get_xxx() in advance", but
> -EPERM forgets to do put_nsproxy(), so we definitely have a leak in copy_process().

Ugh, I am sorry, EPERM does put_nsproxy(). Still I can't understand why
copy_namespaces() does get_nsproxy() unconditionally.

Oleg.

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18977 is a reply to message #18975] Sun, 17 June 2007 17:09 Go to previous messageGo to next message
Herbert Poetzl is currently offline  Herbert Poetzl
Messages: 239
Registered: February 2006
Senior Member
On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
> On 06/16, Herbert Poetzl wrote:
> > 
> > On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> > > 
> > > The patch titled
> > >      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> > > has been removed from the -mm tree.  Its filename was
> > >      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> > > 
> > > This patch was dropped because it was merged into mainline or a subsystem tree
> > > 
> > 
> > .. [zapped] ...
> > 
> > > + * Called from unshare. Unshare all the namespaces part of nsproxy.
> > > + * On sucess, returns the new nsproxy and a reference to old nsproxy
> > > + * to make sure it stays around.
> > > + */
> > > +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> > > +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> > > +{
> > 
> > this makes sys_unshare leak and nsproxy (reference)
> > 
> > can be tested with the following command sequence:
> >    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10
> 
> I know almost nothing about this stuff, could you please explain in
> brief what this command does ...

yeah, sure, it basically calls sys_unshare() with
bit 17 (CLONE_NEWNS) set then invokes the chained
command, so we get a sleep which is in a separate
namespace, unshared from a namespace != the main
one ...

> ... and how do you detect a leak?

> > (and some nsproxy accounting/debugging as used in
> >  Linux-VServer)

on Linux-VServer,we have accounting for those
proxies (and several other namespace related stuff)
because we already suspected leakage and reference
bugs in this area some time ago ... btw, I also
suggested to put a similar functionality in mainline
for the time being, but it was ignored, as usual ...

> > we probably want to drop the reference to the old
> > nsproxy in sys_unshare() but I do not see a good reason
> > to take the reference in the first place (at least not
> > with the code in mainline 2.6.22-rc4)
> 
> At first glance, sys_unshare() drops the reference to 
> the old nsproxy,

okay, the 'current' task has an nsproxy, and keeps
a reference to that (let's assume it is the only
task using this nsproxy, then the count will be 1)

unshare_nsproxy_namespaces() now does get_nsproxy()
which makes the count=2, then it creates a new
nsproxy (which will get count=1), and returns ...

> 		old_nsproxy = current->nsproxy;
> 		current->nsproxy = new_nsproxy;
> 		new_nsproxy = old_nsproxy;

sys_unshare, now replaces the current->nsproxy with
the new one, which will have the correct count=1,
and puts the old nsproxy (which has count=2), and
thus the nsproxy will not get released, although
it isn't referenced/used anymore ...

> 	if (new_nsproxy)
> 		put_nsproxy(new_nsproxy);
> 
> 
> However, nsproxy's code is full of strange unneeded get/put 
> calls, for example:

yep, I totally agree, it is quite a mess, and if
not handled carefully, you end up leaking proxies :)

best,
Herbert

> 	struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
> 	{
> 		struct uts_namespace *new_ns;
> 
> 		BUG_ON(!old_ns);
> 		get_uts_ns(old_ns);
> 
> 		if (!(flags & CLONE_NEWUTS))
> 			return old_ns;
> 
> 		new_ns = clone_uts_ns(old_ns);
> 
> 		put_uts_ns(old_ns);
> 		return new_ns;
> 	}
> 
> I think it would be better to do
> 
> 	struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
> 	{
> 		struct uts_namespace *new_ns;
> 
> 		BUG_ON(!old_ns);
> 
> 		if (!(flags & CLONE_NEWUTS)) {
> 			get_uts_ns(old_ns);
> 			return old_ns;
> 		}
> 
> 		new_ns = clone_uts_ns(old_ns);
> 		return new_ns;
> 	}
> 
> Not only this looks better (imho), this is more robust.
> 
> Let's look at copy_namespaces(), it does the same 
> "get_xxx() in advance", but -EPERM forgets to do 
> put_nsproxy(), so we definitely have a leak in copy_process().
> 
> So, if the command above does clone() which fails, perhaps 
> this can explain the problem.
> 
> Oleg.
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18978 is a reply to message #18975] Sun, 17 June 2007 16:30 Go to previous messageGo to next message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
On 06/17, Oleg Nesterov wrote:
> 
> However, nsproxy's code is full of strange unneeded get/put calls, for
> example:
> 
> 	struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
> 	{
> 		struct uts_namespace *new_ns;
> 
> 		BUG_ON(!old_ns);
> 		get_uts_ns(old_ns);
> 
> 		if (!(flags & CLONE_NEWUTS))
> 			return old_ns;
> 
> 		new_ns = clone_uts_ns(old_ns);
> 
> 		put_uts_ns(old_ns);
> 		return new_ns;
> 	}

Perhaps I missed something again, but this looks wrong to me.

copy_utsname() assumes that old_ns != NULL. OK, it should not.

However, clone_uts_ns() returns NULL if kmalloc() fails.
create_new_namespaces() checks IS_ERR(new_ns), but IS_ERR(NULL) = false.
So the next copy_namespaces/unshare_nsproxy_namespaces will oops ?

The same for all ->xxx_ns fields.

Oleg.

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18979 is a reply to message #18977] Sun, 17 June 2007 18:28 Go to previous messageGo to next message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
On 06/17, Herbert Poetzl wrote:
>
> On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
> > 
> > At first glance, sys_unshare() drops the reference to 
> > the old nsproxy,
> 
> okay, the 'current' task has an nsproxy, and keeps
> a reference to that (let's assume it is the only
> task using this nsproxy, then the count will be 1)
> 
> unshare_nsproxy_namespaces() now does get_nsproxy()
> which makes the count=2, then it creates a new
> nsproxy (which will get count=1), and returns ...

Ah yes, stupid me.

Looks like we should just remove get/put old_ns from unshare_nsproxy_namespaces()
as you suggested.

Thanks!

Oleg.

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18983 is a reply to message #18584] Mon, 18 June 2007 17:35 Go to previous messageGo to next message
Badari Pulavarty is currently offline  Badari Pulavarty
Messages: 15
Registered: September 2006
Junior Member
On Mon, 2007-06-18 at 14:37 +0200, Cedric Le Goater wrote:
> Herbert Poetzl wrote:
> > On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
> >> On 06/16, Herbert Poetzl wrote:
> >>> On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> >>>> The patch titled
> >>>>      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> >>>> has been removed from the -mm tree.  Its filename was
> >>>>      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> >>>>
> >>>> This patch was dropped because it was merged into mainline or a subsystem tree
> >>>>
> >>> .. [zapped] ...
> >>>
> >>>> + * Called from unshare. Unshare all the namespaces part of nsproxy.
> >>>> + * On sucess, returns the new nsproxy and a reference to old nsproxy
> >>>> + * to make sure it stays around.
> >>>> + */
> >>>> +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> >>>> +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> >>>> +{
> >>> this makes sys_unshare leak and nsproxy (reference)
> >>>
> >>> can be tested with the following command sequence:
> >>>    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10
> >> I know almost nothing about this stuff, could you please explain in
> >> brief what this command does ...
> > 
> > yeah, sure, it basically calls sys_unshare() with
> > bit 17 (CLONE_NEWNS) set then invokes the chained
> > command, so we get a sleep which is in a separate
> > namespace, unshared from a namespace != the main
> > one ...
> > 
> >> ... and how do you detect a leak?
> > 
> >>> (and some nsproxy accounting/debugging as used in
> >>>  Linux-VServer)
> > 
> > on Linux-VServer,we have accounting for those
> > proxies (and several other namespace related stuff)
> > because we already suspected leakage and reference
> > bugs in this area some time ago ... btw, I also
> > suggested to put a similar functionality in mainline
> > for the time being, but it was ignored, as usual ...
> > 
> >>> we probably want to drop the reference to the old
> >>> nsproxy in sys_unshare() but I do not see a good reason
> >>> to take the reference in the first place (at least not
> >>> with the code in mainline 2.6.22-rc4)
> >> At first glance, sys_unshare() drops the reference to 
> >> the old nsproxy,
> > 
> > okay, the 'current' task has an nsproxy, and keeps
> > a reference to that (let's assume it is the only
> > task using this nsproxy, then the count will be 1)
> > 
> > unshare_nsproxy_namespaces() now does get_nsproxy()
> > which makes the count=2, then it creates a new
> > nsproxy (which will get count=1), and returns ...
> > 
> >> 		old_nsproxy = current->nsproxy;
> >> 		current->nsproxy = new_nsproxy;
> >> 		new_nsproxy = old_nsproxy;
> > 
> > sys_unshare, now replaces the current->nsproxy with
> > the new one, which will have the correct count=1,
> > and puts the old nsproxy (which has count=2), and
> > thus the nsproxy will not get released, although
> > it isn't referenced/used anymore ...
> 
> 
> Herbert,
> 
> Could you give a try to the patch i've sent previously and this one 
> which removes an extra get_nsproxy() ? It fixes the leak for me. I've 
> run the ltp tests we have on namespace unsharing and i could see the 
> no leaks in /proc/slabinfo.
> 
> Badari,
> 
> That extra get_nsproxy() seemed a superfluous remain from the 2.6.20. 
> Do you see any issues with it ?
> 
> If we're all happy with these fixes, i'll send them on lkml@ for review. 
> They might deserve to be in 2.6.22.
> 
> Thanks,
> 
> C. 

Cedric, Oleg and Herbert,

Thanks for working this out. Looks good.

Thanks,
Badari

> 
> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>

Acked.



_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18988 is a reply to message #18976] Mon, 18 June 2007 11:51 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
Oleg Nesterov wrote:
> On 06/17, Oleg Nesterov wrote:
>> Let's look at copy_namespaces(), it does the same "get_xxx() in advance", but
>> -EPERM forgets to do put_nsproxy(), so we definitely have a leak in copy_process().
> 
> Ugh, I am sorry, EPERM does put_nsproxy(). Still I can't understand why
> copy_namespaces() does get_nsproxy() unconditionally.

well, if you're cloning a new task and not unsharing some of the namespaces
you still want to increase the refcount on the nsproxy bc a new task is now
referencing it. nop ?

C. 
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18989 is a reply to message #18977] Mon, 18 June 2007 12:02 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
> on Linux-VServer,we have accounting for those
> proxies (and several other namespace related stuff)
> because we already suspected leakage and reference
> bugs in this area some time ago ... btw, I also
> suggested to put a similar functionality in mainline
> for the time being, but it was ignored, as usual ...

something like a kmem_cache ? and we are not ignoring vserver ! :)

Cheers,

C.

Add a kmem_cache to manage nsproxy objects. 

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
---
 kernel/nsproxy.c |   21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Index: 2.6.22-rc4-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.22-rc4-mm2.orig/kernel/nsproxy.c
+++ 2.6.22-rc4-mm2/kernel/nsproxy.c
@@ -21,6 +21,8 @@
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 
+static struct kmem_cache *nsproxy_cachep;
+
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 static inline void get_nsproxy(struct nsproxy *ns)
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nspr
 {
 	struct nsproxy *ns;
 
-	ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
-	if (ns)
+	ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
+	if (ns) {
+		memcpy(ns, orig, sizeof(struct nsproxy));
 		atomic_set(&ns->count, 1);
+	}
 	return ns;
 }
 
@@ -109,7 +113,7 @@ out_uts:
 	if (new_nsp->mnt_ns)
 		put_mnt_ns(new_nsp->mnt_ns);
 out_ns:
-	kfree(new_nsp);
+	kmem_cache_free(nsproxy_cachep, new_nsp);
 	return ERR_PTR(err);
 }
 
@@ -160,7 +164,7 @@ void free_nsproxy(struct nsproxy *ns)
 		put_pid_ns(ns->pid_ns);
 	if (ns->user_ns)
 		put_user_ns(ns->user_ns);
-	kfree(ns);
+	kmem_cache_free(nsproxy_cachep, ns);
 }
 
 /*
@@ -191,3 +195,12 @@ int unshare_nsproxy_namespaces(unsigned 
 	}
 	return err;
 }
+
+static int __init nsproxy_cache_init(void)
+{
+	nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
+					   0, SLAB_PANIC, NULL, NULL);
+	return 0;
+}
+
+module_init(nsproxy_cache_init);
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18990 is a reply to message #18584] Mon, 18 June 2007 12:25 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
Oleg Nesterov wrote:
> On 06/18, Cedric Le Goater wrote:
>> Oleg Nesterov wrote:
>>> On 06/17, Oleg Nesterov wrote:
>>>> Let's look at copy_namespaces(), it does the same "get_xxx() in advance", but
>>>> -EPERM forgets to do put_nsproxy(), so we definitely have a leak in copy_process().
>>> Ugh, I am sorry, EPERM does put_nsproxy(). Still I can't understand why
>>> copy_namespaces() does get_nsproxy() unconditionally.
>> well, if you're cloning a new task and not unsharing some of the namespaces
>> you still want to increase the refcount on the nsproxy bc a new task is now
>> referencing it. nop ?
> 
> Yes, but copy_namespaces() does get_nsproxy() unconditionally, and then it does
> put_nsproxy() if not unsharing, which is not good imho.
> 
> IOW, I think the patch below makes the code a bit better. copy_namespaces()
> doesn't need put_nsproxy() at all.

Indeed it does and also :

nsproxy.c |   23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

Thanks,

C. 

> Oleg.
> 
> --- t/kernel/nsproxy.c~	2007-06-18 16:10:53.000000000 +0400
> +++ t/kernel/nsproxy.c	2007-06-18 16:13:02.000000000 +0400
> @@ -103,31 +103,24 @@ int copy_namespaces(int flags, struct ta
>  {
>  	struct nsproxy *old_ns = tsk->nsproxy;
>  	struct nsproxy *new_ns;
> -	int err = 0;
> 
>  	if (!old_ns)
>  		return 0;
> 
> -	get_nsproxy(old_ns);
> -
> -	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
> +	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) {
> +		get_nsproxy(old_ns);
>  		return 0;
> -
> -	if (!capable(CAP_SYS_ADMIN)) {
> -		err = -EPERM;
> -		goto out;
>  	}
> 
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
>  	new_ns = create_new_namespaces(flags, tsk, tsk->fs);
> -	if (IS_ERR(new_ns)) {
> -		err = PTR_ERR(new_ns);
> -		goto out;
> -	}
> +	if (IS_ERR(new_ns))
> +		return PTR_ERR(new_ns);
> 
>  	tsk->nsproxy = new_ns;
> -out:
> -	put_nsproxy(old_ns);
> -	return err;
> +	return 0;
>  }
> 
>  void free_nsproxy(struct nsproxy *ns)
> 
> 

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18991 is a reply to message #18977] Mon, 18 June 2007 12:37 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
Herbert Poetzl wrote:
> On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
>> On 06/16, Herbert Poetzl wrote:
>>> On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
>>>> The patch titled
>>>>      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
>>>> has been removed from the -mm tree.  Its filename was
>>>>      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
>>>>
>>>> This patch was dropped because it was merged into mainline or a subsystem tree
>>>>
>>> .. [zapped] ...
>>>
>>>> + * Called from unshare. Unshare all the namespaces part of nsproxy.
>>>> + * On sucess, returns the new nsproxy and a reference to old nsproxy
>>>> + * to make sure it stays around.
>>>> + */
>>>> +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
>>>> +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
>>>> +{
>>> this makes sys_unshare leak and nsproxy (reference)
>>>
>>> can be tested with the following command sequence:
>>>    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10
>> I know almost nothing about this stuff, could you please explain in
>> brief what this command does ...
> 
> yeah, sure, it basically calls sys_unshare() with
> bit 17 (CLONE_NEWNS) set then invokes the chained
> command, so we get a sleep which is in a separate
> namespace, unshared from a namespace != the main
> one ...
> 
>> ... and how do you detect a leak?
> 
>>> (and some nsproxy accounting/debugging as used in
>>>  Linux-VServer)
> 
> on Linux-VServer,we have accounting for those
> proxies (and several other namespace related stuff)
> because we already suspected leakage and reference
> bugs in this area some time ago ... btw, I also
> suggested to put a similar functionality in mainline
> for the time being, but it was ignored, as usual ...
> 
>>> we probably want to drop the reference to the old
>>> nsproxy in sys_unshare() but I do not see a good reason
>>> to take the reference in the first place (at least not
>>> with the code in mainline 2.6.22-rc4)
>> At first glance, sys_unshare() drops the reference to 
>> the old nsproxy,
> 
> okay, the 'current' task has an nsproxy, and keeps
> a reference to that (let's assume it is the only
> task using this nsproxy, then the count will be 1)
> 
> unshare_nsproxy_namespaces() now does get_nsproxy()
> which makes the count=2, then it creates a new
> nsproxy (which will get count=1), and returns ...
> 
>> 		old_nsproxy = current->nsproxy;
>> 		current->nsproxy = new_nsproxy;
>> 		new_nsproxy = old_nsproxy;
> 
> sys_unshare, now replaces the current->nsproxy with
> the new one, which will have the correct count=1,
> and puts the old nsproxy (which has count=2), and
> thus the nsproxy will not get released, although
> it isn't referenced/used anymore ...


Herbert,

Could you give a try to the patch i've sent previously and this one 
which removes an extra get_nsproxy() ? It fixes the leak for me. I've 
run the ltp tests we have on namespace unsharing and i could see the 
no leaks in /proc/slabinfo.

Badari,

That extra get_nsproxy() seemed a superfluous remain from the 2.6.20. 
Do you see any issues with it ?

If we're all happy with these fixes, i'll send them on lkml@ for review. 
They might deserve to be in 2.6.22.

Thanks,

C. 

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
---
 kernel/nsproxy.c |    7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

Index: 2.6.22-rc4-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.22-rc4-mm2.orig/kernel/nsproxy.c
+++ 2.6.22-rc4-mm2/kernel/nsproxy.c
@@ -175,7 +175,6 @@ void free_nsproxy(struct nsproxy *ns)
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
                struct nsproxy **new_nsp, struct fs_struct *new_fs)
 {
-       struct nsproxy *old_ns = current->nsproxy;
        int err = 0;
 
        if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
@@ -185,14 +184,10 @@ int unshare_nsproxy_namespaces(unsigned 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       get_nsproxy(old_ns);
-
        *new_nsp = create_new_namespaces(unshare_flags, current,
                                new_fs ? new_fs : current->fs);
-       if (IS_ERR(*new_nsp)) {
+       if (IS_ERR(*new_nsp))
                err = PTR_ERR(*new_nsp);
-               put_nsproxy(old_ns);
-       }
        return err;
 }

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18992 is a reply to message #18988] Mon, 18 June 2007 12:21 Go to previous messageGo to next message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
On 06/18, Cedric Le Goater wrote:
>
> Oleg Nesterov wrote:
> > On 06/17, Oleg Nesterov wrote:
> >> Let's look at copy_namespaces(), it does the same "get_xxx() in advance", but
> >> -EPERM forgets to do put_nsproxy(), so we definitely have a leak in copy_process().
> > 
> > Ugh, I am sorry, EPERM does put_nsproxy(). Still I can't understand why
> > copy_namespaces() does get_nsproxy() unconditionally.
> 
> well, if you're cloning a new task and not unsharing some of the namespaces
> you still want to increase the refcount on the nsproxy bc a new task is now
> referencing it. nop ?

Yes, but copy_namespaces() does get_nsproxy() unconditionally, and then it does
put_nsproxy() if not unsharing, which is not good imho.

IOW, I think the patch below makes the code a bit better. copy_namespaces()
doesn't need put_nsproxy() at all.

Oleg.

--- t/kernel/nsproxy.c~	2007-06-18 16:10:53.000000000 +0400
+++ t/kernel/nsproxy.c	2007-06-18 16:13:02.000000000 +0400
@@ -103,31 +103,24 @@ int copy_namespaces(int flags, struct ta
 {
 	struct nsproxy *old_ns = tsk->nsproxy;
 	struct nsproxy *new_ns;
-	int err = 0;
 
 	if (!old_ns)
 		return 0;
 
-	get_nsproxy(old_ns);
-
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) {
+		get_nsproxy(old_ns);
 		return 0;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
 	}
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	new_ns = create_new_namespaces(flags, tsk, tsk->fs);
-	if (IS_ERR(new_ns)) {
-		err = PTR_ERR(new_ns);
-		goto out;
-	}
+	if (IS_ERR(new_ns))
+		return PTR_ERR(new_ns);
 
 	tsk->nsproxy = new_ns;
-out:
-	put_nsproxy(old_ns);
-	return err;
+	return 0;
 }
 
 void free_nsproxy(struct nsproxy *ns)

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18993 is a reply to message #18991] Mon, 18 June 2007 15:38 Go to previous messageGo to next message
Herbert Poetzl is currently offline  Herbert Poetzl
Messages: 239
Registered: February 2006
Senior Member
On Mon, Jun 18, 2007 at 02:37:25PM +0200, Cedric Le Goater wrote:
> Herbert Poetzl wrote:
> > On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
> >> On 06/16, Herbert Poetzl wrote:
> >>> On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> >>>> The patch titled
> >>>>      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> >>>> has been removed from the -mm tree.  Its filename was
> >>>>      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> >>>>
> >>>> This patch was dropped because it was merged into mainline or a subsystem tree
> >>>>
> >>> .. [zapped] ...
> >>>
> >>>> + * Called from unshare. Unshare all the namespaces part of nsproxy.
> >>>> + * On sucess, returns the new nsproxy and a reference to old nsproxy
> >>>> + * to make sure it stays around.
> >>>> + */
> >>>> +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> >>>> +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> >>>> +{
> >>> this makes sys_unshare leak and nsproxy (reference)
> >>>
> >>> can be tested with the following command sequence:
> >>>    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10
> >> I know almost nothing about this stuff, could you please explain in
> >> brief what this command does ...
> > 
> > yeah, sure, it basically calls sys_unshare() with
> > bit 17 (CLONE_NEWNS) set then invokes the chained
> > command, so we get a sleep which is in a separate
> > namespace, unshared from a namespace != the main
> > one ...
> > 
> >> ... and how do you detect a leak?
> > 
> >>> (and some nsproxy accounting/debugging as used in
> >>>  Linux-VServer)
> > 
> > on Linux-VServer,we have accounting for those
> > proxies (and several other namespace related stuff)
> > because we already suspected leakage and reference
> > bugs in this area some time ago ... btw, I also
> > suggested to put a similar functionality in mainline
> > for the time being, but it was ignored, as usual ...
> > 
> >>> we probably want to drop the reference to the old
> >>> nsproxy in sys_unshare() but I do not see a good reason
> >>> to take the reference in the first place (at least not
> >>> with the code in mainline 2.6.22-rc4)
> >> At first glance, sys_unshare() drops the reference to 
> >> the old nsproxy,
> > 
> > okay, the 'current' task has an nsproxy, and keeps
> > a reference to that (let's assume it is the only
> > task using this nsproxy, then the count will be 1)
> > 
> > unshare_nsproxy_namespaces() now does get_nsproxy()
> > which makes the count=2, then it creates a new
> > nsproxy (which will get count=1), and returns ...
> > 
> >> 		old_nsproxy = current->nsproxy;
> >> 		current->nsproxy = new_nsproxy;
> >> 		new_nsproxy = old_nsproxy;
> > 
> > sys_unshare, now replaces the current->nsproxy with
> > the new one, which will have the correct count=1,
> > and puts the old nsproxy (which has count=2), and
> > thus the nsproxy will not get released, although
> > it isn't referenced/used anymore ...
> 
> 
> Herbert,
> 
> Could you give a try to the patch i've sent previously 
> and this one which removes an extra get_nsproxy() ? 

will do so shortly ...

> It fixes the leak for me. I've run the ltp tests we 
> have on namespace unsharing and i could see the no 
> leaks in /proc/slabinfo.

> Badari,
> 
> That extra get_nsproxy() seemed a superfluous remain 
> from the 2.6.20. 
> Do you see any issues with it ?
> 
> If we're all happy with these fixes, i'll send them on 
> lkml@ for review. 

I'm not terribly happy with the current nsproxy
framework, although it improved somewhat ...

I'm still missing some mechanism to 'mix' two
proxies according to a flagmask (which is required
to enter a guest 'partially') ...

best,
Herbert

> They might deserve to be in 2.6.22.
> 
> Thanks,
> 
> C. 
> 
> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
> ---
>  kernel/nsproxy.c |    7 +------
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> Index: 2.6.22-rc4-mm2/kernel/nsproxy.c
> ===================================================================
> --- 2.6.22-rc4-mm2.orig/kernel/nsproxy.c
> +++ 2.6.22-rc4-mm2/kernel/nsproxy.c
> @@ -175,7 +175,6 @@ void free_nsproxy(struct nsproxy *ns)
>  int unshare_nsproxy_namespaces(unsigned long unshare_flags,
>                 struct nsproxy **new_nsp, struct fs_struct *new_fs)
>  {
> -       struct nsproxy *old_ns = current->nsproxy;
>         int err = 0;
>  
>         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> @@ -185,14 +184,10 @@ int unshare_nsproxy_namespaces(unsigned 
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EPERM;
>  
> -       get_nsproxy(old_ns);
> -
>         *new_nsp = create_new_namespaces(unshare_flags, current,
>                                 new_fs ? new_fs : current->fs);
> -       if (IS_ERR(*new_nsp)) {
> +       if (IS_ERR(*new_nsp))
>                 err = PTR_ERR(*new_nsp);
> -               put_nsproxy(old_ns);
> -       }
>         return err;
>  }
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18994 is a reply to message #18989] Mon, 18 June 2007 15:41 Go to previous messageGo to next message
Herbert Poetzl is currently offline  Herbert Poetzl
Messages: 239
Registered: February 2006
Senior Member
On Mon, Jun 18, 2007 at 02:02:19PM +0200, Cedric Le Goater wrote:
> 
> > on Linux-VServer,we have accounting for those
> > proxies (and several other namespace related stuff)
> > because we already suspected leakage and reference
> > bugs in this area some time ago ... btw, I also
> > suggested to put a similar functionality in mainline
> > for the time being, but it was ignored, as usual ...
> 
> something like a kmem_cache ? 

maybe, but even simplest accounting of the
different 'objects' would be more then enough
for the test phase (or maybe as statistics :)

> and we are not ignoring vserver ! :)

good for them, our project is called Linux-VServer :)

best,
Herbert

> Cheers,
> 
> C.
> 
> Add a kmem_cache to manage nsproxy objects. 
> 
> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
> ---
>  kernel/nsproxy.c |   21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> Index: 2.6.22-rc4-mm2/kernel/nsproxy.c
> ===================================================================
> --- 2.6.22-rc4-mm2.orig/kernel/nsproxy.c
> +++ 2.6.22-rc4-mm2/kernel/nsproxy.c
> @@ -21,6 +21,8 @@
>  #include <linux/utsname.h>
>  #include <linux/pid_namespace.h>
>  
> +static struct kmem_cache *nsproxy_cachep;
> +
>  struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
>  
>  static inline void get_nsproxy(struct nsproxy *ns)
> @@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nspr
>  {
>  	struct nsproxy *ns;
>  
> -	ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
> -	if (ns)
> +	ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
> +	if (ns) {
> +		memcpy(ns, orig, sizeof(struct nsproxy));
>  		atomic_set(&ns->count, 1);
> +	}
>  	return ns;
>  }
>  
> @@ -109,7 +113,7 @@ out_uts:
>  	if (new_nsp->mnt_ns)
>  		put_mnt_ns(new_nsp->mnt_ns);
>  out_ns:
> -	kfree(new_nsp);
> +	kmem_cache_free(nsproxy_cachep, new_nsp);
>  	return ERR_PTR(err);
>  }
>  
> @@ -160,7 +164,7 @@ void free_nsproxy(struct nsproxy *ns)
>  		put_pid_ns(ns->pid_ns);
>  	if (ns->user_ns)
>  		put_user_ns(ns->user_ns);
> -	kfree(ns);
> +	kmem_cache_free(nsproxy_cachep, ns);
>  }
>  
>  /*
> @@ -191,3 +195,12 @@ int unshare_nsproxy_namespaces(unsigned 
>  	}
>  	return err;
>  }
> +
> +static int __init nsproxy_cache_init(void)
> +{
> +	nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
> +					   0, SLAB_PANIC, NULL, NULL);
> +	return 0;
> +}
> +
> +module_init(nsproxy_cache_init);
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18995 is a reply to message #18991] Mon, 18 June 2007 15:48 Go to previous messageGo to next message
Herbert Poetzl is currently offline  Herbert Poetzl
Messages: 239
Registered: February 2006
Senior Member
On Mon, Jun 18, 2007 at 02:37:25PM +0200, Cedric Le Goater wrote:
> Herbert Poetzl wrote:
> > On Sun, Jun 17, 2007 at 06:38:30PM +0400, Oleg Nesterov wrote:
> >> On 06/16, Herbert Poetzl wrote:
> >>> On Tue, May 08, 2007 at 07:45:35PM -0700, akpm@linux-foundation.org wrote:
> >>>> The patch titled
> >>>>      Merge sys_clone()/sys_unshare() nsproxy and namespace handling
> >>>> has been removed from the -mm tree.  Its filename was
> >>>>      merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch
> >>>>
> >>>> This patch was dropped because it was merged into mainline or a subsystem tree
> >>>>
> >>> .. [zapped] ...
> >>>
> >>>> + * Called from unshare. Unshare all the namespaces part of nsproxy.
> >>>> + * On sucess, returns the new nsproxy and a reference to old nsproxy
> >>>> + * to make sure it stays around.
> >>>> + */
> >>>> +int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> >>>> +		struct nsproxy **new_nsp, struct fs_struct *new_fs)
> >>>> +{
> >>> this makes sys_unshare leak and nsproxy (reference)
> >>>
> >>> can be tested with the following command sequence:
> >>>    vcmd -nu ^17 -- vcmd -nu ^17 -- sleep 10
> >> I know almost nothing about this stuff, could you please explain in
> >> brief what this command does ...
> > 
> > yeah, sure, it basically calls sys_unshare() with
> > bit 17 (CLONE_NEWNS) set then invokes the chained
> > command, so we get a sleep which is in a separate
> > namespace, unshared from a namespace != the main
> > one ...
> > 
> >> ... and how do you detect a leak?
> > 
> >>> (and some nsproxy accounting/debugging as used in
> >>>  Linux-VServer)
> > 
> > on Linux-VServer,we have accounting for those
> > proxies (and several other namespace related stuff)
> > because we already suspected leakage and reference
> > bugs in this area some time ago ... btw, I also
> > suggested to put a similar functionality in mainline
> > for the time being, but it was ignored, as usual ...
> > 
> >>> we probably want to drop the reference to the old
> >>> nsproxy in sys_unshare() but I do not see a good reason
> >>> to take the reference in the first place (at least not
> >>> with the code in mainline 2.6.22-rc4)
> >> At first glance, sys_unshare() drops the reference to 
> >> the old nsproxy,
> > 
> > okay, the 'current' task has an nsproxy, and keeps
> > a reference to that (let's assume it is the only
> > task using this nsproxy, then the count will be 1)
> > 
> > unshare_nsproxy_namespaces() now does get_nsproxy()
> > which makes the count=2, then it creates a new
> > nsproxy (which will get count=1), and returns ...
> > 
> >> 		old_nsproxy = current->nsproxy;
> >> 		current->nsproxy = new_nsproxy;
> >> 		new_nsproxy = old_nsproxy;
> > 
> > sys_unshare, now replaces the current->nsproxy with
> > the new one, which will have the correct count=1,
> > and puts the old nsproxy (which has count=2), and
> > thus the nsproxy will not get released, although
> > it isn't referenced/used anymore ...
> 
> 
> Herbert,
> 
> Could you give a try to the patch i've sent previously and this one 
> which removes an extra get_nsproxy() ? It fixes the leak for me. I've 
> run the ltp tests we have on namespace unsharing and i could see the 
> no leaks in /proc/slabinfo.
> 
> Badari,
> 
> That extra get_nsproxy() seemed a superfluous remain from the 2.6.20. 
> Do you see any issues with it ?
> 
> If we're all happy with these fixes, i'll send them on lkml@ for review. 
> They might deserve to be in 2.6.22.

the patch looks like it should fix the issue 
(will test that soon) but it leaves the comment 
unmodified, which is now wrong ...

 * Called from unshare. Unshare all the namespaces part of nsproxy.
 * On sucess, returns the new nsproxy and a reference to old nsproxy
 * to make sure it stays around.

best,
Herbert

> Thanks,
> 
> C. 
> 
> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
> ---
>  kernel/nsproxy.c |    7 +------
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> Index: 2.6.22-rc4-mm2/kernel/nsproxy.c
> ===================================================================
> --- 2.6.22-rc4-mm2.orig/kernel/nsproxy.c
> +++ 2.6.22-rc4-mm2/kernel/nsproxy.c
> @@ -175,7 +175,6 @@ void free_nsproxy(struct nsproxy *ns)
>  int unshare_nsproxy_namespaces(unsigned long unshare_flags,
>                 struct nsproxy **new_nsp, struct fs_struct *new_fs)
>  {
> -       struct nsproxy *old_ns = current->nsproxy;
>         int err = 0;
>  
>         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> @@ -185,14 +184,10 @@ int unshare_nsproxy_namespaces(unsigned 
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EPERM;
>  
> -       get_nsproxy(old_ns);
> -
>         *new_nsp = create_new_namespaces(unshare_flags, current,
>                                 new_fs ? new_fs : current->fs);
> -       if (IS_ERR(*new_nsp)) {
> +       if (IS_ERR(*new_nsp))
>                 err = PTR_ERR(*new_nsp);
> -               put_nsproxy(old_ns);
> -       }
>         return err;
>  }
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18996 is a reply to message #18995] Mon, 18 June 2007 16:44 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
[ ... ]

>> If we're all happy with these fixes, i'll send them on lkml@ for review. 
>> They might deserve to be in 2.6.22.
> 
> the patch looks like it should fix the issue 
> (will test that soon) but it leaves the comment 
> unmodified, which is now wrong ...
> 
>  * Called from unshare. Unshare all the namespaces part of nsproxy.
>  * On sucess, returns the new nsproxy and a reference to old nsproxy
>  * to make sure it stays around.

right. I will fix that.

Thanks,

C.
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18997 is a reply to message #18993] Mon, 18 June 2007 16:54 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
[ ... ]

>> It fixes the leak for me. I've run the ltp tests we 
>> have on namespace unsharing and i could see the no 
>> leaks in /proc/slabinfo.
> 
>> Badari,
>>
>> That extra get_nsproxy() seemed a superfluous remain 
>> from the 2.6.20. 
>> Do you see any issues with it ?
>>
>> If we're all happy with these fixes, i'll send them on 
>> lkml@ for review. 
> 
> I'm not terribly happy with the current nsproxy
> framework, although it improved somewhat ...
> 
> I'm still missing some mechanism to 'mix' two
> proxies according to a flagmask (which is required
> to enter a guest 'partially') ...

We have that bind_ns() syscall that does that. I sent it last year 
but it didn't have much success. We still use and there may be room
for improvement to make it altruistically useful.  

Here's the patch on a 2.6.21-mm2. It it's of any interest, I can 
refresh it on the latest -mm.

Thanks,

C.

The following patch defines the new bind_ns syscall specific to
nsproxy and namespaces, which allows a process to bind :

	1 - its nsproxy to some identifier
	2 - to another nsproxy using an identifier or -pid

Here's a sample user space program to use it. 

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <libgen.h>
#include <sys/syscall.h>

#ifndef HAVE_UNSHARE

#if __i386__
#    define __NR_unshare 310
#elif __x86_64__
#    define __NR_unshare 272
#elif __ia64__
#    define __NR_unshare 1296
#elif __s390x__
#    define __NR_unshare 303
#elif __powerpc__
#    define __NR_unshare 282
#else
#    error "Architecture not supported"
#endif

#endif /* HAVE_UNSHARE */

#if __i386__
#    define __NR_bind_ns 	326
#elif __ia64__
#    define __NR_bind_ns	1305
#elif __powerpc__
#    define __NR_bind_ns	304
#elif __s390x__
#    define __NR_bind_ns	314
#elif __x86_64__
#    define __NR_bind_ns	284
#else
#    error "Architecture not supported"
#endif

#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS		0x04000000
#endif

#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC		0x08000000
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER		0x10000000
#endif

#ifndef CLONE_NEWNET2
#define CLONE_NEWNET2		0x20000000
#endif

#ifndef CLONE_NEWNET3
#define CLONE_NEWNET3		0x40000000
#endif

#ifndef CLONE_NEWPID
#define CLONE_NEWPID		0x80000000
#endif


static inline _syscall1 (int,  unshare, unsigned long, flags)
static inline _syscall2 (int,  bind_ns, int, id, unsigned long, flags)

static const char* procname;

static void usage(const char *name)
{
	printf("usage: %s [-h] [-I id] [-muiUnNp] [command [arg ...]]\n", name);
	printf("\n");
	printf("  -h		this message\n");
	printf("\n");
	printf("  -I <id>	bind process to nsproxy <id>\n");
	printf("  -m		mount namespace\n");
	printf("  -u		utsname namespace\n");
	printf("  -i		ipc namespace\n");
	printf("  -U		user namespace\n");
	printf("  -n		net namespace level 2\n");
	printf("  -N		net namespace level 3\n");
	printf("  -p		pid namespace\n");
	printf("\n");
	printf("(C) Copyright IBM Corp. 2006\n");
	printf("\n");
	exit(1);
}

int main(int argc, char *argv[])
{	
	int c;
	unsigned long flags = 0;
	int id = -1;

	procname = basename(argv[0]);

	while ((c = getopt(argc, argv, "+muiUnNphI:")) != EOF) {
		switch (c) {
		case 'I': if (optarg) 
				id = atoi(optarg); break;

		case 'm': flags |= CLONE_NEWNS;  break;
		case 'u': flags |= CLONE_NEWUTS;  break;
		case 'i': flags |= CLONE_NEWIPC;  break;
		case 'U': flags |= CLONE_NEWUSER; break;
		case 'n': flags |= CLONE_NEWNET2;  break;
		case 'N': flags |= CLONE_NEWNET3;  break;
		case 'p': flags |= CLONE_NEWPID;  break;
		case 'h':
		default:
			usage(procname);
		}
	};
    
	argv = &argv[optind];
	argc = argc - optind;	
	
	if (!strcmp(procname, "unsharens")) {
		if (unshare(flags) == -1) {
			perror("unshare");
			return 1;
		}	
	}
	
	if (bind_ns(id, flags) == -1) {
		perror("bind_ns");
		return 1;
	}	
	
	if (argc) {
		execve(argv[0], argv, __environ);
		perror("execve");
		return 1;
	}

	return 0;
}

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>

---
 arch/i386/kernel/syscall_table.S  |    1 
 arch/ia64/kernel/entry.S          |    1 
 arch/s390/kernel/compat_wrapper.S |    6 
 arch/s390/kernel/syscalls.S       |    1 
 arch/x86_64/ia32/ia32entry.S      |    1 
 include/asm-i386/unistd.h         |    3 
 include/asm-ia64/unistd.h         |    3 
 include/asm-powerpc/systbl.h      |    1 
 include/asm-powerpc/unistd.h      |    3 
 include/asm-s390/unistd.h         |    3 
 include/asm-x86_64/unistd.h       |    2 
 include/linux/nsproxy.h           |   10 -
 include/linux/sched.h             |    2 
 include/linux/syscalls.h          |    2 
 kernel/nsproxy.c                  |  264 +++++++++++++++++++++++++++++++++++++-
 kernel/sys_ni.c                   |    2 
 16 files changed, 290 insertions(+), 15 deletions(-)

Index: 2.6.21-mm2/include/linux/syscalls.h
===================================================================
--- 2.6.21-mm2.orig/include/linux/syscalls.h
+++ 2.6.21-mm2/include/linux/syscalls.h
@@ -609,6 +609,8 @@ asmlinkage long sys_timerfd(int ufd, int
 			    const struct itimerspec __user *utmr);
 asmlinkage long sys_eventfd(unsigned int count);
 
+asmlinkage long sys_bind_ns(int id, unsigned long unshare_flags);
+
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage long sys_revokeat(int dfd, const char __user *filename);
Index: 2.6.21-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.21-mm2.orig/kernel/nsproxy.c
+++ 2.6.21-mm2/kernel/nsproxy.c
@@ -22,7 +22,11 @@
 
 static struct kmem_cache *nsproxy_cachep;
 
-#define NS_HASH_BITS 		3 /* this might need some configuration */
+/*
+ * nsproxies are stored in a hash but a rbtree might be more
+ * appropriate.
+ */
+#define NS_HASH_BITS 		3
 #define NS_HASH_SIZE		(1 << NS_HASH_BITS)
 #define NS_HASH_MASK		(NS_HASH_SIZE - 1)
 #define ns_hashfn(id)		(((id >> NS_HASH_BITS) + id) & NS_HASH_MASK)
@@ -63,6 +67,30 @@ static inline struct nsproxy *clone_nspr
 }
 
 /*
+ * copies the nsproxy, setting refcount to 1, and grabbing a
+ * reference to all contained namespaces.  Called from
+ * sys_unshare()
+ */
+static struct nsproxy *dup_namespaces(struct nsproxy *orig)
+{
+	struct nsproxy *ns = clone_nsproxy(orig);
+
+	if (ns) {
+		if (ns->mnt_ns)
+			get_mnt_ns(ns->mnt_ns);
+		if (ns->uts_ns)
+			get_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			get_ipc_ns(ns->ipc_ns);
+		if (ns->pid_ns)
+			get_pid_ns(ns->pid_ns);
+		if (ns->user_ns)
+			get_user_ns(ns->user_ns);
+	}
+
+	return ns;
+}
+/*
  * Create new nsproxy and all of its the associated namespaces.
  * Return the newly created nsproxy.  Do not attach this to the task,
  * leave it to the caller to do proper locking and attach it to task.
@@ -123,7 +151,7 @@ int copy_namespaces(int flags, struct ta
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
+	if (!(flags & NS_ALL))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -143,7 +171,7 @@ out:
 	return err;
 }
 
-void free_nsproxy(struct nsproxy *ns)
+static void free_nsproxy(struct nsproxy *ns)
 {
 	if (ns->mnt_ns)
 		put_mnt_ns(ns->mnt_ns);
@@ -157,6 +185,29 @@ void free_nsproxy(struct nsproxy *ns)
 }
 
 /*
+ * put_nsproxy() is similar to free_uid() in kernel/user.c
+ *
+ * the lock can be taken from a tasklet context (task getting freed by
+ * RCU) which requires to be irq safe.
+ */
+void put_nsproxy(struct nsproxy *ns)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (atomic_dec_and_lock(&ns->count, &ns_hash_lock)) {
+		BUG_ON(!ns->id);
+		if (ns->id != -1)
+			hlist_del(&ns->ns_hash_node);
+		spin_unlock_irqrestore(&ns_hash_lock, flags);
+		free_nsproxy(ns);
+	} else {
+		local_irq_restore(flags);
+	}
+}
+EXPORT_SYMBOL_GPL(put_nsproxy);
+
+/*
  * Called from unshare. Unshare all the namespaces part of nsproxy.
  * On sucess, returns the new nsproxy and a reference to old nsproxy
  * to make sure it stays around.
@@ -211,6 +262,212 @@ static inline struct nsproxy *ns_hash_fi
 	return NULL;
 }
 
+struct nsproxy *find_nsproxy_by_id(int id)
+{
+ 	struct nsproxy *ns;
+
+ 	if (id < 0)
+ 		return NULL;
+
+ 	spin_lock_irq(&ns_hash_lock);
+ 	ns = ns_hash_find(id);
+	spin_unlock_irq(&ns_hash_lock);
+
+ 	return ns;
+}
+
+EXPORT_SYMBOL_GPL(find_nsproxy_by_id);
+
+static int bind_ns(int id, struct nsproxy *ns)
+{
+ 	struct nsproxy *prev;
+	int ret = 0;
+
+ 	if (id < 0)
+ 		return -EINVAL;
+
+ 	spin_lock_irq(&ns_hash_lock);
+ 	prev = ns_hash_find(id);
+ 	if (!prev) {
+		ns->id = id;
+		hlist_add_head(&ns->ns_hash_node, ns_hash_head(ns->id));
+	}
+	spin_unlock_irq(&ns_hash_lock);
+
+	if (prev) {
+		ret = -EBUSY;
+		put_nsproxy(prev);
+	}
+ 	return ret;
+}
+
+static int switch_ns(int id, unsigned long flags)
+{
+	int err = 0;
+	struct nsproxy *ns = NULL, *old_ns = NULL, *new_ns = NULL;
+
+	if (flags & ~NS_ALL)
+		return -EINVAL;
+
+	/* Let 0 be a default value ? */
+	if (!flags)
+		flags = NS_ALL;
+
+	if (id < 0) {
+		struct task_struct *p;
+
+		err = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(-id);
+		if (p) {
+			task_lock(p);
+			get_nsproxy(p->nsproxy);
+			ns = p->nsproxy;
+			task_unlock(p);
+		}
+		read_unlock(&tasklist_lock);
+	} else {
+		err = -ENOENT;
+		spin_lock_irq(&ns_hash_lock);
+		ns = ns_hash_find(id);
+		spin_unlock_irq(&ns_hash_lock);
+	}
+
+	if (!ns)
+		goto out;
+
+	new_ns = ns;
+
+	/*
+	 * clone current nsproxy and populate it with 
...

Re: - merge-sys_clone-sys_unshare-nsproxy-and-namespace.patch removed from -mm tree [message #18998 is a reply to message #18994] Mon, 18 June 2007 17:00 Go to previous messageGo to next message
Cedric Le Goater is currently offline  Cedric Le Goater
Messages: 443
Registered: February 2006
Senior Member
Herbert Poetzl wrote:
> On Mon, Jun 18, 2007 at 02:02:19PM +0200, Cedric Le Goater wrote:
>>> on Linux-VServer,we have accounting for those
>>> proxies (and several other namespace related stuff)
>>> because we already suspected leakage and reference
>>> bugs in this area some time ago ... btw, I also
>>> suggested to put a similar functionality in mainline
>>> for the time being, but it was ignored, as usual ...
>> something like a kmem_cache ? 
> 
> maybe, but even simplest accounting of the
> different 'objects' would be more then enough
> for the test phase (or maybe as statistics :)

Sure but Linux-VServer would certainly beneficiate from a nsproxy 
cache. Some scenarii do a lot of unshare(), nop ?

Anyway, i wouldn't know how to measure the difference :) but 
it certainly helped me to identify the leak real fast !

>> and we are not ignoring vserver ! :)
> 
> good for them, our project is called Linux-VServer :)

arg :) 

Cheers,

C.
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
[PATCH] create_new_namespaces: fix improper return of NULL [message #19032 is a reply to message #18978] Tue, 19 June 2007 13:51 Go to previous message
Oleg Nesterov is currently offline  Oleg Nesterov
Messages: 143
Registered: August 2006
Senior Member
Untested.

dup_mnt_ns() and clone_uts_ns() return NULL on failure. This is wrong,
create_new_namespaces() uses ERR_PTR() to catch an error. This means
that the subsequent create_new_namespaces() will hit BUG_ON() in
copy_mnt_ns() or copy_utsname().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- ns/fs/namespace.c~1_NS_NULL	2007-05-21 13:57:56.000000000 +0400
+++ ns/fs/namespace.c	2007-06-19 17:26:35.000000000 +0400
@@ -1457,7 +1457,7 @@ static struct mnt_namespace *dup_mnt_ns(
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	atomic_set(&new_ns->count, 1);
 	INIT_LIST_HEAD(&new_ns->list);
@@ -1471,7 +1471,7 @@ static struct mnt_namespace *dup_mnt_ns(
 	if (!new_ns->root) {
 		up_write(&namespace_sem);
 		kfree(new_ns);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 	spin_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
--- ns/kernel/utsname.c~1_NS_NULL	2007-05-21 13:57:59.000000000 +0400
+++ ns/kernel/utsname.c	2007-06-19 17:35:22.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/version.h>
+#include <linux/err.h>
 
 /*
  * Clone a new ns copying an original utsname, setting refcount to 1
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_n
 	struct uts_namespace *ns;
 
 	ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
-	if (ns) {
-		memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
-		kref_init(&ns->kref);
-	}
+	if (!ns)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+	kref_init(&ns->kref);
 	return ns;
 }
 

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Previous Topic: Re: New pid namespaces patches testing
Next Topic: Re: [PATCH] create_new_namespaces: fix improper return of NULL
Goto Forum:
  


Current Time: Mon Oct 14 14:45:47 GMT 2024

Total time taken to generate the page: 0.05654 seconds