This patch adds the following files to the container filesystem:
notify_on_release - configures/reports whether the container subsystem
should attempt to run a release script when this container becomes
unused
release_agent - configures/reports the release agent to be used for
this hierarchy (top level in each hierarchy only)
releasable - reports whether this container would have been
auto-released if notify_on_release was true and a release agent was
configured (mainly useful for debugging)
To avoid locking issues, invoking the userspace release agent is done
via a workqueue task; containers that need to have their release
agents invoked by the workqueue task are linked on to a list.
When the "cpuset" filesystem is mounted, it automatically sets the
hierarchy's release agent to be /sbin/cpuset_release_agent for
backward-compatibility with existing cpusets users.
Signed-off-by: Paul Menage <menage@google.com>
---
include/linux/container.h | 15 +
kernel/container.c | 364 ++++++++++++++++++++++++++++++++++++++++++----
kernel/cpuset.c | 5
3 files changed, 348 insertions(+), 36 deletions(-)
Index: container-2.6.22-rc2-mm1/include/linux/container.h
============================================================ =======
--- container-2.6.22-rc2-mm1.orig/include/linux/container.h
+++ container-2.6.22-rc2-mm1/include/linux/container.h
@@ -64,11 +64,7 @@ static inline void css_get(struct contai
* css_put() should be called to release a reference taken by
* css_get()
*/
-
-static inline void css_put(struct container_subsys_state *css)
-{
- atomic_dec(&css->refcnt);
-}
+void css_put(struct container_subsys_state *css);
struct container {
unsigned long flags; /* "unsigned long" so bitops work */
@@ -99,6 +95,13 @@ struct container {
* tasks in this container. Protected by css_group_lock
*/
struct list_head css_groups;
+
+ /*
+ * Linked list running through all containers that can
+ * potentially be reaped by the release agent. Protected by
+ * container_mutex
+ */
+ struct list_head release_list;
};
/* A css_group is a structure holding pointers to a set of
@@ -271,6 +274,8 @@ struct task_struct *container_iter_next(
struct container_iter *it);
void container_iter_end(struct container *cont, struct container_iter *it);
+void container_set_release_agent_path(struct container_subsys *ss,
+ const char *path);
#else /* !CONFIG_CONTAINERS */
Index: container-2.6.22-rc2-mm1/kernel/container.c
============================================================ =======
--- container-2.6.22-rc2-mm1.orig/kernel/container.c
+++ container-2.6.22-rc2-mm1/kernel/container.c
@@ -62,6 +62,8 @@
#define CONTAINER_SUPER_MAGIC 0x27e0eb
+static DEFINE_MUTEX(container_mutex);
+
/* Generate an array of container subsystem pointers */
#define SUBSYS(_x) &_x ## _subsys,
@@ -89,6 +91,13 @@ struct containerfs_root {
/* A list running through the mounted hierarchies */
struct list_head root_list;
+
+ /* The path to use for release notifications. No locking
+ * between setting and use - so if userspace updates this
+ * while subcontainers exist, you could miss a
+ * notification. We ensure that it's always a valid
+ * NUL-terminated string */
+ char release_agent_path[PATH_MAX];
};
@@ -115,7 +124,13 @@ static int need_forkexit_callback = 0;
/* bits in struct container flags field */
typedef enum {
+ /* Container is dead */
CONT_REMOVED,
+ /* Container has previously had a child container or a task,
+ * but no longer (only if CONT_NOTIFY_ON_RELEASE is set) */
+ CONT_RELEASABLE,
+ /* Container requires release notifications to userspace */
+ CONT_NOTIFY_ON_RELEASE,
} container_flagbits_t;
/* convenient tests for these bits */
@@ -124,6 +139,19 @@ inline int container_is_removed(const st
return test_bit(CONT_REMOVED, &cont->flags);
}
+inline int container_is_releasable(const struct container *cont)
+{
+ const int bits =
+ (1 << CONT_RELEASABLE) |
+ (1 << CONT_NOTIFY_ON_RELEASE);
+ return (cont->flags & bits) == bits;
+}
+
+inline int notify_on_release(const struct container *cont)
+{
+ return test_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
+}
+
/* for_each_subsys() allows you to iterate on each subsystem attached to
* an active hierarchy */
#define for_each_subsys(_root, _ss) \
@@ -133,6 +161,12 @@ list_for_each_entry(_ss, &_root->subsys_
#define for_each_root(_root) \
list_for_each_entry(_root, &roots, root_list)
+/* the list of containers eligible for automatic release */
+static LIST_HEAD(release_list);
+static void container_release_agent(struct work_struct *work);
+static DECLARE_WORK(release_agent_work, container_release_agent);
+static void check_for_release(struct container *cont);
+
/* Link structure for associating css_group objects with containers */
struct cg_container_link {
/*
@@ -181,11 +215,8 @@ static int css_group_count;
/*
* unlink a css_group from the list and free it
*/
-static void release_css_group(struct kref *k)
+static void unlink_css_group(struct css_group *cg)
{
- struct css_group *cg =
- container_of(k, struct css_group, ref);
- int i;
write_lock(&css_group_lock);
list_del(&cg->list);
css_group_count--;
@@ -198,8 +229,47 @@ static void release_css_group(struct kre
kfree(link);
}
write_unlock(&css_group_lock);
+}
+
+static void release_css_group(struct kref *k)
+{
+ int i;
+ struct css_group *cg = container_of(k, struct css_group, ref);
+ BUG_ON(!mutex_is_locked(&container_mutex));
+
+ unlink_css_group(cg);
for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
- atomic_dec(&cg->subsys[i]->container->count);
+ struct container *cont = cg->subsys[i]->container;
+ if (atomic_dec_and_test(&cont->count) &&
+ container_is_releasable(cont)) {
+ check_for_release(cont);
+ }
+ }
+ kfree(cg);
+}
+
+/*
+ * In the task exit path we want to avoid taking container_mutex
+ * unless absolutely necessary, so the release process is slightly
+ * different.
+ */
+static void release_css_group_taskexit(struct kref *k)
+{
+ int i;
+ struct css_group *cg = container_of(k, struct css_group, ref);
+ unlink_css_group(cg);
+ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
+ struct container *cont = cg->subsys[i]->container;
+ if (notify_on_release(cont)) {
+ mutex_lock(&container_mutex);
+ set_bit(CONT_RELEASABLE, &cont->flags);
+ if (atomic_dec_and_test(&cont->count)) {
+ check_for_release(cont);
+ }
+ mutex_unlock(&container_mutex);
+ } else {
+ atomic_dec(&cont->count);
+ }
}
kfree(cg);
}
@@ -217,6 +287,11 @@ static inline void put_css_group(struct
kref_put(&cg->ref, release_css_group);
}
+static inline void put_css_group_taskexit(struct css_group *cg)
+{
+ kref_put(&cg->ref, release_css_group_taskexit);
+}
+
/*
* find_existing_css_group() is a helper for
* find_css_group(), and checks to see whether an existing
@@ -446,8 +521,6 @@ static struct css_group *find_css_group(
* update of a tasks container pointer by attach_task()
*/
-static DEFINE_MUTEX(container_mutex);
-
/**
* container_lock - lock out any changes to container structures
*
@@ -795,6 +868,7 @@ static int container_fill_super(struct s
root->d_fsdata = &hroot->top_container;
hroot->top_container.dentry = root;
+ strcpy(hroot->release_agent_path, "");
sb->s_fs_info = hroot;
hroot->sb = sb;
@@ -811,6 +885,7 @@ static void init_container_root(struct c
INIT_LIST_HEAD(&cont->sibling);
INIT_LIST_HEAD(&cont->children);
INIT_LIST_HEAD(&cont->css_groups);
+ INIT_LIST_HEAD(&cont->release_list);
list_add(&root->root_list, &roots);
root_count++;
}
@@ -1057,7 +1132,7 @@ static int attach_task(struct container
ss->attach(ss, cont, oldcont, tsk);
}
}
-
+ set_bit(CONT_RELEASABLE, &oldcont->flags);
synchronize_rcu();
put_css_group(cg);
return 0;
@@ -1109,6 +1184,9 @@ typedef enum {
FILE_ROOT,
FILE_DIR,
FILE_TASKLIST,
+ FILE_NOTIFY_ON_RELEASE,
+ FILE_RELEASABLE,
+ FILE_RELEASE_AGENT,
} container_filetype_t;
static ssize_t container_common_file_write(struct container *cont,
@@ -1145,6 +1223,28 @@ static ssize_t container_common_file_wri
case FILE_TASKLIST:
retval = attach_task_by_pid(cont, buffer);
break;
+ case FILE_NOTIFY_ON_RELEASE:
+ clear_bit(CONT_RELEASABLE, &cont->flags);
+ if (simple_strtoul(buffer, NULL, 10) != 0)
+ set_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
+ else
+ clear_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
+ break;
+ case FILE_RELEASE_AGENT:
+ {
+ struct containerfs_root *root = cont->root;
+ if (nbytes < sizeof(root->release_agent_path)) {
+ /* We never write anything other than '\0'
+ * into the last char of release_agent_path,
+ * so it always remains a NUL-terminated
+ * string */
+ strncpy(root->release_agent_path, buffer, nbytes);
+ root->release_agent_path[nbytes] = 0;
+ } else {
+ retval = -ENOSPC;
+ }
+ break;
+ }
default:
retval = -EINVAL;
goto out2;
@@ -1183,6 +1283,49 @@ static ssize_t container_read_uint(struc
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
+static ssize_t container_common_file_read(struct container *cont,
+ struct cftype *cft,
+ struct file *file,
+ char __user *buf,
+ size_t nbytes, loff_t *ppos)
+{
+ container_filetype_t type = cft->private;
+ char *page;
+ ssize_t retval = 0;
+ char *s;
+
+ if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+
+ s = page;
+
+ switch (type) {
+ case FILE_RELEASE_AGENT:
+ {
+ struct containerfs_root *root;
+ size_t n;
+ mutex_lock(&container_mutex);
+ root = cont->root;
+ n = strnlen(root->release_agent_path,
+ sizeof(root->release_ag
...
|