Home » Mailing lists » Devel » [PATCH 08/10] Containers(V10): Share css_group arrays between tasks with same container memberships
[PATCH 08/10] Containers(V10): Share css_group arrays between tasks with same container memberships [message #13539] |
Tue, 29 May 2007 13:01 |
Paul Menage
Messages: 642 Registered: September 2006
|
Senior Member |
|
|
This patch replaces the struct css_group embedded in task_struct with
a pointer; all tasks that have the same set of memberships across all
hierarchies will share a css_group object, and will be linked via
their cg_list field to the "tasks" list_head in the css_group.
Assuming that many tasks share the same container assignments, this
reduces overall space usage and keeps the size of the task_struct down
(three pointers added to task_struct compared to a non-containers
kernel, no matter how many subsystems are registered).
Signed-off-by: Paul Menage <menage@google.com>
---
Documentation/containers.txt | 14 +
include/linux/container.h | 93 ++++++-
include/linux/sched.h | 33 --
kernel/container.c | 524 ++++++++++++++++++++++++++++++++++++-------
kernel/cpuset.c | 15 -
5 files changed, 553 insertions(+), 126 deletions(-)
Index: container-2.6.22-rc2-mm1/include/linux/container.h
===================================================================
--- container-2.6.22-rc2-mm1.orig/include/linux/container.h
+++ container-2.6.22-rc2-mm1/include/linux/container.h
@@ -29,6 +29,14 @@ extern void container_unlock(void);
struct containerfs_root;
+/* Define the enumeration of all container subsystems */
+#define SUBSYS(_x) _x ## _subsys_id,
+enum container_subsys_id {
+#include <linux/container_subsys.h>
+ CONTAINER_SUBSYS_COUNT
+};
+#undef SUBSYS
+
/* Per-subsystem/per-container state maintained by the system. */
struct container_subsys_state {
/* The container that this subsystem is attached to. Useful
@@ -85,6 +93,54 @@ struct container {
struct containerfs_root *root;
struct container *top_container;
+
+ /*
+ * List of cg_container_links pointing at css_groups with
+ * tasks in this container. Protected by css_group_lock
+ */
+ struct list_head css_groups;
+};
+
+/* A css_group is a structure holding pointers to a set of
+ * container_subsys_state objects. This saves space in the task struct
+ * object and speeds up fork()/exit(), since a single inc/dec and a
+ * list_add()/del() can bump the reference count on the entire
+ * container set for a task.
+ */
+
+struct css_group {
+
+ /* Reference count */
+ struct kref ref;
+
+ /*
+ * List running through all container groups. Protected by
+ * css_group_lock
+ */
+ struct list_head list;
+
+ /*
+ * List running through all tasks using this container
+ * group. Protected by css_group_lock
+ */
+ struct list_head tasks;
+
+ /*
+ * List of cg_container_link objects on link chains from
+ * containers referenced from this css_group. Protected by
+ * css_group_lock
+ */
+ struct list_head cg_links;
+
+ /* Set of subsystem states, one for each subsystem. NULL for
+ * subsystems that aren't part of this hierarchy. These
+ * pointers reduce the number of dereferences required to get
+ * from a task to its state for a given container, but result
+ * in increased space usage if tasks are in wildly different
+ * groupings across different hierarchies. This array is
+ * immutable after creation */
+ struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
+
};
/* struct cftype:
@@ -111,6 +167,10 @@ struct cftype {
ssize_t (*read) (struct container *cont, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos);
+ /*
+ * read_uint() is a shortcut for the common case of returning a
+ * single integer. Use it in place of read()
+ */
u64 (*read_uint) (struct container *cont, struct cftype *cft);
ssize_t (*write) (struct container *cont, struct cftype *cft,
struct file *file,
@@ -131,15 +191,7 @@ int container_is_removed(const struct co
int container_path(const struct container *cont, char *buf, int buflen);
-int __container_task_count(const struct container *cont);
-static inline int container_task_count(const struct container *cont)
-{
- int task_count;
- rcu_read_lock();
- task_count = __container_task_count(cont);
- rcu_read_unlock();
- return task_count;
-}
+int container_task_count(const struct container *cont);
/* Return true if the container is a descendant of the current container */
int container_is_descendant(const struct container *cont);
@@ -186,7 +238,7 @@ static inline struct container_subsys_st
static inline struct container_subsys_state *task_subsys_state(
struct task_struct *task, int subsys_id)
{
- return rcu_dereference(task->containers.subsys[subsys_id]);
+ return rcu_dereference(task->containers->subsys[subsys_id]);
}
static inline struct container* task_container(struct task_struct *task,
@@ -199,6 +251,27 @@ int container_path(const struct containe
int container_clone(struct task_struct *tsk, struct container_subsys *ss);
+/* A container_iter should be treated as an opaque object */
+struct container_iter {
+ struct list_head *cg_link;
+ struct list_head *task;
+};
+
+/* To iterate across the tasks in a container:
+ *
+ * 1) call container_iter_start() to initialize an iterator
+ *
+ * 2) call container_iter_next() to retrieve member tasks until it
+ * returns NULL or until you want to end the iteration
+ *
+ * 3) call container_iter_end() to destroy the iterator.
+ */
+void container_iter_start(struct container *cont, struct container_iter *it);
+struct task_struct *container_iter_next(struct container *cont,
+ struct container_iter *it);
+void container_iter_end(struct container *cont, struct container_iter *it);
+
+
#else /* !CONFIG_CONTAINERS */
static inline int container_init_early(void) { return 0; }
Index: container-2.6.22-rc2-mm1/include/linux/sched.h
===================================================================
--- container-2.6.22-rc2-mm1.orig/include/linux/sched.h
+++ container-2.6.22-rc2-mm1/include/linux/sched.h
@@ -849,34 +849,6 @@ struct sched_class {
void (*task_new) (struct rq *rq, struct task_struct *p);
};
-#ifdef CONFIG_CONTAINERS
-
-#define SUBSYS(_x) _x ## _subsys_id,
-enum container_subsys_id {
-#include <linux/container_subsys.h>
- CONTAINER_SUBSYS_COUNT
-};
-#undef SUBSYS
-
-/* A css_group is a structure holding pointers to a set of
- * container_subsys_state objects.
- */
-
-struct css_group {
-
- /* Set of subsystem states, one for each subsystem. NULL for
- * subsystems that aren't part of this hierarchy. These
- * pointers reduce the number of dereferences required to get
- * from a task to its state for a given container, but result
- * in increased space usage if tasks are in wildly different
- * groupings across different hierarchies. This array is
- * immutable after creation */
- struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
-
-};
-
-#endif /* CONFIG_CONTAINERS */
-
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -1133,7 +1105,10 @@ struct task_struct {
int cpuset_mem_spread_rotor;
#endif
#ifdef CONFIG_CONTAINERS
- struct css_group containers;
+ /* Container info protected by css_group_lock */
+ struct css_group *containers;
+ /* cg_list protected by css_group_lock and tsk->alloc_lock */
+ struct list_head cg_list;
#endif
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
Index: container-2.6.22-rc2-mm1/kernel/container.c
===================================================================
--- container-2.6.22-rc2-mm1.orig/kernel/container.c
+++ container-2.6.22-rc2-mm1/kernel/container.c
@@ -101,6 +101,7 @@ static struct containerfs_root rootnode;
/* The list of hierarchy roots */
static LIST_HEAD(roots);
+static int root_count;
/* dummytop is a shorthand for the dummy hierarchy's top container */
#define dummytop (&rootnode.top_container)
@@ -132,12 +133,43 @@ list_for_each_entry(_ss, &_root->subsys_
#define for_each_root(_root) \
list_for_each_entry(_root, &roots, root_list)
-/* Each task_struct has an embedded css_group, so the get/put
- * operation simply takes a reference count on all the containers
- * referenced by subsystems in this css_group. This can end up
- * multiple-counting some containers, but that's OK - the ref-count is
- * just a busy/not-busy indicator; ensuring that we only count each
- * container once would require taking a global lock to ensure that no
+/* Link structure for associating css_group objects with containers */
+struct cg_container_link {
+ /*
+ * List running through cg_container_links associated with a
+ * container, anchored on container->css_groups
+ */
+ struct list_head cont_link_list;
+ /*
+ * List running through cg_container_links pointing at a
+ * single css_group object, anchored on css_group->cg_links
+ */
+ struct list_head cg_link_list;
+ struct css_group *cg;
+};
+
+/* The default css_group - used by init and its children prior to any
+ * hierarchies being mounted. It contains a pointer to the root state
+ * for each subsystem. Also used to anchor the list of css_groups. Not
+ * reference-counted, to improve performance when child containers
+ * haven't been created.
+ */
+
+static struct css_group init_css_group;
+static struct cg_container_link init_css_group_link;
+
+/* css_group_lock protects the list of css_group objects, and the
+ * chain of tasks off each css_group. Nests inside task->alloc_lock */
+static DEFINE_RWLOCK(css_group_lock);
+static int css_group_count;
+
+
+/* When we create or destroy a css_group, the operation simply
+ * takes/releases a reference count on all the containers referenced
+ * by subsystems in this css_group. This can end up multiple-counting
+ * some containers, but that's OK - the ref-count is just a
+ * busy/not-busy indicator; ensuring that we only count each container
+ * once would require taking a global lock to ensure that no
* subsystems moved between hierarchies while we were doing so.
*
* Possible TODO: decide at boot time based on the number of
@@ -146,20 +178,218 @@ list_for_each_entry(_root, &roots, root_
* take a global lock and only
...
|
|
|
Goto Forum:
Current Time: Sat Jul 26 01:20:19 GMT 2025
Total time taken to generate the page: 0.22239 seconds
|