OpenVZ Forum


Home » Mailing lists » Devel » [RFC][PATCH] Per container statistics
Re: [RFC][PATCH] Per container statistics [message #12917 is a reply to message #12907] Tue, 15 May 2007 07:12 Go to previous messageGo to previous message
dev is currently offline  dev
Messages: 1693
Registered: September 2005
Location: Moscow
Senior Member

Balbir Singh wrote:
> This patch is inspired by the discussion at http://lkml.org/lkml/2007/4/11/187
> and implements per container statistics as suggested by Andrew Morton
> in http://lkml.org/lkml/2007/4/11/263. The patch is on top of 2.6.21-mm1
> with Paul's containers v9 patches (forward ported)
>
> This patch implements per container statistics infrastructure and re-uses
> code from the taskstats interface. A new set of container operations are
> registered with commands and attributes. It should be very easy to
> extend per container statistics, by adding members to the containerstats
> structure.
>
> The current model for containerstats is a pull model; a push model (to post
> statistics on interesting events) should be very easy to add. Currently
> user space requests statistics by passing the container path; if
> a container is found to belong to the specified hierarchy, then statistics
> about the state of all the tasks in the container are returned to user space.
>
> TODO's
>
> 1. All data is shared as filesystem paths, it involves a lot of string
> and path name handling. Simplify and make the mechanism more elegant
> 2. Cache the entire container path instead of just the mount point path
>
> Feedback, comments, test results are always welcome!
>
> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
> ---
>
> Documentation/accounting/containerstats.txt | 27 ++++
> include/linux/Kbuild | 1
> include/linux/container.h | 10 +
> include/linux/containerstats.h | 72 ++++++++++++
> include/linux/delayacct.h | 11 +
> kernel/container.c | 158 +++++++++++++++++++++++++++-
> kernel/cpuset.c | 2
> kernel/sched.c | 4
> kernel/taskstats.c | 70 ++++++++++++
> 9 files changed, 349 insertions(+), 6 deletions(-)
>
> diff -puN kernel/container.c~containers-taskstats kernel/container.c
> --- linux-2.6.21-mm1/kernel/container.c~containers-taskstats 2007-05-11 08:50:29.000000000 +0530
> +++ linux-2.6.21-mm1-balbir/kernel/container.c 2007-05-14 22:32:05.000000000 +0530
> @@ -59,8 +59,11 @@
> #include <asm/uaccess.h>
> #include <asm/atomic.h>
> #include <linux/mutex.h>
> +#include <linux/delayacct.h>
> +#include <linux/containerstats.h>
>
> #define CONTAINER_SUPER_MAGIC 0x27e0eb
> +#define MAX_MNT_PATH_LEN 256
>
> /* Generate an array of container subsystem pointers */
> #define SUBSYS(_x) &_x ## _subsys,
> @@ -89,6 +92,10 @@ struct containerfs_root {
>
> /* A list running through the mounted hierarchies */
> struct list_head root_list;
> +
> + char *mnt_name;
> + int mnt_name_len;
> + struct vfsmount *mnt;
> };
>
>
> @@ -574,6 +581,9 @@ static void container_put_super(struct s
> ret = rebind_subsystems(root, 0);
> BUG_ON(ret);
>
> + kfree(root->mnt_name);
> + root->mnt_name_len = -1;
> + root->mnt = NULL;
> kfree(root);
> mutex_unlock(&container_mutex);
> }
> @@ -694,7 +704,8 @@ static int container_fill_super(struct s
> return 0;
> }
>
> -static void init_container_root(struct containerfs_root *root) {
> +static void init_container_root(struct containerfs_root *root)
> +{
> struct container *cont = &root->top_container;
> INIT_LIST_HEAD(&root->subsys_list);
> root->number_of_containers = 1;
> @@ -705,6 +716,44 @@ static void init_container_root(struct c
> list_add(&root->root_list, &roots);
> }
>
> +int container_get_mnt_path(struct containerfs_root *root)
> +{
> + char *start;
> + char *buf = root->mnt_name;
> + int buflen = MAX_MNT_PATH_LEN;
> + struct vfsmount *mnt = root->mnt;
> + struct dentry *dentry;
> + int len;
> +
> + if (root->mnt_name_len > 0)
> + return 0;
> +
> + start = buf + buflen;
> +
> + *--start = '\0';
> + for (;;) {
> + dentry = mnt->mnt_mountpoint;
> + do {
> + if (IS_ROOT(dentry) || (dentry == mnt->mnt_root))
> + break;
> + len = dentry->d_name.len;
> + if ((start -= len) < buf)
> + return -ENAMETOOLONG;
> + memcpy(start, dentry->d_name.name, len);
> + dentry = dentry->d_parent;
> + if (--start < buf)
> + return -ENAMETOOLONG;
> + *start = '/';
> + } while (1);
> + mnt = mnt->mnt_parent;
> + if (mnt == mnt->mnt_parent)
> + break;
> + }
> + memmove(buf, start, buf + buflen - start);
> + root->mnt_name_len = (unsigned long)(buf - start) + buflen - 1;
> + return 0;
> +}
> +
> static int container_get_sb(struct file_system_type *fs_type,
> int flags, const char *unused_dev_name,
> void *data, struct vfsmount *mnt)
> @@ -744,6 +793,12 @@ static int container_get_sb(struct file_
> goto out_unlock;
> }
> init_container_root(root);
> + root->mnt_name = kzalloc(MAX_MNT_PATH_LEN, GFP_KERNEL);
> + if (!root->mnt_name) {
> + ret = -ENOMEM;
> + kfree(root);
> + goto out_unlock;
> + }
> }
>
> if (!root->sb) {
> @@ -776,6 +831,7 @@ static int container_get_sb(struct file_
> if (!ret)
> atomic_inc(&root->sb->s_active);
> }
> + root->mnt = mnt;
>
> out_unlock:
> mutex_unlock(&container_mutex);
> @@ -803,11 +859,18 @@ static inline struct cftype *__d_cft(str
> * Returns 0 on success, -errno on error.
> */
>
> -int container_path(const struct container *cont, char *buf, int buflen)
> +int container_path(const struct container *cont, char *buf, int buflen,
> + bool absolute)
> {
> char *start;
> + int ret;
> + struct containerfs_root *root;
> +
> + if (!cont)
> + return -EINVAL;
>
> start = buf + buflen;
> + root = cont->root;
>
> *--start = '\0';
> for (;;) {
> @@ -824,6 +887,17 @@ int container_path(const struct containe
> return -ENAMETOOLONG;
> *start = '/';
> }
> + if (!absolute)
> + goto copy_out;
> +
> + ret = container_get_mnt_path(root);
> + if (ret)
> + return -EINVAL;
> +
> + if ((start -= (root->mnt_name_len)) < buf)
> + return -ENAMETOOLONG;
> + memcpy(start, root->mnt_name, root->mnt_name_len);
> +copy_out:
> memmove(buf, start, buf + buflen - start);
> return 0;
> }
> @@ -1264,6 +1338,84 @@ array_full:
> return n;
> }
>
> +static inline char* compress_path(char *path)
> +{
> + char *tmp;
> + char *start;
> +
> + tmp = kmalloc(strlen(path) + 1, GFP_KERNEL);
> + start = tmp;
> + if (!tmp)
> + return NULL;
> +
> + while (*path) {
> + while (*path && *path == '/')
> + path++;
> + *tmp++ = '/';
> + while (*path && *path != '/')
> + *tmp++ = *path++;
> + }
> + *tmp++ = '\0';
> + return start;
> +}
> +
> +/*
> + * Build and fill containerstats so that taskstats can export it to user
> + * space
> + */
> +int containerstats_build(struct containerstats *stats, char *path)
> +{
> + int ret = 0;
> + struct task_struct *g, *p;
> + char *buf;
> + struct container *cont, *root_cont;
> + int subsys_id;
> + struct containerfs_root *root;
> +
> + buf = kmalloc(MAX_MNT_PATH_LEN, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + rcu_read_lock();
> +
> + for_each_root(root) {
> + if (!root->subsys_bits)
> + continue;
> + root_cont = &root->top_container;
> + get_first_subsys(root_cont, NULL, &subsys_id);
> + do_each_thread(g, p) {
> + cont = task_container(p, subsys_id);
> + ret = container_path(cont, buf, MAX_MNT_PATH_LEN, true);
> + if (ret)
> + goto err;
> + if (strncmp(path, buf, strlen(path)) == 0) {
> + switch (p->state) {
> + case TASK_RUNNING:
> + stats->nr_running++;
> + break;
> + case TASK_INTERRUPTIBLE:
> + stats->nr_sleeping++;
> + break;
> + case TASK_UNINTERRUPTIBLE:
> + stats->nr_uninterruptible++;
> + break;
> + case TASK_STOPPED:
> + stats->nr_stopped++;
> + break;
> + default:
> + if (delayacct_is_task_waiting_on_io(p))
> + stats->nr_io_wait++;
> + break;
> + }
> + }
> + } while_each_thread(g, p);

Oh, please no... Andrew, this loop can run for a very, very long time when there are > 10,000 tasks on the machine...
We have had enough such issues in OpenVZ and just don't want to go through this again.
Also, the sum of RUNNING + UNINTERRUPTIBLE is required for per-container loadavg calculations.

> + }
> +err:
> + rcu_read_unlock();
> + kfree(buf);
> + return ret;
> +}
> +
> static int cmppid(const void *a, const void *b)
> {
> return *(pid_t *)a - *(pid_t *)b;
> @@ -1725,7 +1877,7 @@ static int proc_container_show(struct se
> seq_putc(m, ':');
> get_first_subsys(&root->top_container, NULL, &subsys_id);
> cont = task_container(tsk, subsys_id);
> - retval = container_path(cont, buf, PAGE_SIZE);
> + retval = container_path(cont, buf, PAGE_SIZE, false);
> if (retval < 0)
> goto out_unlock;
> seq_puts(m, buf);
> diff -puN include/linux/container.h~containers-taskstats include/linux/container.h
> --- linux-2.6.21-mm1/include/linux/container.h~containers-taskstats 2007-05-11 08:50:29.000000000 +0530
> +++ linux-2.6.21-mm1-balbir/include/linu
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: PPC64 2.6.18 kernel compilation fix
Next Topic: [PATCH netdev] "wrong timeout value" in sk_wait_data()
Goto Forum:
  


Current Time: Wed Sep 11 01:29:56 GMT 2024

Total time taken to generate the page: 0.05149 seconds