OpenVZ Forum


Home » Mailing lists » Devel » [PATCH 00/10] memcg: Kernel Memory Accounting.
[PATCH 05/10] memcg: Slab accounting. [message #45786 is a reply to message #45782] Mon, 27 February 2012 22:58 Go to previous messageGo to previous message
Suleiman Souhlal is currently offline  Suleiman Souhlal
Messages: 64
Registered: February 2012
Member
Introduce per-cgroup kmem_caches for memcg slab accounting, that
get created the first time we do an allocation of that type in the
cgroup.
If we are not permitted to sleep in that allocation, the cache
gets created asynchronously.
The cgroup cache gets used in subsequent allocations, and permits
accounting of slab on a per-page basis.

The per-cgroup kmem_caches get looked up at slab allocation time,
in a MAX_KMEM_CACHE_TYPES-sized array in the memcg structure, based
on the original kmem_cache's id, which gets allocated when the original
cache gets created.

Allocations that cannot be attributed to a cgroup get charged to
the root cgroup.

Each cgroup kmem_cache has a refcount that dictates the lifetime
of the cache: We destroy a cgroup cache when its cgroup has been
destroyed and there are no more active objects in the cache.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
---
include/linux/memcontrol.h | 30 ++++-
include/linux/slab.h | 1 +
include/linux/slab_def.h | 94 +++++++++++++-
mm/memcontrol.c | 316 ++++++++++++++++++++++++++++++++++++++++++++
mm/slab.c | 266 +++++++++++++++++++++++++++++++++----
5 files changed, 680 insertions(+), 27 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d34356..f5458b0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -421,13 +421,41 @@ struct sock;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk);
-#else
+struct kmem_cache *mem_cgroup_get_kmem_cache(struct kmem_cache *cachep,
+ gfp_t gfp);
+bool mem_cgroup_charge_slab(struct kmem_cache *cachep, gfp_t gfp, size_t size);
+void mem_cgroup_uncharge_slab(struct kmem_cache *cachep, size_t size);
+void mem_cgroup_flush_cache_create_queue(void);
+void mem_cgroup_remove_child_kmem_cache(struct kmem_cache *cachep, int id);
+#else /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
static inline void sock_update_memcg(struct sock *sk)
{
}
static inline void sock_release_memcg(struct sock *sk)
{
}
+
+static inline bool
+mem_cgroup_charge_slab(struct kmem_cache *cachep, gfp_t gfp, size_t size)
+{
+ return true;
+}
+
+static inline void
+mem_cgroup_uncharge_slab(struct kmem_cache *cachep, size_t size)
+{
+}
+
+static inline struct kmem_cache *
+mem_cgroup_get_kmem_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+ return cachep;
+}
+
+static inline void
+mem_cgroup_flush_cache_create_queue(void)
+{
+}
#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
#endif /* _LINUX_MEMCONTROL_H */

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 573c809..fe21a91 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -21,6 +21,7 @@
#define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */
#define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */
#define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */
+#define SLAB_MEMCG 0x00008000UL /* memcg kmem_cache */
#define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */
#define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */
/*
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index fbd1117..449a0de 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -41,6 +41,10 @@ struct kmem_cache {
/* force GFP flags, e.g. GFP_DMA */
gfp_t gfpflags;

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ int id; /* id used for slab accounting */
+#endif
+
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
struct kmem_cache *slabp_cache;
@@ -51,7 +55,7 @@ struct kmem_cache {
void (*ctor)(void *obj);

/* 4) cache creation/removal */
- const char *name;
+ char *name;
struct list_head next;

/* 5) statistics */
@@ -78,9 +82,26 @@ struct kmem_cache {
* variables contain the offset to the user object and its size.
*/
int obj_offset;
- int obj_size;
#endif /* CONFIG_DEBUG_SLAB */

+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
+ int obj_size;
+#endif
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ /* Original cache parameters, used when creating a memcg cache */
+ size_t orig_align;
+ unsigned long orig_flags;
+
+ struct mem_cgroup *memcg;
+
+ /* Who we copied from when creating cpuset cache */
+ struct kmem_cache *orig_cache;
+
+ atomic_t refcnt;
+ struct list_head destroyed_list; /* Used when deleting cpuset cache */
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
/* 6) per-cpu/per-node data, touched during every alloc/free */
/*
* We put array[] at the end of kmem_cache, because we want to size
@@ -212,4 +233,73 @@ found:

#endif /* CONFIG_NUMA */

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+
+#define MAX_KMEM_CACHE_TYPES 300
+
+struct kmem_cache *kmem_cache_create_memcg(struct kmem_cache *cachep,
+ char *name);
+void kmem_cache_destroy_cpuset(struct kmem_cache *cachep);
+void kmem_cache_drop_ref(struct kmem_cache *cachep);
+
+static inline void
+kmem_cache_get_ref(struct kmem_cache *cachep)
+{
+ if ((cachep->flags & SLAB_MEMCG) &&
+ unlikely(!atomic_add_unless(&cachep->refcnt, 1, 0)))
+ BUG();
+}
+
+static inline void
+mem_cgroup_put_kmem_cache(struct kmem_cache *cachep)
+{
+ rcu_read_unlock();
+}
+
+static inline void
+mem_cgroup_kmem_cache_prepare_sleep(struct kmem_cache *cachep)
+{
+ /*
+ * Make sure the cache doesn't get freed while we have interrupts
+ * enabled.
+ */
+ kmem_cache_get_ref(cachep);
+ rcu_read_unlock();
+}
+
+static inline void
+mem_cgroup_kmem_cache_finish_sleep(struct kmem_cache *cachep)
+{
+ rcu_read_lock();
+ kmem_cache_drop_ref(cachep);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
+static inline void
+kmem_cache_get_ref(struct kmem_cache *cachep)
+{
+}
+
+static inline void
+kmem_cache_drop_ref(struct kmem_cache *cachep)
+{
+}
+
+static inline void
+mem_cgroup_put_kmem_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void
+mem_cgroup_kmem_cache_prepare_sleep(struct kmem_cache *cachep)
+{
+}
+
+static inline void
+mem_cgroup_kmem_cache_finish_sleep(struct kmem_cache *cachep)
+{
+}
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
#endif /* _LINUX_SLAB_DEF_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c82ca1c..d1c0cd7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -297,6 +297,11 @@ struct mem_cgroup {
#ifdef CONFIG_INET
struct tcp_memcontrol tcp_mem;
#endif
+
+#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_SLAB)
+ /* Slab accounting */
+ struct kmem_cache *slabs[MAX_KMEM_CACHE_TYPES];
+#endif
int independent_kmem_limit;
};

@@ -5633,6 +5638,312 @@ memcg_uncharge_kmem(struct mem_cgroup *memcg, long long delta)
res_counter_uncharge(&memcg->res, delta);
}

+#ifdef CONFIG_SLAB
+static struct kmem_cache *
+memcg_create_kmem_cache(struct mem_cgroup *memcg, int idx,
+ struct kmem_cache *cachep, gfp_t gfp)
+{
+ struct kmem_cache *new_cachep;
+ struct dentry *dentry;
+ char *name;
+ int len;
+
+ if ((gfp & GFP_KERNEL) != GFP_KERNEL)
+ return cachep;
+
+ dentry = memcg->css.cgroup->dentry;
+ BUG_ON(dentry == NULL);
+ len = strlen(cachep->name);
+ len += dentry->d_name.len;
+ len += 7; /* Space for "()", NUL and appending "dead" */
+ name = kmalloc(len, GFP_KERNEL | __GFP_NOACCOUNT);
+
+ if (name == NULL)
+ return cachep;
+
+ snprintf(name, len, "%s(%s)", cachep->name,
+ dentry ? (const char *)dentry->d_name.name : "/");
+ name[len - 5] = '\0'; /* Make sure we can append "dead" later */
+
+ new_cachep = kmem_cache_create_memcg(cachep, name);
+
+ /*
+ * Another CPU is creating the same cache?
+ * We'll use it next time.
+ */
+ if (new_cachep == NULL) {
+ kfree(name);
+ return cachep;
+ }
+
+ new_cachep->memcg = memcg;
+
+ /*
+ * Make sure someone else hasn't created the new cache in the
+ * meantime.
+ * This should behave as a write barrier, so we should be fine
+ * with RCU.
+ */
+ if (cmpxchg(&memcg->slabs[idx], NULL, new_cachep) != NULL) {
+ kmem_cache_destroy(new_cachep);
+ return cachep;
+ }
+
+ return new_cachep;
+}
+
+struct create_work {
+ struct mem_cgroup *memcg;
+ struct kmem_cache *cachep;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(create_queue_lock);
+static LIST_HEAD(create_queue);
+
+/*
+ * Flush the queue of kmem_caches to create, because we're creating a cgroup.
+ *
+ * We might end up flushing other cgroups' creation requests as well, but
+ * they will just get queued again next time someone tries to make a slab
+ * allocation for them.
+ */
+void
+mem_cgroup_flush_cache_create_queue(void)
+{
+ struct create_work *cw, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&create_queue_lock, flags);
+ list_for_each_entry_safe(cw, tmp, &create_queue, list) {
+ list_del(&cw->list);
+ kfree(cw);
+ }
+ spin_unlock_irqrestore(&create_queue_lock, flags);
+}
+
+static void
+memcg_create_cache_work_func(struct work_struct *w)
+{
+ struct kmem_cache *cachep;
+ struct create_work *cw;
+
+ spin_lock_irq(&create_queue_lock);
+ while (!list_empty(&create_queue)) {
+ cw = list_first_entry(&create_queue, struct create_work, list);
+ list_del(&cw->list);
+ spin_unlock_irq(&create_queue_lock);
+ cachep = memcg_create_kmem_cache(cw->memcg, cw->cachep->id,
+ cw->cachep, GFP_KERNEL);
+ if (cachep == NULL && printk_ratelimit())
+ printk(KERN_ALERT "%s: Couldn't create memcg-cache for"
+ " %s memcg %s\n", __func__, cw->cachep->name,
+ cw->memcg->css.cgroup->dentry->d_name.name);
+ kfree(cw);
+ spin_lock_irq(&create_queue_lock);
+ }
+ spin_unlock_irq(&create_queue_lock);
+}
+
+static DECLARE_WORK(memcg_create_cache_work, memcg_create_cache_work_func);
+
+static void
+memcg_create_cache_enqueue(struct mem_cgroup *memcg, struct kmem_cache *cachep)
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: Re: [PATCH 01/10] memcg: Kernel memory accounting infrastructure.
Next Topic: Re: [PATCH 00/10] memcg: Kernel Memory Accounting.
Goto Forum:
  


Current Time: Fri Aug 15 01:21:39 GMT 2025

Total time taken to generate the page: 0.26593 seconds