Subject: Re: [PATCH 17/23] kmem controller charge/uncharge infrastructure
From: Suleiman Souhlal
Date: Mon, 30 April 2012 20:56
In-Reply-To: message #46021
On Sun, Apr 22, 2012 at 4:53 PM, Glauber Costa <glommer@parallels.com> wrote:
> With all the dependencies already in place, this patch introduces
> the charge/uncharge functions for the slab cache accounting in memcg.
>
> Before we can charge a cache, we need to select the right cache.
> This is done by using the function __mem_cgroup_get_kmem_cache().
>
> If we should use the root kmem cache, this function tries to detect
> that and return as early as possible.
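
(For reference, my reading of that early-return path: the "use the root
cache" cases are presumably detected along these lines. This is a sketch,
not the patch's code; in_interrupt() matches the comment quoted further
down, the rest is guessed:)

        /* Allocations from interrupt context, or from tasks with no
         * mm (kernel threads), stay with the root cache. */
        if (in_interrupt() || !current->mm)
                return cachep;
        memcg = mem_cgroup_from_task(current);
        if (!mem_cgroup_kmem_enabled(memcg))
                return cachep;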
>
> The charge and uncharge functions come in two flavours:
> * __mem_cgroup_(un)charge_slab(), which assumes the allocation is
> a slab page, and
> * __mem_cgroup_(un)charge_kmem(), which does not. The latter exists
> because the slub allocator draws its larger kmalloc allocations
> from the page allocator.
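
(From that description, I take the prototypes to be roughly the
following; the actual hunk is in the memcontrol.h part of the diff, so
the parameter lists here are my guesses:)

bool __mem_cgroup_charge_slab(struct kmem_cache *cachep, gfp_t gfp,
                              size_t size);
void __mem_cgroup_uncharge_slab(struct kmem_cache *cachep, size_t size);

/* For slub's larger kmallocs, which come straight from the page
 * allocator and so have no kmem_cache to hang the charge on: */
bool __mem_cgroup_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
                              size_t size);
void __mem_cgroup_uncharge_kmem(struct mem_cgroup *memcg, size_t size);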
>
> In memcontrol.h those functions are wrapped in inline accessors.
> The idea is to later patch those with jump labels, so we don't
> incur any overhead when no mem cgroups are being used.
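
(So eventually something like the following, I assume, using the
static_key API; the key name and wrapper shape are hypothetical:)

extern struct static_key mem_cgroup_kmem_enabled_key;

static inline struct kmem_cache *
mem_cgroup_get_kmem_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        /* Compiles to a patched no-op until the first kmem-limited
         * memcg enables the key. */
        if (static_key_false(&mem_cgroup_kmem_enabled_key))
                return __mem_cgroup_get_kmem_cache(cachep, gfp);
        return cachep;
}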
>
> Because the slub allocator tends to inline the allocations whenever
> it can, those functions need to be exported so modules can make use
> of them properly.
>
> I apologize in advance to the reviewers. This patch is quite big, but
> I was not able to split it any further due to all the dependencies
> within the code.
>
> This code is inspired by the code written by Suleiman Souhlal,
> but heavily changed.
>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: Christoph Lameter <cl@linux.com>
> CC: Pekka Enberg <penberg@cs.helsinki.fi>
> CC: Michal Hocko <mhocko@suse.cz>
> CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Johannes Weiner <hannes@cmpxchg.org>
> CC: Suleiman Souhlal <suleiman@google.com>
> ---
>  include/linux/memcontrol.h |   68 ++++++++
>  init/Kconfig               |    2 +-
>  mm/memcontrol.c            |  373 +++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 441 insertions(+), 2 deletions(-)
>
> +
> +static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
> +                                                  struct kmem_cache *cachep)
> +{
> +        struct kmem_cache *new_cachep;
> +        int idx;
> +
> +        BUG_ON(!mem_cgroup_kmem_enabled(memcg));
> +
> +        idx = cachep->memcg_params.id;
> +
> +        mutex_lock(&memcg_cache_mutex);
> +        new_cachep = memcg->slabs[idx];
> +        if (new_cachep)
> +                goto out;
> +
> +        new_cachep = kmem_cache_dup(memcg, cachep);
> +
> +        if (new_cachep == NULL) {
> +                new_cachep = cachep;
> +                goto out;
> +        }
> +
> +        mem_cgroup_get(memcg);
> +        memcg->slabs[idx] = new_cachep;
> +        new_cachep->memcg_params.memcg = memcg;
> +out:
> +        mutex_unlock(&memcg_cache_mutex);
> +        return new_cachep;
> +}
> +
> +struct create_work {
> +        struct mem_cgroup *memcg;
> +        struct kmem_cache *cachep;
> +        struct list_head list;
> +};
> +
> +/* Use a single spinlock for destruction and creation, not a frequent op */
> +static DEFINE_SPINLOCK(cache_queue_lock);
> +static LIST_HEAD(create_queue);
> +static LIST_HEAD(destroyed_caches);
> +
> +static void kmem_cache_destroy_work_func(struct work_struct *w)
> +{
> +        struct kmem_cache *cachep;
> +        char *name;
> +
> +        spin_lock_irq(&cache_queue_lock);
> +        while (!list_empty(&destroyed_caches)) {
> +                cachep = container_of(list_first_entry(&destroyed_caches,
> +                    struct mem_cgroup_cache_params, destroyed_list), struct
> +                    kmem_cache, memcg_params);
> +                name = (char *)cachep->name;
> +                list_del(&cachep->memcg_params.destroyed_list);
> +                spin_unlock_irq(&cache_queue_lock);
> +                synchronize_rcu();
Is this synchronize_rcu() still needed, now that we no longer use RCU
to protect memcgs from disappearing during allocation?
Also, should we drop the memcg reference we got in
memcg_create_kmem_cache() here?
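Something along these lines is what I have in mind (untested sketch;
needs a local "struct mem_cgroup *memcg" in the worker, and assumes the
memcg_params.memcg back-pointer set in memcg_create_kmem_cache() is
still valid at this point):

                memcg = cachep->memcg_params.memcg;
                kmem_cache_destroy(cachep);
                kfree(name);
                if (memcg)
                        mem_cgroup_put(memcg);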
> +                kmem_cache_destroy(cachep);
> +                kfree(name);
> +                spin_lock_irq(&cache_queue_lock);
> +        }
> +        spin_unlock_irq(&cache_queue_lock);
> +}
> +static DECLARE_WORK(kmem_cache_destroy_work, kmem_cache_destroy_work_func);
> +
> +void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
> +{
> +        unsigned long flags;
> +
> +        BUG_ON(cachep->memcg_params.id != -1);
> +
> +        /*
> +         * We have to defer the actual destroying to a workqueue, because
> +         * we might currently be in a context that cannot sleep.
> +         */
> +        spin_lock_irqsave(&cache_queue_lock, flags);
> +        list_add(&cachep->memcg_params.destroyed_list, &destroyed_caches);
> +        spin_unlock_irqrestore(&cache_queue_lock, flags);
> +
> +        schedule_work(&kmem_cache_destroy_work);
> +}
> +
> +
> +/*
> + * Flush the queue of kmem_caches to create, because we're creating a cgroup.
> + *
> + * We might end up flushing other cgroups' creation requests as well, but
> + * they will just get queued again next time someone tries to make a slab
> + * allocation for them.
> + */
> +void mem_cgroup_flush_cache_create_queue(void)
> +{
> +        struct create_work *cw, *tmp;
> +        unsigned long flags;
> +
> +        spin_lock_irqsave(&cache_queue_lock, flags);
> +        list_for_each_entry_safe(cw, tmp, &create_queue, list) {
> +                list_del(&cw->list);
> +                kfree(cw);
> +        }
> +        spin_unlock_irqrestore(&cache_queue_lock, flags);
> +}
> +
> +static void memcg_create_cache_work_func(struct work_struct *w)
> +{
> +        struct kmem_cache *cachep;
> +        struct create_work *cw;
> +
> +        spin_lock_irq(&cache_queue_lock);
> +        while (!list_empty(&create_queue)) {
> +                cw = list_first_entry(&create_queue, struct create_work, list);
> +                list_del(&cw->list);
> +                spin_unlock_irq(&cache_queue_lock);
> +                cachep = memcg_create_kmem_cache(cw->memcg, cw->cachep);
> +                if (cachep == NULL)
> +                        printk(KERN_ALERT
> +                        "%s: Couldn't create memcg-cache for %s memcg %s\n",
> +                        __func__, cw->cachep->name,
> +                        cw->memcg->css.cgroup->dentry->d_name.name);
We might need rcu_dereference() here (and hold rcu_read_lock()).
Or we could just remove this message.
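That is, roughly (untested; assumes the cgroup's dentry pointer really
is published and freed under RCU, otherwise the message should just go
away as suggested):

                if (cachep == NULL) {
                        rcu_read_lock();
                        printk(KERN_ALERT
                               "%s: Couldn't create memcg-cache for %s memcg %s\n",
                               __func__, cw->cachep->name,
                               rcu_dereference(cw->memcg->css.cgroup->dentry)->d_name.name);
                        rcu_read_unlock();
                }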
> +                /* Drop the reference gotten when we enqueued. */
> +                css_put(&cw->memcg->css);
> +                kfree(cw);
> +                spin_lock_irq(&cache_queue_lock);
> +        }
> +        spin_unlock_irq(&cache_queue_lock);
> +}
> +
> +static DECLARE_WORK(memcg_create_cache_work, memcg_create_cache_work_func);
> +
> +/*
> + * Enqueue the creation of a per-memcg kmem_cache.
> + * Called with rcu_read_lock held.
> + */
> +static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
> +                                       struct kmem_cache *cachep)
> +{
> +        struct create_work *cw;
> +        unsigned long flags;
> +
> +        spin_lock_irqsave(&cache_queue_lock, flags);
> +        list_for_each_entry(cw, &create_queue, list) {
> +                if (cw->memcg == memcg && cw->cachep == cachep) {
> +                        spin_unlock_irqrestore(&cache_queue_lock, flags);
> +                        return;
> +                }
> +        }
> +        spin_unlock_irqrestore(&cache_queue_lock, flags);
> +
> +        /* The corresponding put will be done in the workqueue. */
> +        if (!css_tryget(&memcg->css))
> +                return;
> +
> +        cw = kmalloc_no_account(sizeof(struct create_work), GFP_NOWAIT);
> +        if (cw == NULL) {
> +                css_put(&memcg->css);
> +                return;
> +        }
> +
> +        cw->memcg = memcg;
> +        cw->cachep = cachep;
> +        spin_lock_irqsave(&cache_queue_lock, flags);
> +        list_add_tail(&cw->list, &create_queue);
> +        spin_unlock_irqrestore(&cache_queue_lock, flags);
> +
> +        schedule_work(&memcg_create_cache_work);
> +}
> +
> +/*
> + * Return the kmem_cache we're supposed to use for a slab allocation.
> + * If we are in interrupt context or otherwise have an allocation that
> + * can't fail, we return the original cache.
> + * Otherwise, we will try to use the current memcg's version of the cache.
> + *
> + * If the cache does not exist yet and we are the first user of it,
> + * we either create it immediately, if possible, or
...