OpenVZ Forum


Home » Mailing lists » Devel » Re: [RFC/PATCH] cgroup swap subsystem
Re: [RFC/PATCH] cgroup swap subsystem [message #27995] Wed, 05 March 2008 08:33 Go to next message
Pavel Emelianov is currently offline  Pavel Emelianov
Messages: 1149
Registered: September 2006
Senior Member
Daisuke Nishimura wrote:
> Hi.
> 
> Even if limiting memory usage by cgroup memory subsystem
> or isolating memory by cpuset, swap space is shared, so
> resource isolation is not enough. If one group uses up all the
> swap space, it can affect other groups.
> 
> I try making a patch of swap subsystem based on memory
> subsystem, which limits swap usage per cgroup.
> It can now charge and limit the swap usage.
> 
> I implemented this feature as a new subsystem,
> not as a part of memory subsystem, because I don't want to
> make big change to memcontrol.c, and even if implemented
> as other subsystem, users can manage memory and swap on
> the same cgroup directory if mount them together.
> 
> Basic idea of my implementation:
>   - what will be charged ?
>     the number of swap entries.

A "swap entry" is a very obscure thing for the end user. People
would prefer accounting in bytes.

>   - when to charge/uncharge ?
>     charge at get_swap_page(), and uncharge at swap_entry_free().
> 
>   - to what group charge the swap entry ?
>     To determine to what swap_cgroup (corresponding to mem_cgroup in
>     memory subsystem) the swap entry should be charged,
>     I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and
>     changed the argument of get_swap_page() from (void) to
>     (struct page *). As a result, get_swap_page() can determine
>     to what swap_cgroup it should charge the swap entry
>     by referring to page->page_cgroup->mm_struct->swap_cgroup.
> 
>   - from what group uncharge the swap entry ?
>     I added to swap_info_struct a member 'struct swap_cgroup **',
>     array of pointer to which swap_cgroup the swap entry is
>     charged.
> 
> Todo:
>   - rebase new kernel, and split into some patches.
>   - Merge with memory subsystem (if it would be better), or
>     remove dependency on CONFIG_CGROUP_MEM_CONT if possible
>     (needs to make page_cgroup more generic one).

Merge is a must IMHO. I can hardly imagine a situation in which
someone would need these two separately.

>   - More tests, cleanups, and features   :-)  
> 
> 
> Any comments or discussions would be appreciated.
> 
> Thanks,
> Daisuke Nishimura
> 
> 
> Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
> 
> ---
> diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h
> --- linux-2.6.24-mm1/include/linux/cgroup_subsys.h	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h	2008-03-03 10:56:56.000000000 +0900
> @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup)
>  #endif
>  
>  /* */
> +
> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
> +SUBSYS(swap)
> +#endif
> +
> +/* */
> diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h
> --- linux-2.6.24-mm1/include/linux/memcontrol.h	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h	2008-03-03 10:56:56.000000000 +0900
> @@ -29,6 +29,21 @@ struct page;
>  struct mm_struct;
>  
>  #ifdef CONFIG_CGROUP_MEM_CONT
> +/*
> + * A page_cgroup page is associated with every page descriptor. The
> + * page_cgroup helps us identify information about the cgroup
> + */
> +struct page_cgroup {
> +	struct list_head lru;		/* per cgroup LRU list */
> +	struct page *page;
> +	struct mem_cgroup *mem_cgroup;
> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
> +	struct mm_struct *pc_mm;
> +#endif

Try not to add new entries here.

> +	atomic_t ref_cnt;		/* Helpful when pages move b/w  */
> +					/* mapped and cached states     */
> +	int	 flags;
> +};
>  
>  extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
>  extern void mm_free_cgroup(struct mm_struct *mm);
> diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h
> --- linux-2.6.24-mm1/include/linux/mm_types.h	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h	2008-03-03 10:56:56.000000000 +0900
> @@ -233,6 +233,9 @@ struct mm_struct {
>  #ifdef CONFIG_CGROUP_MEM_CONT
>  	struct mem_cgroup *mem_cgroup;
>  #endif
> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
> +	struct swap_cgroup *swap_cgroup;
> +#endif
>  };
>  
>  #endif /* _LINUX_MM_TYPES_H */
> diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h
> --- linux-2.6.24-mm1/include/linux/swap.h	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h	2008-03-03 10:56:56.000000000 +0900
> @@ -7,6 +7,7 @@
>  #include <linux/list.h>
>  #include <linux/memcontrol.h>
>  #include <linux/sched.h>
> +#include <linux/swap_limit.h>
>  
>  #include <asm/atomic.h>
>  #include <asm/page.h>
> @@ -141,6 +142,9 @@ struct swap_info_struct {
>  	struct swap_extent *curr_swap_extent;
>  	unsigned old_block_size;
>  	unsigned short * swap_map;
> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
> +	struct swap_cgroup **swap_cgroup;
> +#endif
>  	unsigned int lowest_bit;
>  	unsigned int highest_bit;
>  	unsigned int cluster_next;
> @@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp
>  extern long total_swap_pages;
>  extern unsigned int nr_swapfiles;
>  extern void si_swapinfo(struct sysinfo *);
> -extern swp_entry_t get_swap_page(void);
> +extern swp_entry_t get_swap_page(struct page *);
>  extern swp_entry_t get_swap_page_of_type(int);
>  extern int swap_duplicate(swp_entry_t);
>  extern int valid_swaphandles(swp_entry_t, unsigned long *);
> @@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_
>  	return 0;
>  }
>  
> -static inline swp_entry_t get_swap_page(void)
> +static inline swp_entry_t get_swap_page(struct page *page)
>  {
>  	swp_entry_t entry;
>  	entry.val = 0;
> diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h
> --- linux-2.6.24-mm1/include/linux/swap_limit.h	1970-01-01 09:00:00.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h	2008-03-03 10:56:56.000000000 +0900
> @@ -0,0 +1,65 @@
> +/*
> + * swap_limit.h
> + *
> + */
> +#ifndef _LINUX_SWAP_LIMIT_H
> +#define _LINUX_SWAP_LIMIT_H
> +
> +#include <linux/swap.h>
> +#include <linux/cgroup.h>
> +#include <linux/res_counter.h>
> +
> +struct swap_cgroup;
> +struct swap_info_struct;
> +
> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
> +struct swap_cgroup {
> +	struct cgroup_subsys_state css;
> +	struct res_counter res;
> +};
> +
> +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
> +{
> +	return container_of(cgroup_subsys_state(cgrp, swap_subsys_id),
> +				struct swap_cgroup,
> +				css);
> +}
> +
> +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
> +{
> +	return container_of(task_subsys_state(p, swap_subsys_id),
> +				struct swap_cgroup, css);
> +}
> +
> +extern int swap_cgroup_charge(struct page *page,
> +				struct swap_info_struct *si,
> +				unsigned long offset);
> +extern void swap_cgroup_uncharge(struct swap_info_struct *si,
> +				unsigned long offset);
> +
> +#else /* CONFIG_CGROUP_SWAP_LIMIT */
> +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
> +{
> +	return NULL;
> +}
> +
> +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
> +{
> +	return NULL;
> +}
> +
> +static inline int swap_cgroup_charge(struct page *page,
> +					struct swap_info_struct *si,
> +					unsigned long offset)
> +{
> +	return 0;
> +}
> +
> +static inline void swap_cgroup_uncharge(struct swap_info_struct *si,
> +					unsigned long offset)
> +{
> +}
> +
> +#endif
> +
> +#endif
> diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig
> --- linux-2.6.24-mm1/init/Kconfig	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/init/Kconfig	2008-03-03 10:56:56.000000000 +0900
> @@ -383,6 +383,12 @@ config CGROUP_MEM_CONT
>  	  Provides a memory controller that manages both page cache and
>  	  RSS memory.
>  
> +config CGROUP_SWAP_LIMIT
> +	bool "cgroup subsystem for swap"
> +	depends on CGROUP_MEM_CONT && SWAP
> +	help
> +	  Provides a swap controller that manages and limits swap usage.
> +
>  config PROC_PID_CPUSET
>  	bool "Include legacy /proc/<pid>/cpuset file"
>  	depends on CPUSETS
> diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile
> --- linux-2.6.24-mm1/mm/Makefile	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/mm/Makefile	2008-03-03 10:56:56.000000000 +0900
> @@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
>  obj-$(CONFIG_SMP) += allocpercpu.o
>  obj-$(CONFIG_QUICKLIST) += quicklist.o
>  obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
> +obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o
>  
> diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c
> --- linux-2.6.24-mm1/mm/memcontrol.c	2008-02-04 14:34:24.000000000 +0900
> +++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c	2008-03-03 10:56:56.000000000 +0900
> @@ -19,6 +19,7 @@
>  
>  #include <linux/res_counter.h>
>  #include <linux/memcontrol.h>
> +#include <linux/swap_limit.h>
>  #include <linux/cgr
...

Re: [RFC/PATCH] cgroup swap subsystem [message #28015 is a reply to message #27995] Wed, 05 March 2008 14:07 Go to previous message | Go to next message
Hugh Dickins is currently offline  Hugh Dickins
Messages: 16
Registered: September 2007
Junior Member
On Wed, 5 Mar 2008, Pavel Emelyanov wrote:
> Daisuke Nishimura wrote:
> > 
> > Todo:
> >   - rebase new kernel, and split into some patches.
> >   - Merge with memory subsystem (if it would be better), or
> >     remove dependency on CONFIG_CGROUP_MEM_CONT if possible
> >     (needs to make page_cgroup more generic one).
> 
> Merge is a must IMHO. I can hardly imagine a situation in which
> someone would need these two separately.

Strongly agree.  Nobody's interested in swap as such: it's just
secondary memory, where RAM is primary memory.  People want to
control memory as the sum of the two; and I expect they may also
want to control primary memory (all that the current memcg does)
within that.  I wonder if such nesting of limits fits easily
into cgroups or will be problematic.

Hugh
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: [RFC/PATCH] cgroup swap subsystem [message #28016 is a reply to message #28015] Wed, 05 March 2008 14:14 Go to previous message | Go to next message
Pavel Emelianov is currently offline  Pavel Emelianov
Messages: 1149
Registered: September 2006
Senior Member
Hugh Dickins wrote:
> On Wed, 5 Mar 2008, Pavel Emelyanov wrote:
>> Daisuke Nishimura wrote:
>>> Todo:
>>>   - rebase new kernel, and split into some patches.
>>>   - Merge with memory subsystem (if it would be better), or
>>>     remove dependency on CONFIG_CGROUP_MEM_CONT if possible
>>>     (needs to make page_cgroup more generic one).
>> Merge is a must IMHO. I can hardly imagine a situation in which
>> someone would need these two separately.
> 
> Strongly agree.  Nobody's interested in swap as such: it's just
> secondary memory, where RAM is primary memory.  People want to
> control memory as the sum of the two; and I expect they may also
> want to control primary memory (all that the current memcg does)
> within that.  I wonder if such nesting of limits fits easily
> into cgroups or will be problematic.

This nesting would affect the res_counter abstraction, not the
cgroup infrastructure. The current design of resource counters doesn't
allow for such a thing, but the extension is a couple-of-lines patch :)

> Hugh
> 

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: [RFC/PATCH] cgroup swap subsystem [message #28026 is a reply to message #28016] Thu, 06 March 2008 00:33 Go to previous message | Go to next message
KAMEZAWA Hiroyuki is currently offline  KAMEZAWA Hiroyuki
Messages: 463
Registered: September 2006
Senior Member
On Wed, 05 Mar 2008 17:14:12 +0300
Pavel Emelyanov <xemul@openvz.org> wrote:
> > Strongly agree.  Nobody's interested in swap as such: it's just
> > secondary memory, where RAM is primary memory.  People want to
> > control memory as the sum of the two; and I expect they may also
> > want to control primary memory (all that the current memcg does)
> > within that.  I wonder if such nesting of limits fits easily
> > into cgroups or will be problematic.
> 
> This nesting would affect the res_counter abstraction, not the
> cgroup infrastructure. Current design of resource counters doesn't
> allow for such thing, but the extension is a couple-of-lines patch :)
> 
IMHO, keeping res_counter simple is better.

Is this kind of new entry in mem_cgroup not good ?
==
struct mem_cgroup {
	...
	struct res_counter	memory_limit.
	struct res_counter	swap_limit.
	..
}

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: [RFC/PATCH] cgroup swap subsystem [message #28027 is a reply to message #28026] Thu, 06 March 2008 00:35 Go to previous message | Go to next message
Paul Menage is currently offline  Paul Menage
Messages: 642
Registered: September 2006
Senior Member
On Wed, Mar 5, 2008 at 4:33 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
>  Is this kind of new entry in mem_cgroup not good ?
>  ==
>  struct mem_cgroup {
>         ...
>         struct res_counter      memory_limit.
>         struct res_counter      swap_limit.
>         ..

I agree with this - main memory and swap memory are rather different
kinds of resources, with very different performance characteristics.
It should be possible to control them completely independently (e.g.
this job gets 100M of main memory, and doesn't swap at all).

Paul
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: [RFC/PATCH] cgroup swap subsystem [message #28032 is a reply to message #28026] Thu, 06 March 2008 08:20 Go to previous message
Pavel Emelianov is currently offline  Pavel Emelianov
Messages: 1149
Registered: September 2006
Senior Member
KAMEZAWA Hiroyuki wrote:
> On Wed, 05 Mar 2008 17:14:12 +0300
> Pavel Emelyanov <xemul@openvz.org> wrote:
>>> Strongly agree.  Nobody's interested in swap as such: it's just
>>> secondary memory, where RAM is primary memory.  People want to
>>> control memory as the sum of the two; and I expect they may also
>>> want to control primary memory (all that the current memcg does)
>>> within that.  I wonder if such nesting of limits fits easily
>>> into cgroups or will be problematic.
>> This nesting would affect the res_counter abstraction, not the
>> cgroup infrastructure. Current design of resource counters doesn't
>> allow for such thing, but the extension is a couple-of-lines patch :)
>>
> IMHO, keeping res_counter simple is better.
> 
> Is this kind of new entry in mem_cgroup not good ?
> ==
> struct mem_cgroup {
> 	...
> 	struct res_counter	memory_limit.
> 	struct res_counter	swap_limit.
> 	..
> }

I meant the same thing actually. By "nesting would affect" I
meant that we might want to make res_counters hierarchical.

That would kill two birds with one stone - we would get true
hierarchical memory accounting and allow charging two counters
with one call.

> 

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Re: [RFC/PATCH] cgroup swap subsystem [message #28151 is a reply to message #27995] Wed, 05 March 2008 08:51 Go to previous message
Daisuke Nishimura is currently offline  Daisuke Nishimura
Messages: 54
Registered: March 2008
Member
Hi.

>> @@ -664,6 +665,10 @@ retry:
>>  	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
>>  	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
>>  		pc->flags |= PAGE_CGROUP_FLAG_CACHE;
>> +#ifdef CONFIG_CGROUP_SWAP_LIMIT
>> +	atomic_inc(&mm->mm_count);
>> +	pc->pc_mm = mm;
>> +#endif
> 
> What kernel is this patch for? I cannot find this code in 2.6.25-rc3-mm1
> 
For linux-2.6.24-mm1.

Thanks,
Daisuke Nishimura.

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
Previous Topic: network namespace ipv6 perfs
Next Topic: Re: [RFC/PATCH] cgroup swap subsystem
Goto Forum:
  


Current Time: Fri Aug 16 19:24:32 GMT 2024

Total time taken to generate the page: 0.02942 seconds