OpenVZ Forum


Home » Mailing lists » Devel » [RFC/PATCH] cgroup swap subsystem
[RFC/PATCH] cgroup swap subsystem [message #28148] Wed, 05 March 2008 05:59
Daisuke Nishimura is currently offline  Daisuke Nishimura
Messages: 54
Registered: March 2008
Member
Hi.

Even when memory usage is limited by the cgroup memory subsystem
or memory is isolated by cpuset, swap space is still shared, so
resource isolation is incomplete. If one group uses up all the
swap space, it can affect other groups.

I have tried writing a patch for a swap subsystem, based on the memory
subsystem, which limits swap usage per cgroup.
It can now charge and limit swap usage.

I implemented this feature as a new subsystem,
not as a part of the memory subsystem, because I don't want to
make big changes to memcontrol.c, and even if it is implemented
as a separate subsystem, users can manage memory and swap in
the same cgroup directory if they mount them together.

Basic idea of my implementation:
  - what will be charged ?
    the number of swap entries.

  - when to charge/uncharge ?
    charge at get_swap_page(), and uncharge at swap_entry_free().

  - to which group is the swap entry charged ?
    To determine which swap_cgroup (corresponding to mem_cgroup in
    the memory subsystem) the swap entry should be charged to,
    I added a pointer to mm_struct to page_cgroup (pc->pc_mm), and
    changed the argument of get_swap_page() from (void) to
    (struct page *). As a result, get_swap_page() can determine
    which swap_cgroup it should charge the swap entry to
    by referring to page->page_cgroup->pc_mm->swap_cgroup.

  - from which group is the swap entry uncharged ?
    I added to swap_info_struct a member 'struct swap_cgroup **swap_cgroup',
    an array of pointers recording which swap_cgroup each swap entry is
    charged to.

Todo:
  - Rebase onto a newer kernel, and split into several patches.
  - Merge with the memory subsystem (if that would be better), or
    remove the dependency on CONFIG_CGROUP_MEM_CONT if possible
    (needs page_cgroup to be made more generic).
  - More tests, cleanups, and features :-)


Any comments or discussions would be appreciated.

Thanks,
Daisuke Nishimura


Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>

---
diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h
--- linux-2.6.24-mm1/include/linux/cgroup_subsys.h	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h	2008-03-03 10:56:56.000000000 +0900
@@ -42,3 +42,9 @@ SUBSYS(mem_cgroup)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+SUBSYS(swap)
+#endif
+
+/* */
diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h
--- linux-2.6.24-mm1/include/linux/memcontrol.h	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h	2008-03-03 10:56:56.000000000 +0900
@@ -29,6 +29,21 @@ struct page;
 struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_CONT
+/*
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup
+ */
+struct page_cgroup {
+	struct list_head lru;		/* per cgroup LRU list */
+	struct page *page;
+	struct mem_cgroup *mem_cgroup;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	struct mm_struct *pc_mm;
+#endif
+	atomic_t ref_cnt;		/* Helpful when pages move b/w  */
+					/* mapped and cached states     */
+	int	 flags;
+};
 
 extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
 extern void mm_free_cgroup(struct mm_struct *mm);
diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h
--- linux-2.6.24-mm1/include/linux/mm_types.h	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h	2008-03-03 10:56:56.000000000 +0900
@@ -233,6 +233,9 @@ struct mm_struct {
 #ifdef CONFIG_CGROUP_MEM_CONT
 	struct mem_cgroup *mem_cgroup;
 #endif
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	struct swap_cgroup *swap_cgroup;
+#endif
 };
 
 #endif /* _LINUX_MM_TYPES_H */
diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h
--- linux-2.6.24-mm1/include/linux/swap.h	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h	2008-03-03 10:56:56.000000000 +0900
@@ -7,6 +7,7 @@
 #include <linux/list.h>
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
+#include <linux/swap_limit.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>
@@ -141,6 +142,9 @@ struct swap_info_struct {
 	struct swap_extent *curr_swap_extent;
 	unsigned old_block_size;
 	unsigned short * swap_map;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	struct swap_cgroup **swap_cgroup;
+#endif
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
 	unsigned int cluster_next;
@@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp
 extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct page *);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct page *page)
 {
 	swp_entry_t entry;
 	entry.val = 0;
diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h
--- linux-2.6.24-mm1/include/linux/swap_limit.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h	2008-03-03 10:56:56.000000000 +0900
@@ -0,0 +1,65 @@
+/*
+ * swap_limit.h
+ *
+ */
+#ifndef _LINUX_SWAP_LIMIT_H
+#define _LINUX_SWAP_LIMIT_H
+
+#include <linux/swap.h>
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+
+struct swap_cgroup;
+struct swap_info_struct;
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+struct swap_cgroup {
+	struct cgroup_subsys_state css;
+	struct res_counter res;
+};
+
+static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, swap_subsys_id),
+				struct swap_cgroup,
+				css);
+}
+
+static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
+{
+	return container_of(task_subsys_state(p, swap_subsys_id),
+				struct swap_cgroup, css);
+}
+
+extern int swap_cgroup_charge(struct page *page,
+				struct swap_info_struct *si,
+				unsigned long offset);
+extern void swap_cgroup_uncharge(struct swap_info_struct *si,
+				unsigned long offset);
+
+#else /* CONFIG_CGROUP_SWAP_LIMIT */
+static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
+static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
+{
+	return NULL;
+}
+
+static inline int swap_cgroup_charge(struct page *page,
+					struct swap_info_struct *si,
+					unsigned long offset)
+{
+	return 0;
+}
+
+static inline void swap_cgroup_uncharge(struct swap_info_struct *si,
+					unsigned long offset)
+{
+}
+
+#endif
+
+#endif
diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig
--- linux-2.6.24-mm1/init/Kconfig	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/init/Kconfig	2008-03-03 10:56:56.000000000 +0900
@@ -383,6 +383,12 @@ config CGROUP_MEM_CONT
 	  Provides a memory controller that manages both page cache and
 	  RSS memory.
 
+config CGROUP_SWAP_LIMIT
+	bool "cgroup subsystem for swap"
+	depends on CGROUP_MEM_CONT && SWAP
+	help
+	  Provides a swap controller that manages and limits swap usage.
+
 config PROC_PID_CPUSET
 	bool "Include legacy /proc/<pid>/cpuset file"
 	depends on CPUSETS
diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile
--- linux-2.6.24-mm1/mm/Makefile	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/Makefile	2008-03-03 10:56:56.000000000 +0900
@@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
+obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o
 
diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c
--- linux-2.6.24-mm1/mm/memcontrol.c	2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c	2008-03-03 10:56:56.000000000 +0900
@@ -19,6 +19,7 @@
 
 #include <linux/res_counter.h>
 #include <linux/memcontrol.h>
+#include <linux/swap_limit.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -146,18 +147,6 @@ struct mem_cgroup {
 #define PAGE_CGROUP_LOCK_BIT 	0x0
 #define PAGE_CGROUP_LOCK 		(1 << PAGE_CGROUP_LOCK_BIT)
 
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-	struct list_head lru;		/* per cgroup LRU list */
-	struct page *page;
-	struct mem_cgroup *mem_cgroup;
-	atomic_t ref_cnt;		/* Helpful when pages move b/w  */
-					/* mapped and cached states     */
-	int	 flags;
-};
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
 
@@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task(
 void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
 {
 	struct mem_cgroup *mem;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	struct swap_cgroup *swap;
+#endif
 
 	mem = mem_cgroup_from_task(p);
 	css_get(&mem->css);
 	mm->mem_cgroup = mem;
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	swap = swap_cgroup_from_task(p);
+	css_get(&swap->css);
+	mm->swap_cgroup = swap;
+#endif
 }
 
 void mm_free_cgroup(struct mm_struct *mm)
 {
 	css_put(&mm->mem_cgroup->css);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+	css_put(&mm->swap_cgroup->css);
+#endif
 }
 
 static inline int page_cgroup_locked(struct page *page)
@@ -664,6 +665,10 @@ retry:
 	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 	if (ctype == ME
...

 
Read Message
Previous Topic: [RFC][PATCH 1/1]a new optional function for task assignment to cgroup
Next Topic: Re: [RFC/PATCH] cgroup swap subsystem
Goto Forum:
  


Current Time: Wed Sep 18 13:23:23 GMT 2024

Total time taken to generate the page: 0.04496 seconds