[RFC] memory controller : background reclaim and avoid excessive locking [2/5] background reclaim
From: KAMEZAWA Hiroyuki
Date: Thu, 14 February 2008 08:27
A patch for background reclaim based on the high/low watermarks in res_counter.
The daemon is called "memcontd" here.

This implements the following:
 * If res->usage rises above res->hwmark, wake up memcontd.
 * memcontd calls try_to_free_mem_cgroup_pages().
 * memcontd stops once res->usage drops below res->lwmark.

Maybe we can add more tuning knobs later, but there are no extra parameters for now.
(A sketch of the assumed res_counter watermark helpers follows.)
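
The watermark tests used in this patch, res_counter_above_hwmark() and
res_counter_below_lwmark(), come from an earlier patch in this series and are
not shown in this mail. A minimal sketch of what they might look like, assuming
hwmark/lwmark fields are added to struct res_counter next to usage and guarded
by the existing counter lock (field names and locking here are assumptions, not
the series' actual code):

/* Sketch only -- the real helpers are introduced elsewhere in this series. */
static inline int res_counter_above_hwmark(struct res_counter *cnt)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cnt->lock, flags);
	ret = cnt->usage > cnt->hwmark;		/* assumed hwmark field */
	spin_unlock_irqrestore(&cnt->lock, flags);
	return ret;
}

static inline int res_counter_below_lwmark(struct res_counter *cnt)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cnt->lock, flags);
	ret = cnt->usage < cnt->lwmark;		/* assumed lwmark field */
	spin_unlock_irqrestore(&cnt->lock, flags);
	return ret;
}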

ChangeLog:
 - Start "memcontd" at the first write to hwmark.
   (In the old version, it was started at cgroup creation.)
 - Changed the "relax" logic in the memcontd daemon.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

 mm/memcontrol.c |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 109 insertions(+), 3 deletions(-)

Index: linux-2.6.24-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-mm1.orig/mm/memcontrol.c
+++ linux-2.6.24-mm1/mm/memcontrol.c
@@ -30,6 +30,8 @@
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <asm/uaccess.h>
 
@@ -136,6 +138,13 @@ struct mem_cgroup {
 	 * statistics.
 	 */
 	struct mem_cgroup_stat stat;
+	/*
+	 * background reclaim.
+	 */
+	struct {
+		wait_queue_head_t	waitq;
+		struct task_struct	*kthread;
+	} daemon;
 };
 
 /*
@@ -504,6 +513,14 @@ long mem_cgroup_calc_reclaim_inactive(st
 	return (nr_inactive >> priority);
 }
 
+static inline void mem_cgroup_schedule_daemon(struct mem_cgroup *mem)
+{
+	if (likely(mem->daemon.kthread) && /* can be NULL at boot */
+	    waitqueue_active(&mem->daemon.waitq))
+		wake_up_interruptible(&mem->daemon.waitq);
+}
+
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -658,6 +675,9 @@ retry:
 		congestion_wait(WRITE, HZ/10);
 	}
 
+	if (res_counter_above_hwmark(&mem->res))
+		mem_cgroup_schedule_daemon(mem);
+
 	atomic_set(&pc->ref_cnt, 1);
 	pc->mem_cgroup = mem;
 	pc->page = page;
@@ -762,6 +782,50 @@ void mem_cgroup_uncharge_page(struct pag
 }
 
 /*
+ * background page reclaim routine for cgroup.
+ */
+static int mem_cgroup_reclaim_daemon(void *data)
+{
+	DEFINE_WAIT(wait);
+	struct mem_cgroup *mem = data;
+
+	css_get(&mem->css);
+	current->flags |= PF_SWAPWRITE;
+	set_freezable();
+
+	while (!kthread_should_stop()) {
+		prepare_to_wait(&mem->daemon.waitq, &wait, TASK_INTERRUPTIBLE);
+		if (res_counter_below_lwmark(&mem->res)) {
+			if (!kthread_should_stop()) {
+				schedule();
+				try_to_freeze();
+			}
+			finish_wait(&mem->daemon.waitq, &wait);
+			continue;
+		}
+		finish_wait(&mem->daemon.waitq, &wait);
+		try_to_free_mem_cgroup_pages(mem, GFP_HIGHUSER_MOVABLE);
+		/* Am I in a hurry? */
+		if (!res_counter_above_hwmark(&mem->res)) {
+			/*
+			 * Extra relaxing: memory reclaim is heavy work, and
+			 * we don't know whether there is I/O congestion or not,
+			 * so just relax rather than call congestion_wait().
+			 * HZ/10 is a widely used value under mm/.
+			 */
+			schedule_timeout(HZ/10);
+		} else {
+			/* Avoid monopolizing the CPU */
+			yield();
+		}
+	}
+
+	css_put(&mem->css);
+	return 0;
+}
+
+
+/*
  * Returns non-zero if a page (under migration) has valid page_cgroup member.
  * Refcnt of page_cgroup is incremented.
  */
@@ -931,15 +995,40 @@ static ssize_t mem_cgroup_read(struct cg
 				NULL);
 }
 
+static DEFINE_MUTEX(modify_param_mutex);
 static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 				struct file *file, const char __user *userbuf,
 				size_t nbytes, loff_t *ppos)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
+	int ret;
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+
+	mutex_lock(&modify_param_mutex);
+	/* Attach a new background reclaim daemon.
+	   This must be done before changing the values (for easy error handling). */
+
+	if (cft->private == RES_HWMARK &&
+	    !mem->daemon.kthread) {
+		struct task_struct *thr;
+		thr = kthread_run(mem_cgroup_reclaim_daemon, mem, "memcontd");
+		if (IS_ERR(thr)) {
+			ret = PTR_ERR(thr);
+			goto out;
+		}
+		mem->daemon.kthread = thr;
+	}
+	ret =  res_counter_write(&mem->res, cft->private, userbuf, nbytes, ppos,
 				mem_cgroup_write_strategy);
+
+	/* Even on error, don't stop the reclaim daemon here; it is not a big problem. */
+
+out:
+	mutex_unlock(&modify_param_mutex);
+	return ret;
 }
 
+
+
 static ssize_t mem_force_empty_write(struct cgroup *cont,
 				struct cftype *cft, struct file *file,
 				const char __user *userbuf,
@@ -1032,6 +1121,20 @@ static struct cftype mem_cgroup_files[] 
 		.write = mem_cgroup_write,
 		.read = mem_cgroup_read,
 	},
+
+	{
+		.name = "lwmark_in_bytes",
+		.private = RES_LWMARK,
+		.write = mem_cgroup_write,
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "hwmark_in_bytes",
+		.private = RES_HWMARK,
+		.write = mem_cgroup_write,
+		.read = mem_cgroup_read,
+	},
+
 	{
 		.name = "failcnt",
 		.private = RES_FAILCNT,
@@ -1110,7 +1213,8 @@ mem_cgroup_create(struct cgroup_subsys *
 	for_each_node_state(node, N_POSSIBLE)
 		if (alloc_mem_cgroup_per_zone_info(mem, node))
 			goto free_out;
-
+	init_waitqueue_head(&mem->daemon.waitq);
+	mem->daemon.kthread = NULL;
 	return &mem->css;
 free_out:
 	for_each_node_state(node, N_POSSIBLE)
@@ -1125,6 +1229,8 @@ static void mem_cgroup_pre_destroy(struc
 {
 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
 	mem_cgroup_force_empty(mem);
+	if (mem->daemon.kthread)
+		kthread_stop(mem->daemon.kthread);
 }
 
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
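
(For reference, a minimal userspace sketch of how the new lwmark_in_bytes /
hwmark_in_bytes files might be used. The mount point, group name and values are
assumptions, and "memory." is the prefix the cgroup core adds to the file
names. As described above, the first write to hwmark_in_bytes is what starts
"memcontd" for the group.)

/* Illustrative only: set the watermarks of an existing memory cgroup. */
#include <stdio.h>

static int write_value(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) < 0) {
		if (f)
			fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* Set the low watermark, then the high watermark (values are examples). */
	if (write_value("/cgroup/grp0/memory.lwmark_in_bytes", "402653184"))
		perror("lwmark_in_bytes");
	/* The first successful write here creates the "memcontd" thread. */
	if (write_value("/cgroup/grp0/memory.hwmark_in_bytes", "536870912"))
		perror("hwmark_in_bytes");
	return 0;
}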

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
 