OpenVZ Forum


Home » Mailing lists » Devel » [RFC][PATCH][0/4] Memory controller (RSS Control) (
[RFC][PATCH][3/4] Add reclaim support ( [message #10622 is a reply to message #10620] Sat, 24 February 2007 14:45 Go to previous messageGo to previous message
Balbir Singh is currently offline  Balbir Singh
Messages: 491
Registered: August 2006
Senior Member
Changelog

1. Move void *container to struct container (in scan_control and vmscan.c
and rmap.c)
2. The last set of patches churned the LRU list; in this release, pages
that do not belong to the container are moved to a skipped_pages
list. At the end of the isolation they are added back to the zone
list using list_splice_tail (a new function added in list.h).
The disadvantage of this approach is that pages moved to skipped_pages
will not be available for general reclaim. General testing on UML
and a powerpc box showed that the changes worked.

Other alternatives tried
------------------------
a. Do not delete the page from the lru list, but that quickly led to
a panic, since the page was on the LRU and we released the lru_lock
in page_in_container

TODO's

1. Try a per-container LRU list, but that would mean expanding the page
struct or special tricks like overloading the LRU pointer. A per-container
list would also make it more difficult to handle shared pages, as a
page will belong to just one container at-a-time.

This patch reclaims pages from a container when the container limit is hit.
The executable is oom'ed only when the container it is running in is over
its limit and we could not reclaim any pages belonging to the container

A parameter called pushback, controls how much memory is reclaimed when the
limit is hit. It should be easy to expose this knob to user space, but
currently it is hard coded to 20% of the total limit of the container.

isolate_lru_pages() has been modified to isolate pages belonging to a
particular container, so that reclaim code will reclaim only container
pages. For shared pages, reclaim does not unmap all mappings of the page,
it only unmaps those mappings that are over their limit. This ensures
that other containers are not penalized while reclaiming shared pages.

Parallel reclaim per container is not allowed. Each controller has a wait
queue that ensures that only one task per container is running reclaim on
that container.

Signed-off-by: <balbir@in.ibm.com>
---

include/linux/list.h | 26 +++++++++
include/linux/memcontrol.h | 12 ++++
include/linux/rmap.h | 20 ++++++-
include/linux/swap.h | 3 +
mm/memcontrol.c | 122 +++++++++++++++++++++++++++++++++++++++++----
mm/migrate.c | 2
mm/rmap.c | 100 +++++++++++++++++++++++++++++++++++-
mm/vmscan.c | 114 +++++++++++++++++++++++++++++++++++++-----
8 files changed, 370 insertions(+), 29 deletions(-)

diff -puN include/linux/memcontrol.h~memcontrol-reclaim-on-limit include/linux/memcontrol.h
--- linux-2.6.20/include/linux/memcontrol.h~memcontrol-reclaim-on-limit 2007-02-24 19:40:56.000000000 +0530
+++ linux-2.6.20-balbir/include/linux/memcontrol.h 2007-02-24 19:50:34.000000000 +0530
@@ -37,6 +37,7 @@ enum {
};

#ifdef CONFIG_CONTAINER_MEMCONTROL
+#include <linux/wait.h>

#ifndef kB
#define kB 1024 /* One Kilo Byte */
@@ -53,6 +54,9 @@ extern void memcontrol_mm_free(struct mm
extern void memcontrol_mm_assign_container(struct mm_struct *mm,
struct task_struct *p);
extern int memcontrol_update_rss(struct mm_struct *mm, int count, bool check);
+extern int memcontrol_mm_overlimit(struct mm_struct *mm, void *sc_cont);
+extern wait_queue_head_t memcontrol_reclaim_wq;
+extern bool memcontrol_reclaim_in_progress;

#else /* CONFIG_CONTAINER_MEMCONTROL */

@@ -76,5 +80,13 @@ static inline int memcontrol_update_rss(
return 0;
}

+/*
+ * In the absence of memory control, we always free mappings.
+ */
+static inline int memcontrol_mm_overlimit(struct mm_struct *mm, void *sc_cont)
+{
+ return 1;
+}
+
#endif /* CONFIG_CONTAINER_MEMCONTROL */
#endif /* _LINUX_MEMCONTROL_H */
diff -puN include/linux/rmap.h~memcontrol-reclaim-on-limit include/linux/rmap.h
--- linux-2.6.20/include/linux/rmap.h~memcontrol-reclaim-on-limit 2007-02-24 19:40:56.000000000 +0530
+++ linux-2.6.20-balbir/include/linux/rmap.h 2007-02-24 19:40:56.000000000 +0530
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
+#include <linux/container.h>

/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -90,7 +91,17 @@ static inline void page_dup_rmap(struct
* Called from mm/vmscan.c to handle paging out
*/
int page_referenced(struct page *, int is_locked);
-int try_to_unmap(struct page *, int ignore_refs);
+int try_to_unmap(struct page *, int ignore_refs, struct container *container);
+#ifdef CONFIG_CONTAINER_MEMCONTROL
+bool page_in_container(struct page *page, struct zone *zone,
+ struct container *container);
+#else
+static inline bool page_in_container(struct page *page, struct zone *zone,
+ struct container *container)
+{
+ return true;
+}
+#endif /* CONFIG_CONTAINER_MEMCONTROL */

/*
* Called from mm/filemap_xip.c to unmap empty zero page
@@ -118,7 +129,12 @@ int page_mkclean(struct page *);
#define anon_vma_link(vma) do {} while (0)

#define page_referenced(page,l) TestClearPageReferenced(page)
-#define try_to_unmap(page, refs) SWAP_FAIL
+#define try_to_unmap(page, refs, container) SWAP_FAIL
+static inline bool page_in_container(struct page *page, struct zone *zone,
+ struct container *container)
+{
+ return true;
+}

static inline int page_mkclean(struct page *page)
{
diff -puN include/linux/swap.h~memcontrol-reclaim-on-limit include/linux/swap.h
--- linux-2.6.20/include/linux/swap.h~memcontrol-reclaim-on-limit 2007-02-24 19:40:56.000000000 +0530
+++ linux-2.6.20-balbir/include/linux/swap.h 2007-02-24 19:40:56.000000000 +0530
@@ -6,6 +6,7 @@
#include <linux/mmzone.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/container.h>

#include <asm/atomic.h>
#include <asm/page.h>
@@ -188,6 +189,8 @@ extern void swap_setup(void);
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zone **, gfp_t);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
+extern unsigned long memcontrol_shrink_mapped_memory(unsigned long nr_pages,
+ struct container *container);
extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
extern long vm_total_pages;
diff -puN mm/memcontrol.c~memcontrol-reclaim-on-limit mm/memcontrol.c
--- linux-2.6.20/mm/memcontrol.c~memcontrol-reclaim-on-limit 2007-02-24 19:40:56.000000000 +0530
+++ linux-2.6.20-balbir/mm/memcontrol.c 2007-02-24 19:40:56.000000000 +0530
@@ -24,6 +24,7 @@
#include <linux/fs.h>
#include <linux/container.h>
#include <linux/memcontrol.h>
+#include <linux/swap.h>

#include <asm/uaccess.h>

@@ -31,6 +32,12 @@
static const char version[] = "0.1";

/*
+ * Explore exporting these knobs to user space
+ */
+static const int pushback = 20; /* What percentage of memory to reclaim */
+static const int nr_retries = 5; /* How many times do we try to reclaim */
+
+/*
* Locking notes
*
* Each mm_struct belongs to a container, when the thread group leader
@@ -52,6 +59,9 @@ static const char version[] = "0.1";
struct memcontrol {
struct container_subsys_state css;
struct res_counter counter;
+ wait_queue_head_t wq;
+ bool reclaim_in_progress;
+ spinlock_t lock;
};

static struct container_subsys memcontrol_subsys;
@@ -67,6 +77,41 @@ static inline struct memcontrol *memcont
return memcontrol_from_cont(task_container(p, &memcontrol_subsys));
}

+/*
+ * checks if the mm's container and scan control passed container match, if
+ * so, is the container over its limit. Returns 1 to indicate that the
+ * pages from the mm_struct in question should be reclaimed.
+ */
+int memcontrol_mm_overlimit(struct mm_struct *mm, void *sc_cont)
+{
+ struct container *cont;
+ struct memcontrol *mem;
+ long usage, limit;
+ int ret = 1;
+
+ /*
+ * Regular reclaim, let it proceed as usual
+ */
+ if (!sc_cont)
+ goto out;
+
+ ret = 0;
+ read_lock(&mm->container_lock);
+ cont = mm->container;
+ if (cont != sc_cont)
+ goto out_unlock;
+
+ mem = memcontrol_from_cont(cont);
+ usage = atomic_long_read(&mem->counter.usage);
+ limit = atomic_long_read(&mem->counter.limit);
+ if (limit && (usage > limit))
+ ret = 1;
+out_unlock:
+ read_unlock(&mm->container_lock);
+out:
+ return ret;
+}
+
int memcontrol_mm_init(struct mm_struct *mm)
{
mm->counter = kmalloc(sizeof(struct res_counter), GFP_KERNEL);
@@ -99,6 +144,46 @@ void memcontrol_mm_assign_container(stru
memcontrol_mm_assign_container_direct(mm, cont);
}

+static int memcontrol_check_and_reclaim(struct container *cont, long usage,
+ long limit)
+{
+ unsigned long nr_pages = 0;
+ unsigned long nr_reclaimed = 0;
+ int retries = nr_retries;
+ int ret = 0;
+ struct memcontrol *mem;
+
+ mem = memcontrol_from_cont(cont);
+ spin_lock(&mem->lock);
+ while ((retries-- > 0) && limit && (usage > limit)) {
+ if (mem->reclaim_in_progress) {
+ spin_unlock(&mem->lock);
+ wait_event(mem->wq, !mem->reclaim_in_progress);
+ spin_lock(&mem->lock);
+ } else {
+ if (!nr_pages)
+ nr_pages = (pushback * limit) / 100;
+ mem->reclaim_in_progress = true;
+ spin_unlock(&mem->lock);
+ nr_reclaimed +=
+ memcontrol_shrink_mapped_memory(nr_pages, cont);
+ spin_lock(&mem->lock);
+ mem->reclaim_in_progress = false;
+ wake_up_all(&mem->wq);
+ }
+ /*
+ * Resample usage and limit after reclaim
+ */
+ usage = atomic_long_read(&mem->counter.usage);
+ limit = atomic_long_read(&mem->counter.limit);
+ }
+ spin_unlock(&mem->lock);
+
+ if (limit && (usage > limit))
+ ret = -ENOMEM;
+ return ret;
+}
+
/*
* Update the rss usage counters for the mm_struct and the container it belongs
* to. We do not fail rss for pages shared during f
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: [PATCH] Use struct pid parameter in copy_process()
Next Topic: [PATCH 1/3] IRQ: add __must_check to request_irq
Goto Forum:
  


Current Time: Sat Sep 20 20:22:39 GMT 2025

Total time taken to generate the page: 0.06197 seconds