This patch introduces the core changes in CFS required to accomplish
group fairness at levels higher than individual tasks (e.g. users or
containers). It also modifies the load-balance interface between
scheduling classes slightly, so that move_tasks() (which is central to
load balancing) can be reused to balance between runqueues of various
types (struct rq in the case of SCHED_RT tasks, struct lrq in the case
of SCHED_NORMAL/BATCH tasks).
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
---
include/linux/sched.h | 14 ++
kernel/sched.c | 155 ++++++++++++++++--------------
kernel/sched_fair.c | 252 +++++++++++++++++++++++++++++++++++++++++++++-----
kernel/sched_rt.c | 49 ++++++++-
4 files changed, 367 insertions(+), 103 deletions(-)
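
To make the interface change easier to follow before reading the hunks, here
is a minimal user-space sketch of the reworked flow: each scheduling class now
exposes a load_balance() hook, and move_tasks() simply walks the class list,
letting every class pull what it can until the move quotas are exhausted. The
struct definitions, the two toy classes and their fixed return values are
simplified stand-ins for illustration only, not kernel code.

/*
 * Simplified model of the new per-class load_balance() hook.  The kernel
 * types (struct rq, struct sched_domain, enum idle_type) are reduced or
 * omitted; this illustrates the control flow only.
 */
#include <stdio.h>

struct rq { int dummy; };		/* stand-in for the kernel runqueue */

struct sched_class {
	const struct sched_class *next;
	/* mirrors (in reduced form) the hook added to struct sched_class */
	int (*load_balance)(struct rq *this_rq, struct rq *busiest,
			    unsigned long max_nr_move,
			    unsigned long max_load_move,
			    unsigned long *load_moved);
};

/* Toy "rt" class: claims to move one task of weight 100 (no quota checks). */
static int rt_load_balance(struct rq *this_rq, struct rq *busiest,
			   unsigned long max_nr_move,
			   unsigned long max_load_move,
			   unsigned long *load_moved)
{
	*load_moved = 100;
	return 1;
}

/* Toy "fair" class: claims to move two tasks of total weight 200. */
static int fair_load_balance(struct rq *this_rq, struct rq *busiest,
			     unsigned long max_nr_move,
			     unsigned long max_load_move,
			     unsigned long *load_moved)
{
	*load_moved = 200;
	return 2;
}

static const struct sched_class fair_class = {
	.next		= NULL,
	.load_balance	= fair_load_balance,
};

static const struct sched_class rt_class = {
	.next		= &fair_class,
	.load_balance	= rt_load_balance,
};

/* Same shape as the new move_tasks() in the kernel/sched.c hunk below. */
static int move_tasks(struct rq *this_rq, struct rq *busiest,
		      unsigned long max_nr_move, unsigned long max_load_move)
{
	const struct sched_class *class = &rt_class;
	unsigned long load_moved, total_nr_moved = 0;
	int nr_moved;

	do {
		nr_moved = class->load_balance(this_rq, busiest,
					       max_nr_move, max_load_move,
					       &load_moved);
		total_nr_moved += nr_moved;
		max_nr_move -= nr_moved;
		max_load_move -= load_moved;
		class = class->next;
	} while (class && max_nr_move && max_load_move);

	return total_nr_moved;
}

int main(void)
{
	struct rq this_rq = { 0 }, busiest = { 0 };

	printf("moved %d tasks\n", move_tasks(&this_rq, &busiest, 8, 1000));
	return 0;
}

Built with any C compiler this prints "moved 3 tasks"; the real move_tasks()
below has the same shape but also passes the scheduling domain and idle state
through, and runs with both runqueue locks held.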
Index: current/include/linux/sched.h
===================================================================
--- current.orig/include/linux/sched.h 2007-06-09 15:04:54.000000000 +0530
+++ current/include/linux/sched.h 2007-06-09 15:07:37.000000000 +0530
@@ -866,8 +866,13 @@
struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
- struct task_struct * (*load_balance_start) (struct rq *rq);
- struct task_struct * (*load_balance_next) (struct rq *rq);
+#ifdef CONFIG_SMP
+ int (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum idle_type idle,
+ int *all_pinned, unsigned long *total_load_moved);
+#endif
void (*task_tick) (struct rq *rq, struct task_struct *p);
void (*task_new) (struct rq *rq, struct task_struct *p);
};
@@ -893,6 +898,11 @@
s64 fair_key;
s64 sum_wait_runtime, sum_sleep_runtime;
unsigned long wait_runtime_overruns, wait_runtime_underruns;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *parent;
+ struct lrq *lrq, /* runqueue on which this entity is (to be) queued */
+ *my_q; /* runqueue "owned" by this entity/group */
+#endif
};
struct task_struct {
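
To make the three new sched_entity fields concrete: lrq points at the runqueue
this entity is (to be) queued on, my_q is the runqueue a group entity owns for
its children (NULL for a plain task), and parent links one level up. The
reduced structures and the walk_up() helper below are illustrative assumptions
rather than the kernel definitions; only the field names come from the hunk
above.

/*
 * Reduced model of the FAIR_GROUP_SCHED hierarchy: a task entity is queued
 * on its parent group's my_q, and the group entity is in turn queued on the
 * per-cpu lrq.  walk_up() is an illustrative helper, not kernel API.
 */
#include <stdio.h>

struct lrq;

struct sched_entity {
	const char *name;
	struct sched_entity *parent;	/* NULL at the top level */
	struct lrq *lrq;		/* runqueue this entity is queued on */
	struct lrq *my_q;		/* runqueue owned by this group, NULL for a task */
};

struct lrq {
	unsigned long nr_running;	/* entities queued on this runqueue */
};

/* Walk from a task's entity towards the root, checking the queueing invariant. */
static void walk_up(const struct sched_entity *se)
{
	for (; se->parent; se = se->parent)
		printf("%s queued on %s's my_q: %s\n", se->name,
		       se->parent->name,
		       se->lrq == se->parent->my_q ? "yes" : "no");
	printf("%s queued on the per-cpu lrq\n", se->name);
}

int main(void)
{
	struct lrq cpu_lrq = { .nr_running = 1 };	/* holds the group entity */
	struct lrq user_lrq = { .nr_running = 1 };	/* holds the task entity */
	struct sched_entity user_grp = { "user group", NULL, &cpu_lrq, &user_lrq };
	struct sched_entity task_se = { "some task", &user_grp, &user_lrq, NULL };

	walk_up(&task_se);
	return 0;
}

This prints the task sitting on the user group's runqueue and the group
sitting on the per-cpu lrq, which is the kind of hierarchy the leaf_lrq_list
added below is meant to track for load balancing.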
Index: current/kernel/sched.c
===================================================================
--- current.orig/kernel/sched.c 2007-06-09 15:07:36.000000000 +0530
+++ current/kernel/sched.c 2007-06-09 15:07:37.000000000 +0530
@@ -133,6 +133,20 @@
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
struct rb_node *rb_load_balance_curr;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr;
+ struct rq *rq;
+
+	/* Leaf lrqs are those that hold tasks (the lowest schedulable entity
+	 * in the hierarchy). Non-leaf lrqs hold other, higher schedulable
+	 * entities (users, containers etc.).
+	 *
+	 * leaf_lrq_list ties together the leaf lrqs on a cpu; this list is
+	 * used during load balancing.
+	 */
+ struct list_head leaf_lrq_list;
+#endif
};
/*
@@ -161,6 +175,9 @@
#endif
#endif
struct lrq lrq;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct list_head leaf_lrq_list; /* list of leaf lrqs on this cpu */
+#endif
u64 nr_switches;
@@ -619,6 +636,16 @@
}
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
+#ifdef CONFIG_SMP
+static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum idle_type idle,
+ int *all_pinned, unsigned long *load_moved,
+ int this_best_prio, int best_prio, int best_prio_seen,
+ void *iterator_arg,
+ struct task_struct *(*iterator_start)(void *arg),
+ struct task_struct *(*iterator_next)(void *arg));
+#endif
#include "sched_stats.h"
#include "sched_rt.c"
@@ -757,6 +784,9 @@
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
task_thread_info(p)->cpu = cpu;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.lrq = &cpu_rq(cpu)->lrq;
+#endif
}
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@ -781,6 +811,9 @@
task_thread_info(p)->cpu = new_cpu;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.lrq = &new_rq->lrq;
+#endif
}
struct migration_req {
@@ -1761,89 +1794,28 @@
return 1;
}
-/*
- * Load-balancing iterator: iterate through the hieararchy of scheduling
- * classes, starting with the highest-prio one:
- */
-
-struct task_struct * load_balance_start(struct rq *rq)
-{
- struct sched_class *class = sched_class_highest;
- struct task_struct *p;
-
- do {
- p = class->load_balance_start(rq);
- if (p) {
- rq->load_balance_class = class;
- return p;
- }
- class = class->next;
- } while (class);
-
- return NULL;
-}
-
-struct task_struct * load_balance_next(struct rq *rq)
-{
- struct sched_class *class = rq->load_balance_class;
- struct task_struct *p;
-
- p = class->load_balance_next(rq);
- if (p)
- return p;
- /*
- * Pick up the next class (if any) and attempt to start
- * the iterator there:
- */
- while ((class = class->next)) {
- p = class->load_balance_start(rq);
- if (p) {
- rq->load_balance_class = class;
- return p;
- }
- }
- return NULL;
-}
-
-#define rq_best_prio(rq) (rq)->curr->prio
-
-/*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
- *
- * Called with both runqueues locked.
- */
-static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum idle_type idle,
- int *all_pinned)
+ int *all_pinned, unsigned long *load_moved,
+ int this_best_prio, int best_prio, int best_prio_seen,
+ void *iterator_arg,
+ struct task_struct *(*iterator_start)(void *arg),
+ struct task_struct *(*iterator_next)(void *arg))
{
- int pulled = 0, pinned = 0, this_best_prio, best_prio,
- best_prio_seen, skip_for_load;
+ int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
- long rem_load_move;
+ long rem_load_move = max_load_move;
if (max_nr_move == 0 || max_load_move == 0)
goto out;
- rem_load_move = max_load_move;
pinned = 1;
- this_best_prio = rq_best_prio(this_rq);
- best_prio = rq_best_prio(busiest);
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load) of
- * any task we find with that prio.
- */
- best_prio_seen = best_prio == busiest->curr->prio;
/*
* Start the load-balancing iterator:
*/
- p = load_balance_start(busiest);
+ p = (*iterator_start)(iterator_arg);
next:
if (!p)
goto out;
@@ -1860,7 +1832,7 @@
!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
best_prio_seen |= p->prio == best_prio;
- p = load_balance_next(busiest);
+ p = (*iterator_next)(iterator_arg);
goto next;
}
@@ -1875,7 +1847,7 @@
if (pulled < max_nr_move && rem_load_move > 0) {
if (p->prio < this_best_prio)
this_best_prio = p->prio;
- p = load_balance_next(busiest);
+ p = (*iterator_next)(iterator_arg);
goto next;
}
out:
@@ -1888,10 +1860,39 @@
if (all_pinned)
*all_pinned = pinned;
+ *load_moved = max_load_move - rem_load_move;
return pulled;
}
/*
+ * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
+ * load from busiest to this_rq, as part of a balancing operation within
+ * "domain". Returns the number of tasks moved.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum idle_type idle,
+ int *all_pinned)
+{
+ struct sched_class *class = sched_class_highest;
+ unsigned long load_moved, total_nr_moved = 0, nr_moved;
+
+ do {
+ nr_moved = class->load_balance(this_rq, this_cpu, busiest,
+ max_nr_move, max_load_move, sd, idle,
+ all_pinned, &load_moved);
+ total_nr_moved += nr_moved;
+ max_nr_move -= nr_moved;
+ max_load_move -= load_moved;
+ class = class->next;
+ } while (class && max_nr_move && max_load_move);
+
+ return total_nr_moved;
+}
+
+/*
* find_busiest_group finds and returns the busiest CPU group within the
* domain. It calculates and returns the amount of weighted load which
* should be moved to restore balance via the imbalance parameter.
@@ -4503,6 +4504,9 @@
#else
task_thread_info(idle)->preempt_count = 0;
#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ idle->se.lrq = &rq->lrq;
+#endif
}
/*
@@ -6086,6 +6090,9 @@
lrq->nr_running = 0;
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
lrq->cpu_load[j] = 0;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ lrq->rq = rq;
+#endif
}
void __init sched_init(void)
@@ -6110,6 +6117,10 @@
rq->nr_running = 0;
rq->clock = 1;
init_lrq(&rq->lrq, rq);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ INIT_LIST_HEAD(&rq->leaf_lrq_list);
+ list_add(&rq->lrq.leaf_lrq_list, &rq->leaf_lrq_list);
+#endif
#ifdef CONFIG_SMP
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
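
The other piece of the interface change is visible in the kernel/sched.c hunks
above: the common balance_tasks() helper no longer knows how to walk a runqueue
itself; it is handed iterator_start/iterator_next callbacks plus an opaque
argument, so that the fair class can feed it entities from a struct lrq and the
RT class from a struct rq. Below is a hedged user-space sketch of just that
callback-iterator shape, using a plain array of fake tasks instead of a real
runqueue.

/*
 * Illustration of the iterator_start/iterator_next callback shape that
 * balance_tasks() now takes.  The "runqueue" here is only an array of
 * fake tasks; nothing below is kernel code.
 */
#include <stdio.h>

struct task { const char *comm; int prio; };

/* Opaque iterator state a scheduling class would pass as iterator_arg. */
struct task_iter {
	struct task *tasks;
	int nr, pos;
};

static struct task *iter_start(void *arg)
{
	struct task_iter *it = arg;

	it->pos = 0;
	return it->nr ? &it->tasks[0] : NULL;
}

static struct task *iter_next(void *arg)
{
	struct task_iter *it = arg;

	return ++it->pos < it->nr ? &it->tasks[it->pos] : NULL;
}

/* Plays the balance_tasks() role: walks whatever the callbacks yield. */
static int visit_all(void *iterator_arg,
		     struct task *(*iterator_start)(void *arg),
		     struct task *(*iterator_next)(void *arg))
{
	struct task *p;
	int n = 0;

	for (p = iterator_start(iterator_arg); p; p = iterator_next(iterator_arg)) {
		printf("candidate: %s (prio %d)\n", p->comm, p->prio);
		n++;
	}
	return n;
}

int main(void)
{
	struct task tasks[] = { { "loop0", 120 }, { "loop1", 130 } };
	struct task_iter it = { tasks, 2, 0 };

	printf("visited %d tasks\n", visit_all(&it, iter_start, iter_next));
	return 0;
}

The real balance_tasks() does the same walk but applies can_migrate_task() and
the priority/load checks to each candidate before actually moving it.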
Index: current/kernel/sched_fair.c
===================================================================
--- current.orig/kernel/sched_fair.c 2007-06-09 15:07:36.000000000 +0530
+++ current/kernel/sched_fair.c 2007-06-09 15:07:37.000000000 +0530
@@ -46,6 +46,29 @@
/* BEGIN : CFS operations on generic schedulable entities */
/******************************************************************************/
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+/* cpu runqueue to which this lrq is attached */
+static inline struct rq *lrq_rq(struct lrq *lrq)
...