This patch reworks the CFS core to operate on generic schedulable
entities. The task-specific operations (enqueue, dequeue, task_tick,
etc.) are then rewritten to work off this generic CFS "library".
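
For illustration only (not part of the patch), below is a minimal
user-space sketch of the wrapper pattern this series introduces: the
per-task scheduling state lives in an embedded sched_entity, the
generic operation knows only about entities and the lrq, and
container_of() recovers the owning task from the entity. The struct
layouts and the demo_enqueue_task()/demo_enqueue_entity() names are
simplified stand-ins, not the kernel's actual definitions.

	#include <stdio.h>
	#include <stddef.h>

	/* Simplified stand-ins for the structures touched by this patch. */
	struct lrq {
		long nr_queued;		/* models the per-rq "lrq" runqueue */
	};

	struct sched_entity {
		long fair_key;		/* ordering key used by the rb-tree */
	};

	struct task_struct {
		int pid;
		struct sched_entity se;	/* entity embedded in the task */
	};

	/* Poor man's container_of(): recover the task from its entity. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static struct task_struct *entity_to_task(struct sched_entity *se)
	{
		return container_of(se, struct task_struct, se);
	}

	/* Generic "library" operation: sees only entities and the lrq. */
	static void demo_enqueue_entity(struct lrq *lrq, struct sched_entity *se)
	{
		lrq->nr_queued++;
		printf("enqueued entity of task %d (key %ld)\n",
		       entity_to_task(se)->pid, se->fair_key);
	}

	/* The task-level op becomes a thin wrapper over the entity op. */
	static void demo_enqueue_task(struct lrq *lrq, struct task_struct *p)
	{
		demo_enqueue_entity(lrq, &p->se);
	}

	int main(void)
	{
		struct lrq lrq = { 0 };
		struct task_struct p = { .pid = 42, .se = { .fair_key = 100 } };

		demo_enqueue_task(&lrq, &p);
		return 0;
	}

The same split is what later allows a group-level entity to be queued
through the identical entity-level path that tasks use.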
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
---
 kernel/sched_debug.c |    2
 kernel/sched_fair.c  |  574 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 345 insertions(+), 231 deletions(-)
Index: current/kernel/sched_fair.c
===================================================================
--- current.orig/kernel/sched_fair.c 2007-06-09 15:07:16.000000000 +0530
+++ current/kernel/sched_fair.c 2007-06-09 15:07:33.000000000 +0530
@@ -42,19 +42,54 @@
extern struct sched_class fair_sched_class;
+/******************************************************************************/
+/* BEGIN : CFS operations on generic schedulable entities */
+/******************************************************************************/
+
+static inline struct rq *lrq_rq(struct lrq *lrq)
+{
+ return container_of(lrq, struct rq, lrq);
+}
+
+static inline struct sched_entity *lrq_curr(struct lrq *lrq)
+{
+ struct rq *rq = lrq_rq(lrq);
+ struct sched_entity *se = NULL;
+
+ if (rq->curr->sched_class == &fair_sched_class)
+ se = &rq->curr->se;
+
+ return se;
+}
+
+static long lrq_nr_running(struct lrq *lrq)
+{
+ struct rq *rq = lrq_rq(lrq);
+
+ return rq->nr_running;
+}
+
+#define entity_is_task(se) 1
+
+static inline struct task_struct *entity_to_task(struct sched_entity *se)
+{
+ return container_of(se, struct task_struct, se);
+}
+
+
/**************************************************************/
/* Scheduling class tree data structure manipulation methods:
*/
/*
- * Enqueue a task into the rb-tree:
+ * Enqueue an entity into the rb-tree:
*/
-static inline void __enqueue_task_fair(struct rq *rq, struct task_struct *p)
+static inline void __enqueue_entity(struct lrq *lrq, struct sched_entity *p)
{
- struct rb_node **link = &rq->lrq.tasks_timeline.rb_node;
+ struct rb_node **link = &lrq->tasks_timeline.rb_node;
struct rb_node *parent = NULL;
- struct task_struct *entry;
- s64 key = p->se.fair_key;
+ struct sched_entity *entry;
+ s64 key = p->fair_key;
int leftmost = 1;
/*
@@ -62,12 +97,12 @@
*/
while (*link) {
parent = *link;
- entry = rb_entry(parent, struct task_struct, se.run_node);
+ entry = rb_entry(parent, struct sched_entity, run_node);
/*
* We dont care about collisions. Nodes with
* the same key stay together.
*/
- if ((s64)(key - entry->se.fair_key) < 0) {
+ if ((s64)(key - entry->fair_key) < 0) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -80,31 +115,31 @@
* used):
*/
if (leftmost)
- rq->lrq.rb_leftmost = &p->se.run_node;
+ lrq->rb_leftmost = &p->run_node;
- rb_link_node(&p->se.run_node, parent, link);
- rb_insert_color(&p->se.run_node, &rq->lrq.tasks_timeline);
+ rb_link_node(&p->run_node, parent, link);
+ rb_insert_color(&p->run_node, &lrq->tasks_timeline);
}
-static inline void __dequeue_task_fair(struct rq *rq, struct task_struct *p)
+static inline void __dequeue_entity(struct lrq *lrq, struct sched_entity *p)
{
- if (rq->lrq.rb_leftmost == &p->se.run_node)
- rq->lrq.rb_leftmost = NULL;
- rb_erase(&p->se.run_node, &rq->lrq.tasks_timeline);
+ if (lrq->rb_leftmost == &p->run_node)
+ lrq->rb_leftmost = NULL;
+ rb_erase(&p->run_node, &lrq->tasks_timeline);
}
-static inline struct rb_node * first_fair(struct rq *rq)
+static inline struct rb_node * first_fair(struct lrq *lrq)
{
- if (rq->lrq.rb_leftmost)
- return rq->lrq.rb_leftmost;
+ if (lrq->rb_leftmost)
+ return lrq->rb_leftmost;
/* Cache the value returned by rb_first() */
- rq->lrq.rb_leftmost = rb_first(&rq->lrq.tasks_timeline);
- return rq->lrq.rb_leftmost;
+ lrq->rb_leftmost = rb_first(&lrq->tasks_timeline);
+ return lrq->rb_leftmost;
}
-static struct task_struct * __pick_next_task_fair(struct rq *rq)
+static struct sched_entity * __pick_next_entity(struct lrq *lrq)
{
- return rb_entry(first_fair(rq), struct task_struct, se.run_node);
+ return rb_entry(first_fair(lrq), struct sched_entity, run_node);
}
/**************************************************************/
@@ -116,21 +151,21 @@
* nice level, but only linearly, not exponentially:
*/
static u64
-niced_granularity(struct task_struct *curr, unsigned long granularity)
+niced_granularity(struct sched_entity *curr, unsigned long granularity)
{
/*
* Negative nice levels get the same granularity as nice-0:
*/
- if (curr->se.load_weight >= NICE_0_LOAD)
+ if (curr->load_weight >= NICE_0_LOAD)
return granularity;
/*
* Positive nice level tasks get linearly finer
* granularity:
*/
- return curr->se.load_weight * (s64)(granularity / NICE_0_LOAD);
+ return curr->load_weight * (s64)(granularity / NICE_0_LOAD);
}
-static void limit_wait_runtime(struct rq *rq, struct task_struct *p)
+static void limit_wait_runtime(struct lrq *lrq, struct sched_entity *p)
{
s64 limit = sysctl_sched_runtime_limit;
@@ -138,30 +173,31 @@
* Niced tasks have the same history dynamic range as
* non-niced tasks:
*/
- if (p->se.wait_runtime > limit) {
- p->se.wait_runtime = limit;
- p->se.wait_runtime_overruns++;
- rq->lrq.wait_runtime_overruns++;
- }
- if (p->se.wait_runtime < -limit) {
- p->se.wait_runtime = -limit;
- p->se.wait_runtime_underruns++;
- rq->lrq.wait_runtime_underruns++;
+ if (p->wait_runtime > limit) {
+ p->wait_runtime = limit;
+ p->wait_runtime_overruns++;
+ lrq->wait_runtime_overruns++;
+ }
+ if (p->wait_runtime < -limit) {
+ p->wait_runtime = -limit;
+ p->wait_runtime_underruns++;
+ lrq->wait_runtime_underruns++;
}
}
-static void __add_wait_runtime(struct rq *rq, struct task_struct *p, s64 delta)
+static void
+__add_wait_runtime(struct lrq *lrq, struct sched_entity *p, s64 delta)
{
- p->se.wait_runtime += delta;
- p->se.sum_wait_runtime += delta;
- limit_wait_runtime(rq, p);
+ p->wait_runtime += delta;
+ p->sum_wait_runtime += delta;
+ limit_wait_runtime(lrq, p);
}
-static void add_wait_runtime(struct rq *rq, struct task_struct *p, s64 delta)
+static void add_wait_runtime(struct lrq *lrq, struct sched_entity *p, s64 delta)
{
- rq->lrq.wait_runtime -= p->se.wait_runtime;
- __add_wait_runtime(rq, p, delta);
- rq->lrq.wait_runtime += p->se.wait_runtime;
+ lrq->wait_runtime -= p->wait_runtime;
+ __add_wait_runtime(lrq, p, delta);
+ lrq->wait_runtime += p->wait_runtime;
}
static s64 div64_s(s64 divident, unsigned long divisor)
@@ -183,49 +219,51 @@
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
-static inline void update_curr(struct rq *rq, u64 now)
+static inline void update_curr(struct lrq *lrq, u64 now)
{
- unsigned long load = rq->lrq.raw_weighted_load;
+ unsigned long load = lrq->raw_weighted_load;
u64 delta_exec, delta_fair, delta_mine;
- struct task_struct *curr = rq->curr;
+ struct sched_entity *curr = lrq_curr(lrq);
+ struct rq *rq = lrq_rq(lrq);
+ struct task_struct *curtask = rq->curr;
- if (curr->sched_class != &fair_sched_class || curr == rq->idle || !load)
+ if (!curr || curtask == rq->idle || !load)
return;
/*
* Get the amount of time the current task was running
* since the last time we changed raw_weighted_load:
*/
- delta_exec = now - curr->se.exec_start;
+ delta_exec = now - curr->exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
- if (unlikely(delta_exec > curr->se.exec_max))
- curr->se.exec_max = delta_exec;
+ if (unlikely(delta_exec > curr->exec_max))
+ curr->exec_max = delta_exec;
- curr->se.sum_exec_runtime += delta_exec;
- curr->se.exec_start = now;
- rq->lrq.exec_clock += delta_exec;
+ curr->sum_exec_runtime += delta_exec;
+ curr->exec_start = now;
+ lrq->exec_clock += delta_exec;
delta_fair = delta_exec * NICE_0_LOAD;
delta_fair += load >> 1; /* rounding */
do_div(delta_fair, load);
/* Load-balancing accounting. */
- rq->lrq.delta_fair_clock += delta_fair;
- rq->lrq.delta_exec_clock += delta_exec;
+ lrq->delta_fair_clock += delta_fair;
+ lrq->delta_exec_clock += delta_exec;
/*
* Task already marked for preemption, do not burden
* it with the cost of not having left the CPU yet:
*/
if (unlikely(sysctl_sched_features & 1))
- if (unlikely(test_tsk_thread_flag(curr, TIF_NEED_RESCHED)))
+ if (unlikely(test_tsk_thread_flag(curtask, TIF_NEED_RESCHED)))
return;
- delta_mine = delta_exec * curr->se.load_weight;
+ delta_mine = delta_exec * curr->load_weight;
delta_mine += load >> 1; /* rounding */
do_div(delta_mine, load);
- rq->lrq.fair_clock += delta_fair;
+ lrq->fair_clock += delta_fair;
/*
* We executed delta_exec amount of time on the CPU,
* but we were only entitled to delta_mine amount of
@@ -233,21 +271,21 @@
* the two values are equal)
* [Note: delta_mine - delta_exec is negative]:
*/
- add_wait_runtime(rq, curr, delta_mine - delta_exec);
+ add_wait_runtime(lrq, curr, delta_mine - delta_exec);
}
static inline void
-update_stats_wait_start(struct rq *rq, struct task_struct *p, u64 now)
+update_stats_wait_start(struct lrq *lrq, struct sched_entity *p, u64 now)
{
- p->se.wait_start_fair = rq->lrq.fair_clock;
- p->se.wait_start = now;
+ p->wait_start_fair = lrq->fair_clock;
+ p->wait_start = now;
}
/*
* Task is being enqueued - update stats:
*/
static inline void
-update_stats_enqueue(struct rq *rq, str
...