This patch introduces two new structures:

	struct sched_entity
		Stores the essential attributes/execution history used by the
		CFS core to drive fairness between 'schedulable entities'
		(tasks, users, etc.).

	struct lrq
		The runqueue used to hold ready-to-run entities.

These new structures are formed by grouping together existing fields in
existing structures (task_struct and rq) and hence represent a rework
with zero functionality change.
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
---
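(Note for reviewers, not part of the patch: a before/after sketch of what
the regrouping means at a usage site. print_task_stats() is a made-up
helper, not a kernel function.)

	/*
	 * Illustrative only: the per-task CFS stats are unchanged, they
	 * are simply reached through the embedded 'se' member now.
	 */
	static void print_task_stats(struct task_struct *p)
	{
		/* before: u64 runtime = p->sum_exec_runtime; */
		u64 runtime = p->se.sum_exec_runtime;	/* after */

		printk(KERN_DEBUG "%s: exec runtime %llu ns\n",
		       p->comm, (unsigned long long)runtime);
	}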
 fs/proc/array.c           |    4
 include/linux/sched.h     |   43 +++++++++----
 kernel/exit.c             |    2
 kernel/posix-cpu-timers.c |   16 +--
 kernel/sched.c            |  186 +++++++++++++++++++++-------------------
 kernel/sched_debug.c      |   86 +++++++++---------
 kernel/sched_fair.c       |  211 +++++++++++++++++++++++-----------------------
 kernel/sched_rt.c         |   14 +--
8 files changed, 289 insertions(+), 273 deletions(-)
Index: current/include/linux/sched.h
===================================================================
--- current.orig/include/linux/sched.h 2007-06-09 15:01:39.000000000 +0530
+++ current/include/linux/sched.h 2007-06-09 15:04:54.000000000 +0530
@@ -872,6 +872,29 @@
void (*task_new) (struct rq *rq, struct task_struct *p);
};
+/* CFS stats for a schedulable entity (task, task-group etc) */
+struct sched_entity {
+ int load_weight; /* for niceness load balancing purposes */
+ int on_rq;
+ struct rb_node run_node;
+ u64 wait_start_fair;
+ u64 wait_start;
+ u64 exec_start;
+ u64 sleep_start, sleep_start_fair;
+ u64 block_start;
+ u64 sleep_max;
+ u64 block_max;
+ u64 exec_max;
+ u64 wait_max;
+ u64 last_ran;
+
+ s64 wait_runtime;
+ u64 sum_exec_runtime;
+ s64 fair_key;
+ s64 sum_wait_runtime, sum_sleep_runtime;
+ unsigned long wait_runtime_overruns, wait_runtime_underruns;
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -886,33 +909,15 @@
int oncpu;
#endif
#endif
- int load_weight; /* for niceness load balancing purposes */
int prio, static_prio, normal_prio;
- int on_rq;
struct list_head run_list;
- struct rb_node run_node;
+ struct sched_entity se;
unsigned short ioprio;
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
- /* CFS scheduling class statistics fields: */
- u64 wait_start_fair;
- u64 wait_start;
- u64 exec_start;
- u64 sleep_start, sleep_start_fair;
- u64 block_start;
- u64 sleep_max;
- u64 block_max;
- u64 exec_max;
- u64 wait_max;
-
- s64 wait_runtime;
- u64 sum_exec_runtime;
- s64 fair_key;
- s64 sum_wait_runtime, sum_sleep_runtime;
- unsigned long wait_runtime_overruns, wait_runtime_underruns;
unsigned int policy;
cpumask_t cpus_allowed;
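(Reviewer note, not part of the patch: the new comment in sched.h says
"task, task-group etc" -- the point of pulling the stats out of
task_struct into struct sched_entity is that an entity need not be a
task. A purely hypothetical sketch of where this generalizes;
'struct task_group' below is illustrative and does not exist in this
patch:)

	/* Hypothetical: the same entity could represent a whole group of
	 * tasks (per user, per container, ...), scheduled as one unit. */
	struct task_group {
		struct sched_entity se;	/* group-level fairness stats */
		/* per-group runqueue, list of member tasks, ... */
	};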
Index: current/fs/proc/array.c
===================================================================
--- current.orig/fs/proc/array.c 2007-06-09 15:01:39.000000000 +0530
+++ current/fs/proc/array.c 2007-06-09 15:04:54.000000000 +0530
@@ -329,7 +329,7 @@
* Use CFS's precise accounting, if available:
*/
if (!(sysctl_sched_features & 128)) {
- u64 temp = (u64)nsec_to_clock_t(p->sum_exec_runtime);
+ u64 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
if (total) {
temp *= utime;
@@ -351,7 +351,7 @@
* by userspace grows monotonically - apps rely on that):
*/
if (!(sysctl_sched_features & 128))
- stime = nsec_to_clock_t(p->sum_exec_runtime) - task_utime(p);
+ stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
return stime;
}
Index: current/kernel/exit.c
===================================================================
--- current.orig/kernel/exit.c 2007-06-09 14:56:50.000000000 +0530
+++ current/kernel/exit.c 2007-06-09 15:04:54.000000000 +0530
@@ -126,7 +126,7 @@
sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
- sig->sum_sched_runtime += tsk->sum_exec_runtime;
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
Index: current/kernel/posix-cpu-timers.c
===================================================================
--- current.orig/kernel/posix-cpu-timers.c 2007-06-09 15:01:39.000000000 +0530
+++ current/kernel/posix-cpu-timers.c 2007-06-09 15:04:54.000000000 +0530
@@ -249,7 +249,7 @@
cpu->sched = p->signal->sum_sched_runtime;
/* Add in each other live thread. */
while ((t = next_thread(t)) != p) {
- cpu->sched += t->sum_exec_runtime;
+ cpu->sched += t->se.sum_exec_runtime;
}
cpu->sched += sched_ns(p);
break;
@@ -467,7 +467,7 @@
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk->sum_exec_runtime);
+ tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
@@ -475,7 +475,7 @@
cleanup_timers(tsk->signal->cpu_timers,
cputime_add(tsk->utime, tsk->signal->utime),
cputime_add(tsk->stime, tsk->signal->stime),
- tsk->sum_exec_runtime + tsk->signal->sum_sched_runtime);
+ tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
}
@@ -536,7 +536,7 @@
nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
- ns = t->sum_exec_runtime + nsleft;
+ ns = t->se.sum_exec_runtime + nsleft;
if (t->it_sched_expires == 0 ||
t->it_sched_expires > ns) {
t->it_sched_expires = ns;
@@ -1004,7 +1004,7 @@
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || tsk->sum_exec_runtime < t->expires.sched) {
+ if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
tsk->it_sched_expires = t->expires.sched;
break;
}
@@ -1049,7 +1049,7 @@
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
- sum_sched_runtime += t->sum_exec_runtime;
+ sum_sched_runtime += t->se.sum_exec_runtime;
t = next_thread(t);
} while (t != tsk);
ptime = cputime_add(utime, stime);
@@ -1208,7 +1208,7 @@
t->it_virt_expires = ticks;
}
- sched = t->sum_exec_runtime + sched_left;
+ sched = t->se.sum_exec_runtime + sched_left;
if (sched_expires && (t->it_sched_expires == 0 ||
t->it_sched_expires > sched)) {
t->it_sched_expires = sched;
@@ -1300,7 +1300,7 @@
if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
(tsk->it_sched_expires == 0 ||
- tsk->sum_exec_runtime < tsk->it_sched_expires))
+ tsk->se.sum_exec_runtime < tsk->it_sched_expires))
return;
#undef UNEXPIRED
Index: current/kernel/sched.c
===================================================================
--- current.orig/kernel/sched.c 2007-06-09 15:01:39.000000000 +0530
+++ current/kernel/sched.c 2007-06-09 15:07:17.000000000 +0530
@@ -116,6 +116,23 @@
struct list_head queue[MAX_RT_PRIO];
};
+/* CFS-related fields in a runqueue */
+struct lrq {
+ unsigned long raw_weighted_load;
+ #define CPU_LOAD_IDX_MAX 5
+ unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+ unsigned long nr_load_updates;
+
+ u64 fair_clock, delta_fair_clock;
+ u64 exec_clock, delta_exec_clock;
+ s64 wait_runtime;
+ unsigned long wait_runtime_overruns, wait_runtime_underruns;
+
+ struct rb_root tasks_timeline;
+ struct rb_node *rb_leftmost;
+ struct rb_node *rb_load_balance_curr;
+};
+
/*
* This is the main, per-CPU runqueue data structure.
*
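(Reviewer note, not part of the patch: tasks_timeline is the rbtree of
runnable entities ordered by se->fair_key, and rb_leftmost caches the
leftmost node so the next entity to run can be picked without walking
the tree. An illustrative lookup follows; the real helpers live in
kernel/sched_fair.c, and first_fair_entity() is a made-up name:)

	static struct sched_entity *first_fair_entity(struct lrq *lrq)
	{
		struct rb_node *left = lrq->rb_leftmost; /* cached leftmost */

		if (!left)
			return NULL;	/* no runnable entities */

		return rb_entry(left, struct sched_entity, run_node);
	}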
@@ -131,16 +148,13 @@
* remote CPUs use both these fields when doing load calculation.
*/
long nr_running;
- unsigned long raw_weighted_load;
- #define CPU_LOAD_IDX_MAX 5
- unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+ struct lrq lrq;
unsigned char idle_at_tick;
#ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
u64 nr_switches;
- unsigned long nr_load_updates;
/*
* This is part of a global counter where only the total sum
@@ -156,10 +170,6 @@
u64 clock, prev_clock_raw;
s64 clock_max_delta;
- u64 fair_clock, delta_fair_clock;
- u64 exec_clock, delta_exec_clock;
- s64 wait_runtime;
- unsigned long wait_runtime_overruns, wait_runtime_underruns;
unsigned int clock_warps, clock_overflows;
unsigned int clock_unstable_events;
@@ -170,10 +180,6 @@
int rt_load_balance_idx;
struct list_head *rt_load_balance_head, *rt_load_balance_curr;
- struct rb_root tasks_timeline;
- struct rb_node *rb_leftmost;
- struct rb_node *rb_load_balance_curr;
-
atomic_t nr_iowait;
#ifdef CONFIG_SMP
@@ -583,13 +589,13 @@
static inline void
inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
- rq->raw_weighted_load += p->load_weight;
+ rq->lrq.raw_weighted_load += p->se.load_weight;
}
static inline void
dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
- rq->raw_weighted_load -= p->load_weight;
+ rq->lrq.raw_weighted_load -= p->se.load_weight;
}
static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
@@ -615,22 +621,22 @@
static void set_load_weight(struct task_struct *p)
{
- task_rq(p)->wait_runtime -= p->wait_runtime;
- p->wait_runtime = 0;
+ task_rq(p)->lrq.wait_runtime -= p->se.wait_runtime;
+ p->se.wait_runtime = 0;
if (has_rt_policy(p)) {
- p->load_weight = prio_to_weight[0] * 2;
+ p->se.load_weight = prio_to_weight[0] * 2;
return;
}
/*
* SCHED_IDLEPRIO tasks get minimal weight:
*/
if (p->policy == SCHED_IDLEPRIO) {
- p->load_weight = 1;
+ p->se.load_weight = 1;
return;
}
- p->load_weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
+ p->se.load_weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
}
static v
...