This patch introduced to control cfq_data.
Its algorithm is similar to one when CFQ synchronous I/O.
The new cfq optional operations:
The "cfq_dispatch_requests_fn" defines a function which is implemented
request dispatching algorithm.
This becomes main function for fairness.
The "cfq_completed_request_after_fn" defines a function which winds up I/O's
affairs.
The "cfq_active_check_fn" defines a function which make sure whether selecting cfq_data is equal to active cfq_data.
The "cfq_empty_fn" defines a function which check whether active data exists.
   Signed-off-by: Satoshi UCHIDA <uchida@ap.jp.nec.com>
diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index 568e433..f040c98 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -15,9 +15,35 @@
 #include <linux/cgroup.h>
 #include <linux/cfq-iosched.h>
 
+
 #define CFQ_CGROUP_SLICE_SCALE		(5)
 #define CFQ_CGROUP_MAX_IOPRIO		(8)
 
+static const int cfq_cgroup_slice = HZ / 10;
+
+enum cfqd_state_flags {
+	CFQ_CFQD_FLAG_on_rr = 0,	/* on round-robin busy list */
+	CFQ_CFQD_FLAG_slice_new,	/* no requests dispatched in slice */
+};
+
+#define CFQ_CFQD_FNS(name)						\
+static inline void cfq_mark_cfqd_##name(struct cfq_data *cfqd)		\
+{									\
+	(cfqd)->flags |= (1 << CFQ_CFQD_FLAG_##name);			\
+}									\
+static inline void cfq_clear_cfqd_##name(struct cfq_data *cfqd)	\
+{									\
+	(cfqd)->flags &= ~(1 << CFQ_CFQD_FLAG_##name);			\
+}									\
+static inline int cfq_cfqd_##name(const struct cfq_data *cfqd)		\
+{									\
+	return ((cfqd)->flags & (1 << CFQ_CFQD_FLAG_##name)) != 0;	\
+}
+
+CFQ_CFQD_FNS(on_rr);
+CFQ_CFQD_FNS(slice_new);
+#undef CFQ_CFQD_FNS
+
 static const int cfq_cgroup_slice_idle = HZ / 125;
 
 struct cfq_cgroup {
@@ -45,6 +71,7 @@ static inline struct cfq_cgroup *task_to_cfq_cgroup(struct task_struct *tsk)
  * Add device or cgroup data functions.
  */
 struct cfq_data *__cfq_cgroup_init_queue(struct request_queue *q, void *data);
+static void cfq_cgroup_idle_slice_timer(unsigned long data);
 
 static struct cfq_meta_data *cfq_cgroup_init_meta_data(struct cfq_data *cfqd, struct request_queue *q)
 {
@@ -61,13 +88,18 @@ static struct cfq_meta_data *cfq_cgroup_init_meta_data(struct cfq_data *cfqd, st
 	cfqmd->cfq_driv_d.last_end_request = jiffies;
        
 	init_timer(&cfqmd->cfq_driv_d.idle_slice_timer);
-	cfqmd->cfq_driv_d.idle_slice_timer.function = cfq_idle_slice_timer;
-	cfqmd->cfq_driv_d.idle_slice_timer.data = (unsigned long) cfqd;
+	cfqmd->cfq_driv_d.idle_slice_timer.function = cfq_cgroup_idle_slice_timer;
+	cfqmd->cfq_driv_d.idle_slice_timer.data = (unsigned long) cfqmd;
 	cfqmd->cfq_driv_d.cfq_slice_idle = cfq_cgroup_slice_idle;
 
 	cfqmd->sibling_tree = RB_ROOT;
 	cfqmd->siblings = 0;
 
+	cfqmd->service_tree = CFQ_RB_ROOT;
+	cfqmd->busy_data = 0;
+
+	cfqmd->cfq_slice = cfq_cgroup_slice;
+
 	return cfqmd;
 }
 
@@ -170,6 +202,8 @@ struct cfq_data *__cfq_cgroup_init_queue(struct request_queue *q, void *data)
 
 	RB_CLEAR_NODE(&cfqd->sib_node);
   	RB_CLEAR_NODE(&cfqd->group_node);
+  	RB_CLEAR_NODE(&cfqd->rb_node);
+	cfqd->rb_key = 0;
 
 	if (!cfqmd) {
        		cfqmd = cfq_cgroup_init_meta_data(cfqd, q);
@@ -424,7 +458,295 @@ struct cfq_data *cfq_cgroup_search_data(void *data,
 }
 
 
+/*
+ *  service tree control.
+ */
+static inline int cfq_cgroup_slice_used(struct cfq_data *cfqd)
+{
+  	if (cfq_cfqd_slice_new(cfqd))
+		return 0;
+  	if (time_before(jiffies, cfqd->slice_end))
+		return 0;
+
+	return 1;
+}
+
+
+static inline int cfq_cgroup_prio_slice(struct cfq_data *cfqd,
+				 unsigned short prio)
+{
+	const int base_slice = cfqd->cfqmd->cfq_slice;
+
+	WARN_ON(prio >= IOPRIO_BE_NR);
+
+	return base_slice + (base_slice/CFQ_CGROUP_SLICE_SCALE *
+			     (CFQ_CGROUP_MAX_IOPRIO / 2 - prio));
+}
+
+static inline void
+cfq_cgroup_set_prio_slice(struct cfq_data *cfqd)
+{
+	cfqd->slice_end = cfq_cgroup_prio_slice(cfqd, cfqd->cfqc->ioprio) + jiffies;
+}
+
+static unsigned long cfq_cgroup_slice_offset(struct cfq_data *cfqd)
+{
+	return (cfqd->cfqmd->busy_data - 1) * 
+		(cfq_cgroup_prio_slice(cfqd, 0) -
+		 cfq_cgroup_prio_slice(cfqd, cfqd->cfqc->ioprio));
+}
+
+static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd,
+				 int add_front)
+{
+	struct rb_node **p, *parent;
+	struct cfq_data *__cfqd;
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	unsigned long rb_key;
+	int left;
+
+	if (!add_front) {
+		rb_key = cfq_cgroup_slice_offset(cfqd) + jiffies;
+		rb_key += cfqd->slice_resid;
+		cfqd->slice_resid = 0;
+	} else
+		rb_key = 0;
+
+	if (!RB_EMPTY_NODE(&cfqd->rb_node)) {
+		if (rb_key == cfqd->rb_key)
+			return;
+		cfq_rb_erase(&cfqd->rb_node, &cfqmd->service_tree);
+	}
+
+	left = 1;
+	parent = NULL;
+	p = &cfqmd->service_tree.rb.rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		__cfqd = rb_entry(parent, struct cfq_data, rb_node);
+
+		if (rb_key < __cfqd->rb_key)
+			n = &(*p)->rb_left;
+		else
+			n = &(*p)->rb_right;
+
+		if (n == &(*p)->rb_right)
+			left = 0;
+
+		p = n;
+	}
+
+	if (left)
+		cfqmd->service_tree.left = &cfqd->rb_node;
+
+	cfqd->rb_key = rb_key;
+	rb_link_node(&cfqd->rb_node, parent, p);
+	rb_insert_color(&cfqd->rb_node, &cfqmd->service_tree.rb);
+}
+
+
+static void
+__cfq_cgroup_slice_expired(struct cfq_meta_data *cfqmd,struct cfq_data *cfqd,
+			   int timed_out)
+{
+	if (timed_out && !cfq_cfqd_slice_new(cfqd))
+		cfqd->slice_resid = cfqd->slice_end - jiffies;
+	
+	if (cfq_cfqd_on_rr(cfqd)) {
+		cfq_cgroup_service_tree_add(cfqd,0);
+	}
+
+	if (cfqd == cfqmd->active_data) {
+		cfqmd->active_data = NULL;
+	}
+}
+
+static inline void
+cfq_cgroup_slice_expired(struct cfq_meta_data *cfqmd, int timed_out)
+{
+	struct cfq_data *cfqd = cfqmd->active_data;
+
+	if (cfqd) {
+		cfq_slice_expired(cfqd, 1);
+		__cfq_cgroup_slice_expired(cfqmd, cfqd, timed_out);
+	}
+}
+
+static struct cfq_data *cfq_cgroup_rb_first(struct cfq_rb_root *root)
+{
+	if (!root->left)
+		root->left = rb_first(&root->rb);
+
+	if (root->left)
+		return rb_entry(root->left, struct cfq_data, rb_node);
+
+	return NULL;
+}
+
+static struct cfq_data *cfq_cgroup_get_next_data(struct cfq_meta_data *cfqmd)
+{
+	if (RB_EMPTY_ROOT(&cfqmd->service_tree.rb))
+		return NULL;
+
+	return cfq_cgroup_rb_first(&cfqmd->service_tree);
+}
+
+static void
+__cfq_cgroup_set_active_data(struct cfq_meta_data *cfqmd,
+			     struct cfq_data *cfqd)
+{
+	if (cfqd) {
+		cfqd->slice_end = 0;
+		cfq_mark_cfqd_slice_new(cfqd);
+	}
+
+	cfqmd->active_data = cfqd;
+}
+
+static struct cfq_data *cfq_cgroup_set_active_data(struct cfq_meta_data *cfqmd)
+{
+	struct cfq_data *cfqd;
+
+	cfqd = cfq_cgroup_get_next_data(cfqmd);
+	__cfq_cgroup_set_active_data(cfqmd , cfqd);
+
+	return cfqd;
+}
+
+struct cfq_data *cfq_cgroup_select_data(struct cfq_meta_data *cfqmd)
+{
+	struct cfq_data *cfqd;
+
+	cfqd = cfqmd->active_data;
+	if (!cfqd)
+		goto new_data;
+
+	if (cfq_cgroup_slice_used(cfqd))
+		goto expire;
+
+	if (!RB_EMPTY_ROOT(&cfqd->service_tree.rb))
+		goto keep_data;
+	
+	if (wait_request_checker(cfqd))
+		goto keep_data;
+	
+expire:
+	cfq_cgroup_slice_expired(cfqmd, 0);
+new_data:
+	cfqd = cfq_cgroup_set_active_data(cfqmd);
+keep_data:
+	return cfqd;
+}
+
+int cfq_cgroup_forced_dispatch(struct cfq_data *cfqd)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	int dispatched = 0;
+
+	while ((cfqd = cfq_cgroup_rb_first(&cfqmd->service_tree)) != NULL)
+		dispatched += cfq_forced_dispatch(cfqd);
+
+	cfq_cgroup_slice_expired(cfqmd, 0);
+
+	BUG_ON(cfqmd->busy_data);
+
+	return dispatched;
+}
+
+int cfq_cgroup_dispatch_requests(struct cfq_data *cfqd, int force)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	int dispatched;
+
+
+	if (!cfqmd->busy_data)
+		return 0;
+
+	if (unlikely(force))
+		return cfq_cgroup_forced_dispatch(cfqd);
+
+	dispatched = 0;
+	cfqd = cfq_cgroup_select_data(cfqmd);
+
+	if (cfqd)
+		dispatched = cfq_queue_dispatch_requests(cfqd, force);
+
+	return dispatched;
+}
+
+/*
+ * Timer running if the active_queue is currently idling inside its time slice
+ */
+static void cfq_cgroup_idle_slice_timer(unsigned long data)
+{
+	struct cfq_meta_data *cfqmd = (struct cfq_meta_data *) data;
+	struct cfq_data *cfqd = cfqmd->elv_data;
+	int timed_out = 1;
+	unsigned long flags;
+
+
+	spin_lock_irqsave(CFQ_DRV_UNIQ_DATA(cfqd).queue->queue_lock, flags);
+
+	cfqd = cfqmd->active_data;
+	if (cfqd) {
+		timed_out = 0;
+
+		if (cfq_cgroup_slice_used(cfqd))
+			goto expire_cgroup;
+
+		if (!cfqmd->busy_data)
+			goto out_cont;
+
+		if (__cfq_idle_slice_timer(cfqd))
+			goto out_cont;
+		else
+			goto out_kick;
+		
+	}
+expire_cgroup:
+	cfq_cgroup_slice_expired(cfqmd, timed_out);
+out_kick:	
+	cfq_schedule_dispatch(cfqmd->elv_data);
+out_cont:
+	spin_unlock_irqrestore(CFQ_DRV_UNIQ_DATA(cfqd).queue->queue_lock, flags);
+}
+
+int cfq_cgroup_completed_request_after(struct cfq_data *cfqd)
+{
+	if (cfqd->cfqmd->active_data == cfqd) {
+		if (cfq_cfqd_slice_new(cfqd)) {
+			cfq_cgroup_set_prio_slice(cfqd);
+			cfq_clear_cfqd_slice_new(cfqd);
+
+		}
+		if (cfq_cgroup_slice_used(cfqd)) {
+			cfq_cgroup_slice_expired(cfqd->cfqmd, 1);
+			return 0;
+		}	     
+		return 1;
+	}
+
+	return 0;
+}
+
+static int cfq_cgroup_queue_empty(struct cfq_data *cfqd)
+{
+	return !cfqd->cfqmd->busy_data;
+}
+
+static int cfq_cgroup_active_data_check(struct cfq_data *cfqd)
+{
+	return (cfqd->cfqmd->active_data == cfqd);
+}
+
 struct cfq_ops opt = {
 	.cfq_init_queue_fn = __cfq_cgroup_init_queue,
 	.cfq_exit_queue_fn = __cfq_cgroup_exit_data,
+	.cfq_search_data_fn = cfq_cgroup_search_data,
+	.cfq_dispatch_requests_fn = cfq_cgroup_dispatch_requests,
+	.cfq_completed_request_a
...