This patch introduces control of cfq_data.
Its algorithm is similar to the one CFQ uses for synchronous I/O.
The new optional cfq operations are:
The "cfq_dispatch_requests_fn" defines a function which implements the
request dispatching algorithm.
This becomes the main function for fairness.
The "cfq_completed_request_after_fn" defines a function which winds up an
I/O's affairs after it completes.
The "cfq_active_check_fn" defines a function which checks whether the selected cfq_data is the active cfq_data.
The "cfq_empty_fn" defines a function which checks whether active data exists.
Signed-off-by: Satoshi UCHIDA <uchida@ap.jp.nec.com>
---
block/cfq-cgroup.c | 326 ++++++++++++++++++++++++++++++++++++++++++-
block/cfq-iosched.c | 89 +++++++++---
include/linux/cfq-iosched.h | 41 ++++++-
3 files changed, 434 insertions(+), 22 deletions(-)
diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index 6a8a219..27a9a7a 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -15,9 +15,35 @@
#include <linux/cgroup.h>
#include <linux/cfq-iosched.h>
+
#define CFQ_CGROUP_SLICE_SCALE (5)
#define CFQ_CGROUP_MAX_IOPRIO (8)
+/* Default base time slice, in jiffies (HZ / 10 is one tenth of a second). */
+static const int cfq_cgroup_slice = HZ / 10;
+
+/* Bit positions used by the CFQ_CFQD_FNS() helpers inside cfq_data->flags. */
+enum cfqd_state_flags {
+	/* on round-robin busy list */
+	CFQ_CFQD_FLAG_on_rr = 0,
+	/* no requests dispatched in slice */
+	CFQ_CFQD_FLAG_slice_new = 1,
+};
+
+/*
+ * Generate the three accessors for one cfq_data state flag:
+ *   cfq_mark_cfqd_<name>()  - set the flag bit
+ *   cfq_clear_cfqd_<name>() - clear the flag bit
+ *   cfq_cfqd_<name>()       - return 1 if the flag bit is set, 0 otherwise
+ */
+#define CFQ_CFQD_FNS(name)						\
+static inline void cfq_mark_cfqd_##name(struct cfq_data *cfqd)		\
+{									\
+	cfqd->flags |= 1 << CFQ_CFQD_FLAG_##name;			\
+}									\
+static inline void cfq_clear_cfqd_##name(struct cfq_data *cfqd)	\
+{									\
+	cfqd->flags &= ~(1 << CFQ_CFQD_FLAG_##name);			\
+}									\
+static inline int cfq_cfqd_##name(const struct cfq_data *cfqd)		\
+{									\
+	return !!(cfqd->flags & (1 << CFQ_CFQD_FLAG_##name));		\
+}
+
+CFQ_CFQD_FNS(on_rr);
+CFQ_CFQD_FNS(slice_new);
+#undef CFQ_CFQD_FNS
+
static const int cfq_cgroup_slice_idle = HZ / 125;
struct cfq_cgroup {
@@ -45,6 +71,7 @@ static inline struct cfq_cgroup *task_to_cfq_cgroup(struct task_struct *tsk)
* Add device or cgroup data functions.
*/
struct cfq_data *__cfq_cgroup_init_queue(struct request_queue *q, void *data);
+/* Forward declaration: installed as the idle slice timer callback below. */
+static void cfq_cgroup_idle_slice_timer(unsigned long data);
static struct cfq_meta_data *cfq_cgroup_init_meta_data(struct cfq_data *cfqd, struct request_queue *q)
{
@@ -61,13 +88,18 @@ static struct cfq_meta_data *cfq_cgroup_init_meta_data(struct cfq_data *cfqd, st
cfqmd->cfq_driv_d.last_end_request = jiffies;
init_timer(&cfqmd->cfq_driv_d.idle_slice_timer);
- cfqmd->cfq_driv_d.idle_slice_timer.function = cfq_idle_slice_timer;
- cfqmd->cfq_driv_d.idle_slice_timer.data = (unsigned long) cfqd;
+ cfqmd->cfq_driv_d.idle_slice_timer.function = cfq_cgroup_idle_slice_timer;
+ cfqmd->cfq_driv_d.idle_slice_timer.data = (unsigned long) cfqmd;
cfqmd->cfq_driv_d.cfq_slice_idle = cfq_cgroup_slice_idle;
cfqmd->sibling_tree = RB_ROOT;
cfqmd->siblings = 0;
+ cfqmd->service_tree = CFQ_RB_ROOT;
+ cfqmd->busy_data = 0;
+
+ cfqmd->cfq_slice = cfq_cgroup_slice;
+
return cfqmd;
}
@@ -170,6 +202,8 @@ struct cfq_data *__cfq_cgroup_init_queue(struct request_queue *q, void *data)
RB_CLEAR_NODE(&cfqd->sib_node);
RB_CLEAR_NODE(&cfqd->group_node);
+ RB_CLEAR_NODE(&cfqd->rb_node);
+ cfqd->rb_key = 0;
if (!cfqmd) {
cfqmd = cfq_cgroup_init_meta_data(cfqd, q);
@@ -424,7 +458,295 @@ struct cfq_data *cfq_cgroup_search_data(void *data,
}
+/*
+ * service tree control.
+ */
+/*
+ * Return 1 if the time slice of @cfqd is used up, 0 otherwise.
+ * A cfq_data still flagged slice_new is never reported as used; otherwise
+ * the slice counts as used once jiffies has reached slice_end.
+ */
+static inline int cfq_cgroup_slice_used(struct cfq_data *cfqd)
+{
+	return !cfq_cfqd_slice_new(cfqd) &&
+	       !time_before(jiffies, cfqd->slice_end);
+}
+
+
+/*
+ * Compute the slice length for priority @prio.
+ *
+ * The result is the base slice taken from the meta data, adjusted by one
+ * step (base / CFQ_CGROUP_SLICE_SCALE) per priority level away from
+ * CFQ_CGROUP_MAX_IOPRIO / 2: numerically lower priorities get a longer
+ * slice, higher ones a shorter slice.  Warns if @prio is out of range.
+ */
+static inline int cfq_cgroup_prio_slice(struct cfq_data *cfqd,
+					unsigned short prio)
+{
+	const int base = cfqd->cfqmd->cfq_slice;
+	const int step = base / CFQ_CGROUP_SLICE_SCALE;
+
+	WARN_ON(prio >= IOPRIO_BE_NR);
+
+	return base + step * (CFQ_CGROUP_MAX_IOPRIO / 2 - prio);
+}
+
+/*
+ * Start the slice of @cfqd now: slice_end becomes the current jiffies plus
+ * the slice length for the priority of the owning cfq_cgroup.
+ */
+static inline void
+cfq_cgroup_set_prio_slice(struct cfq_data *cfqd)
+{
+	int slice = cfq_cgroup_prio_slice(cfqd, cfqd->cfqc->ioprio);
+
+	cfqd->slice_end = jiffies + slice;
+}
+
+/*
+ * Offset added to the service tree key of @cfqd: the difference between
+ * the priority-0 slice and this cfq_data's own slice, multiplied by
+ * (busy_data - 1).
+ */
+static unsigned long cfq_cgroup_slice_offset(struct cfq_data *cfqd)
+{
+	const int longest = cfq_cgroup_prio_slice(cfqd, 0);
+	const int own = cfq_cgroup_prio_slice(cfqd, cfqd->cfqc->ioprio);
+
+	return (cfqd->cfqmd->busy_data - 1) * (longest - own);
+}
+
+/*
+ * Insert (or re-position) @cfqd in the service tree of its meta data.
+ *
+ * With @add_front set the key is 0.  Otherwise the key is the current
+ * jiffies plus the priority offset plus any leftover slice_resid, and
+ * slice_resid is reset to 0.  If @cfqd is already in the tree with the
+ * same key nothing else happens; if the key differs it is erased first
+ * and then re-inserted.  The cached leftmost node is updated when the new
+ * node ends up leftmost.
+ */
+static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd,
+					int add_front)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	struct rb_node **link = &cfqmd->service_tree.rb.rb_node;
+	struct rb_node *parent = NULL;
+	unsigned long key = 0;
+	int leftmost = 1;
+
+	if (!add_front) {
+		key = cfq_cgroup_slice_offset(cfqd) + jiffies;
+		key += cfqd->slice_resid;
+		cfqd->slice_resid = 0;
+	}
+
+	if (!RB_EMPTY_NODE(&cfqd->rb_node)) {
+		/* Already queued at the right position: nothing to move. */
+		if (key == cfqd->rb_key)
+			return;
+		cfq_rb_erase(&cfqd->rb_node, &cfqmd->service_tree);
+	}
+
+	/* Walk down to the insertion point; equal keys go to the right. */
+	while (*link) {
+		struct cfq_data *entry;
+
+		parent = *link;
+		entry = rb_entry(parent, struct cfq_data, rb_node);
+
+		if (key < entry->rb_key) {
+			link = &parent->rb_left;
+		} else {
+			link = &parent->rb_right;
+			leftmost = 0;
+		}
+	}
+
+	if (leftmost)
+		cfqmd->service_tree.left = &cfqd->rb_node;
+
+	cfqd->rb_key = key;
+	rb_link_node(&cfqd->rb_node, parent, link);
+	rb_insert_color(&cfqd->rb_node, &cfqmd->service_tree.rb);
+}
+
+
+/*
+ * Expire the slice of @cfqd.
+ *
+ * When @timed_out is set and the slice had already been started, the
+ * difference between slice_end and jiffies is stored in slice_resid.  A
+ * cfq_data that is flagged on_rr is re-added to the service tree, and
+ * @cfqd is dropped as the active data if it currently is the active one.
+ */
+static void
+__cfq_cgroup_slice_expired(struct cfq_meta_data *cfqmd, struct cfq_data *cfqd,
+			   int timed_out)
+{
+	if (timed_out && !cfq_cfqd_slice_new(cfqd))
+		cfqd->slice_resid = cfqd->slice_end - jiffies;
+
+	if (cfq_cfqd_on_rr(cfqd))
+		cfq_cgroup_service_tree_add(cfqd, 0);
+
+	if (cfqmd->active_data == cfqd)
+		cfqmd->active_data = NULL;
+}
+
+/*
+ * Expire the currently active cfq_data of @cfqmd, if there is one: first
+ * its own slice is expired through cfq_slice_expired(), then the cgroup
+ * level slice through __cfq_cgroup_slice_expired().
+ */
+static inline void
+cfq_cgroup_slice_expired(struct cfq_meta_data *cfqmd, int timed_out)
+{
+	struct cfq_data *active = cfqmd->active_data;
+
+	if (!active)
+		return;
+
+	cfq_slice_expired(active, 1);
+	__cfq_cgroup_slice_expired(cfqmd, active, timed_out);
+}
+
+/*
+ * Return the leftmost cfq_data of @root, or NULL if the tree is empty.
+ * The cached leftmost pointer is refreshed from rb_first() when it is unset.
+ */
+static struct cfq_data *cfq_cgroup_rb_first(struct cfq_rb_root *root)
+{
+	struct rb_node *first = root->left;
+
+	if (!first) {
+		first = rb_first(&root->rb);
+		root->left = first;
+	}
+
+	return first ? rb_entry(first, struct cfq_data, rb_node) : NULL;
+}
+
+/*
+ * Return the first cfq_data in the service tree of @cfqmd, or NULL when
+ * the tree is empty.
+ */
+static struct cfq_data *cfq_cgroup_get_next_data(struct cfq_meta_data *cfqmd)
+{
+	struct cfq_rb_root *tree = &cfqmd->service_tree;
+
+	return RB_EMPTY_ROOT(&tree->rb) ? NULL : cfq_cgroup_rb_first(tree);
+}
+
+/*
+ * Make @cfqd (which may be NULL) the active data of @cfqmd.  A non-NULL
+ * @cfqd gets its slice_end reset to 0 and is flagged slice_new.
+ */
+static void
+__cfq_cgroup_set_active_data(struct cfq_meta_data *cfqmd,
+			     struct cfq_data *cfqd)
+{
+	cfqmd->active_data = cfqd;
+
+	if (!cfqd)
+		return;
+
+	cfqd->slice_end = 0;
+	cfq_mark_cfqd_slice_new(cfqd);
+}
+
+/*
+ * Pick the next cfq_data from the service tree, make it the active data
+ * of @cfqmd and return it (NULL if the tree is empty).
+ */
+static struct cfq_data *cfq_cgroup_set_active_data(struct cfq_meta_data *cfqmd)
+{
+	struct cfq_data *next = cfq_cgroup_get_next_data(cfqmd);
+
+	__cfq_cgroup_set_active_data(cfqmd, next);
+	return next;
+}
+
+/*
+ * Select the cfq_data to serve next.
+ *
+ * The active data is kept while its slice is not used up and it either
+ * has a non-empty service tree of its own or wait_request_checker()
+ * returns non-zero for it.  In every other case the active data (if any)
+ * is expired and the next cfq_data from the service tree becomes active.
+ * Returns the selected cfq_data, or NULL if there is none.
+ */
+struct cfq_data *cfq_cgroup_select_data(struct cfq_meta_data *cfqmd)
+{
+	struct cfq_data *active = cfqmd->active_data;
+
+	if (active) {
+		if (!cfq_cgroup_slice_used(active)) {
+			if (!RB_EMPTY_ROOT(&active->service_tree.rb))
+				return active;
+
+			if (wait_request_checker(active))
+				return active;
+		}
+
+		cfq_cgroup_slice_expired(cfqmd, 0);
+	}
+
+	return cfq_cgroup_set_active_data(cfqmd);
+}
+
+/*
+ * Force-dispatch from every cfq_data in the service tree of @cfqd's meta
+ * data, then expire the active data.  The loop keeps taking the first
+ * tree entry until the tree is empty.  Returns the total reported by
+ * cfq_forced_dispatch(); busy_data must be zero afterwards.
+ */
+int cfq_cgroup_forced_dispatch(struct cfq_data *cfqd)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	struct cfq_data *next;
+	int total = 0;
+
+	for (;;) {
+		next = cfq_cgroup_rb_first(&cfqmd->service_tree);
+		if (next == NULL)
+			break;
+		total += cfq_forced_dispatch(next);
+	}
+
+	cfq_cgroup_slice_expired(cfqmd, 0);
+
+	BUG_ON(cfqmd->busy_data);
+
+	return total;
+}
+
+/*
+ * Dispatch requests for the meta data that @cfqd belongs to.
+ *
+ * Returns 0 when busy_data is zero.  With @force set, everything is
+ * drained through cfq_cgroup_forced_dispatch().  Otherwise one cfq_data
+ * is selected and its requests are dispatched; the return value is the
+ * number reported by the dispatch helper (0 if nothing was selected).
+ */
+int cfq_cgroup_dispatch_requests(struct cfq_data *cfqd, int force)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+	struct cfq_data *selected;
+
+	if (!cfqmd->busy_data)
+		return 0;
+
+	if (unlikely(force))
+		return cfq_cgroup_forced_dispatch(cfqd);
+
+	selected = cfq_cgroup_select_data(cfqmd);
+	if (!selected)
+		return 0;
+
+	return cfq_queue_dispatch_requests(selected, force);
+}
+
+/*
+ * Timer running if the active cfq_data is currently idling inside its time
+ * slice.
+ *
+ * @data is the cfq_meta_data the timer was set up for.  If there is an
+ * active cfq_data whose slice is used up, it is expired and a dispatch is
+ * scheduled.  If its slice is still running, nothing happens while
+ * busy_data is zero; otherwise __cfq_idle_slice_timer() decides whether a
+ * dispatch is scheduled.  Without active data only a dispatch is scheduled.
+ *
+ * The queue lock is taken and released through cfqmd->elv_data.  The
+ * active data pointer must not be used for that, since it is NULL when no
+ * cfq_data is active.
+ */
+static void cfq_cgroup_idle_slice_timer(unsigned long data)
+{
+	struct cfq_meta_data *cfqmd = (struct cfq_meta_data *) data;
+	struct cfq_data *lock_cfqd = cfqmd->elv_data;
+	struct cfq_data *cfqd;
+	int timed_out = 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(CFQ_DRV_UNIQ_DATA(lock_cfqd).queue->queue_lock,
+			  flags);
+
+	cfqd = cfqmd->active_data;
+	if (cfqd) {
+		timed_out = 0;
+
+		if (cfq_cgroup_slice_used(cfqd))
+			goto expire_cgroup;
+
+		if (!cfqmd->busy_data)
+			goto out_cont;
+
+		if (__cfq_idle_slice_timer(cfqd))
+			goto out_cont;
+		else
+			goto out_kick;
+
+	}
+expire_cgroup:
+	cfq_cgroup_slice_expired(cfqmd, timed_out);
+out_kick:
+	cfq_schedule_dispatch(cfqmd->elv_data);
+out_cont:
+	/* Unlock via the same object that was used to take the lock. */
+	spin_unlock_irqrestore(CFQ_DRV_UNIQ_DATA(lock_cfqd).queue->queue_lock,
+			       flags);
+}
+
+/*
+ * Cgroup level follow-up after a request of @cfqd has completed.
+ *
+ * Only acts when @cfqd is the active data.  A slice still flagged
+ * slice_new is started now (slice_end is set and the flag cleared).  If
+ * the slice is used up afterwards, the active data is expired.
+ *
+ * Returns 1 if @cfqd is the active data and its slice is still running,
+ * 0 otherwise.
+ */
+int cfq_cgroup_completed_request_after(struct cfq_data *cfqd)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+
+	if (cfqmd->active_data != cfqd)
+		return 0;
+
+	if (cfq_cfqd_slice_new(cfqd)) {
+		cfq_cgroup_set_prio_slice(cfqd);
+		cfq_clear_cfqd_slice_new(cfqd);
+	}
+
+	if (!cfq_cgroup_slice_used(cfqd))
+		return 1;
+
+	cfq_cgroup_slice_expired(cfqmd, 1);
+	return 0;
+}
+
+/* Return 1 when busy_data of @cfqd's meta data is zero, 0 otherwise. */
+static int cfq_cgroup_queue_empty(struct cfq_data *cfqd)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+
+	return cfqmd->busy_data == 0;
+}
+
+/* Return 1 when @cfqd is the active data of its meta data, 0 otherwise. */
+static int cfq_cgroup_active_data_check(struct cfq_data *cfqd)
+{
+	struct cfq_meta_data *cfqmd = cfqd->cfqmd;
+
+	return cfqmd->active_data == cfqd;
+}
+
struct cfq_ops opt
...