This is the dm-ioband patch.
It is based on 2.6.26-rc5-mm3.
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/Kconfig linux-2.6.26-rc5-mm3/drivers/md/Kconfig
--- linux-2.6.26-rc5-mm3.orig/drivers/md/Kconfig 2008-06-25 15:58:50.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/Kconfig 2008-07-04 12:21:10.000000000 +0900
@@ -271,4 +271,17 @@ config DM_UEVENT
---help---
Generate udev events for DM events.
+config DM_IOBAND
+ tristate "I/O bandwidth control (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ ---help---
+ This device-mapper target allows you to define how the
+ available bandwidth of a storage device should be
+ shared between processes, cgroups, partitions or LUNs.
+
+ Information on how to use dm-ioband is available in:
+ <file:Documentation/device-mapper/ioband.txt>.
+
+ If unsure, say N.
+
endif # MD
diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/Makefile linux-2.6.26-rc5-mm3/drivers/md/Makefile
--- linux-2.6.26-rc5-mm3.orig/drivers/md/Makefile 2008-06-25 15:58:50.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/Makefile 2008-07-04 12:21:10.000000000 +0900
@@ -7,6 +7,7 @@ dm-mod-objs := dm.o dm-table.o dm-target
dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o
dm-mirror-objs := dm-raid1.o
+dm-ioband-objs := dm-ioband-ctl.o dm-ioband-policy.o dm-ioband-type.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
@@ -36,6 +37,7 @@ obj-$(CONFIG_DM_MULTIPATH) += dm-multipa
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
+obj-$(CONFIG_DM_IOBAND) += dm-ioband.o
quiet_cmd_unroll = UNROLL $@
cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/dm-ioband-ctl.c linux-2.6.26-rc5-mm3/drivers/md/dm-ioband-ctl.c
--- linux-2.6.26-rc5-mm3.orig/drivers/md/dm-ioband-ctl.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/dm-ioband-ctl.c 2008-07-04 12:21:10.000000000 +0900
@@ -0,0 +1,1315 @@
+/*
+ * Copyright (C) 2008 VA Linux Systems Japan K.K.
+ * Authors: Hirokazu Takahashi <taka@valinux.co.jp>
+ * Ryo Tsuruta <ryov@valinux.co.jp>
+ *
+ * I/O bandwidth control
+ *
+ * This file is released under the GPL.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/raid/md.h>
+#include <linux/rbtree.h>
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "dm-ioband.h"
+
+#define DM_MSG_PREFIX "ioband" /* NOTE(review): presumably the prefix picked up by the DM logging macros - confirm */
+#define POLICY_PARAM_START 6 /* NOTE(review): not used in this part of the file; presumably the index of the first policy argument - confirm */
+#define POLICY_PARAM_DELIM "=:," /* NOTE(review): presumably the separator characters accepted in policy parameters - confirm */
+
+static LIST_HEAD(ioband_device_list);
+/*
+ * Protects ioband_device_list and the g_ref count of the devices that
+ * are linked on it.
+ */
+static DEFINE_SPINLOCK(ioband_devicelist_lock);
+
+static void suspend_ioband_device(struct ioband_device *, unsigned long, int);
+static void resume_ioband_device(struct ioband_device *);
+static void ioband_conduct(struct work_struct *);
+static void ioband_hold_bio(struct ioband_group *, struct bio *);
+static struct bio *ioband_pop_bio(struct ioband_group *);
+static int ioband_set_param(struct ioband_group *, char *, char *);
+static int ioband_group_attach(struct ioband_group *, int, char *);
+static int ioband_group_type_select(struct ioband_group *, char *);
+
+long ioband_debug; /* just for debugging; not static, so it has external linkage */
+
+static void do_nothing(void) {} /* no-op handed to wait_event_lock_irq() as its "cmd" argument */
+
+/*
+ * Switch the ioband device @dp to the policy named @name.
+ *
+ * @name is looked up in dm_ioband_policy_type[]; -EINVAL is returned when no
+ * entry matches.  If @dp already uses that policy nothing is changed and 0 is
+ * returned.  Otherwise the device is suspended, every group on dp->g_groups
+ * is passed to the current g_group_dtr hook, the new policy's
+ * p_policy_init() is run with @argc/@argv, every group is passed to
+ * g_group_ctr again and the device is resumed.
+ *
+ * After a switch the return value is whatever p_policy_init() returned; the
+ * groups are rebuilt and the device is resumed regardless of that value.
+ */
+static int policy_init(struct ioband_device *dp, char *name,
+						int argc, char **argv)
+{
+	struct policy_type *policy = dm_ioband_policy_type;
+	struct ioband_group *grp;
+	unsigned long flags;
+	int ret = 0;
+
+	/* the policy table ends with an entry whose p_name is NULL */
+	while (policy->p_name && strcmp(name, policy->p_name))
+		policy++;
+	if (!policy->p_name)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dp->g_lock, flags);
+
+	/* do nothing if the same policy is already set */
+	if (dp->g_policy == policy)
+		goto out;
+
+	/* quiesce the device, then tear the groups down with the old hook */
+	suspend_ioband_device(dp, flags, 1);
+	list_for_each_entry(grp, &dp->g_groups, c_list)
+		dp->g_group_dtr(grp);
+
+	/* switch to the new policy */
+	dp->g_policy = policy;
+	ret = policy->p_policy_init(dp, argc, argv);
+
+	/* fall back to the generic queueing hooks if none are set */
+	if (!dp->g_hold_bio)
+		dp->g_hold_bio = ioband_hold_bio;
+	if (!dp->g_pop_bio)
+		dp->g_pop_bio = ioband_pop_bio;
+
+	/* set every group up again and accept I/O once more */
+	list_for_each_entry(grp, &dp->g_groups, c_list)
+		dp->g_group_ctr(grp, NULL);
+	resume_ioband_device(dp);
+out:
+	spin_unlock_irqrestore(&dp->g_lock, flags);
+	return ret;
+}
+
+/*
+ * Look up the ioband device called @name, creating it if it does not exist.
+ *
+ * If a device with that name is already on ioband_device_list its reference
+ * count is incremented and it is returned unchanged; @io_throttle and
+ * @io_limit are only applied to a newly created device.  A new device starts
+ * with a reference count of 1 and no policy.
+ *
+ * All sleeping allocations (the structure itself and its workqueue) are done
+ * before ioband_devicelist_lock is taken, because that lock is a spinlock
+ * acquired with interrupts disabled.
+ *
+ * Returns the device, or NULL if the structure or its workqueue could not be
+ * allocated.  References are dropped with release_ioband_device().
+ */
+static struct ioband_device *alloc_ioband_device(char *name,
+					int io_throttle, int io_limit)
+{
+	struct ioband_device *dp, *new;
+	unsigned long flags;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	/*
+	 * Prepare its own workqueue as generic_make_request() may
+	 * potentially block the workqueue when submitting BIOs.
+	 *
+	 * create_workqueue() may sleep, so it must not be called with
+	 * ioband_devicelist_lock held.
+	 */
+	new->g_ioband_wq = create_workqueue("kioband");
+	if (!new->g_ioband_wq) {
+		kfree(new);
+		return NULL;
+	}
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	list_for_each_entry(dp, &ioband_device_list, g_list) {
+		if (!strcmp(dp->g_name, name)) {
+			dp->g_ref++;
+			spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+			/* the device already exists: drop the spare copy */
+			destroy_workqueue(new->g_ioband_wq);
+			kfree(new);
+			return dp;
+		}
+	}
+
+	INIT_WORK(&new->g_conductor, ioband_conduct);
+	INIT_LIST_HEAD(&new->g_groups);
+	INIT_LIST_HEAD(&new->g_list);
+	spin_lock_init(&new->g_lock);
+	mutex_init(&new->g_lock_device);
+	bio_list_init(&new->g_urgent_bios);
+	new->g_io_throttle = io_throttle;
+	new->g_io_limit[0] = io_limit;
+	new->g_io_limit[1] = io_limit;
+	new->g_issued[0] = 0;
+	new->g_issued[1] = 0;
+	new->g_blocked = 0;
+	new->g_ref = 1;
+	new->g_flags = 0;
+	strlcpy(new->g_name, name, sizeof(new->g_name));
+	new->g_policy = NULL;
+	new->g_hold_bio = NULL;
+	new->g_pop_bio = NULL;
+	init_waitqueue_head(&new->g_waitq);
+	init_waitqueue_head(&new->g_waitq_suspend);
+	init_waitqueue_head(&new->g_waitq_flush);
+
+	/* publish the fully initialised device while still holding the lock */
+	list_add_tail(&new->g_list, &ioband_device_list);
+
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+	return new;
+}
+
+/*
+ * Drop one reference to @dp.
+ *
+ * The count is decremented under ioband_devicelist_lock.  If it is still
+ * positive nothing else happens; otherwise the device is unlinked from
+ * ioband_device_list and, after the lock has been released, its workqueue
+ * is destroyed and the structure is freed.
+ */
+static void release_ioband_device(struct ioband_device *dp)
+{
+	unsigned long flags;
+	int last;
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	last = (--dp->g_ref <= 0);
+	if (last)
+		list_del(&dp->g_list);
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+
+	if (!last)
+		return;
+
+	/* the device is off the list now; tear it down without the lock */
+	destroy_workqueue(dp->g_ioband_wq);
+	kfree(dp);
+}
+
+/*
+ * Tell whether @dp has been flushed.
+ *
+ * Returns 0 if
+ *  - @wait_completion is non-zero and g_issued[0] + g_issued[1] is positive,
+ *  - g_blocked is non-zero,
+ *  - a process is waiting on the device's g_waitq, or
+ *  - a process is waiting on the c_waitq of any group on dp->g_groups.
+ * Returns 1 otherwise.
+ */
+static int is_ioband_device_flushed(struct ioband_device *dp,
+						int wait_completion)
+{
+	struct ioband_group *grp;
+
+	if (wait_completion && dp->g_issued[0] + dp->g_issued[1] > 0)
+		return 0;
+	if (dp->g_blocked)
+		return 0;
+	if (waitqueue_active(&dp->g_waitq))
+		return 0;
+
+	list_for_each_entry(grp, &dp->g_groups, c_list) {
+		if (waitqueue_active(&grp->c_waitq))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Stop @dp from accepting new bios and wait until it has been flushed.
+ *
+ * Called with dp->g_lock held; @flags is the IRQ state the caller saved when
+ * it took the lock.  The lock is dropped while the conductor work is queued
+ * and the workqueue is flushed, and it is held again on return.  @flags is
+ * passed by value, so the state saved by the re-lock below stays local and
+ * the caller unlocks with its own copy.
+ *
+ * Every group that is not already down is marked down and tagged "need up",
+ * which is what resume_ioband_device() looks at to bring it back.
+ * @wait_completion is handed on to is_ioband_device_flushed().
+ */
+static void suspend_ioband_device(struct ioband_device *dp,
+				unsigned long flags, int wait_completion)
+{
+	struct ioband_group *grp;
+
+	/* from now on incoming bios are blocked */
+	set_device_suspended(dp);
+
+	/* kick every sleeper on the device itself ... */
+	wake_up_all(&dp->g_waitq);
+
+	/* ... and on each group, taking down the groups that are still up */
+	list_for_each_entry(grp, &dp->g_groups, c_list) {
+		if (!is_group_down(grp)) {
+			set_group_down(grp);
+			set_group_need_up(grp);
+		}
+		wake_up_all(&grp->c_waitq);
+	}
+
+	/* push out the bios that are already mapped, without the lock held */
+	spin_unlock_irqrestore(&dp->g_lock, flags);
+	queue_work(dp->g_ioband_wq, &dp->g_conductor);
+	flush_workqueue(dp->g_ioband_wq);
+
+	/* sleep on g_waitq_flush until the device reports itself flushed */
+	spin_lock_irqsave(&dp->g_lock, flags);
+	wait_event_lock_irq(dp->g_waitq_flush,
+			is_ioband_device_flushed(dp, wait_completion),
+			dp->g_lock, do_nothing());
+}
+
+/*
+ * Undo suspend_ioband_device() for @dp.
+ *
+ * Groups tagged "need up" lose that tag and their "down" state; groups
+ * without the tag are left untouched.  Then the waiters on g_waitq_suspend
+ * are woken and the device's suspended state is cleared.
+ */
+static void resume_ioband_device(struct ioband_device *dp)
+{
+	struct ioband_group *grp;
+
+	/* bring back only the groups that the suspend took down */
+	list_for_each_entry(grp, &dp->g_groups, c_list) {
+		if (!group_need_up(grp))
+			continue;
+		clear_group_need_up(grp);
+		clear_group_down(grp);
+	}
+
+	/* accept incoming bios */
+	wake_up_all(&dp->g_waitq_suspend);
+	clear_device_suspended(dp);
+}
+
+/*
+ * Find the group with id @id in the red-black tree rooted at
+ * head->c_group_root, which is keyed by c_id.
+ *
+ * IOBAND_ID_ANY matches the first node visited, i.e. the root of the tree.
+ * Returns NULL if the tree is empty or no group has the requested id.
+ */
+static struct ioband_group *ioband_group_find(
+				struct ioband_group *head, int id)
+{
+	struct rb_node *cur = head->c_group_root.rb_node;
+	struct ioband_group *grp;
+
+	while (cur) {
+		grp = container_of(cur, struct ioband_group, c_group_node);
+		if (id == IOBAND_ID_ANY || grp->c_id == id)
+			return grp;
+
+		/* smaller ids live in the left subtree, the rest on the right */
+		if (id < grp->c_id)
+			cur = cur->rb_left;
+		else
+			cur = cur->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Insert @gp into the red-black tree @root, ordered by c_id.
+ *
+ * A group whose id is smaller than a node's id goes into that node's left
+ * subtree, any other (including an equal id) into the right one.  No check
+ * for an already present id is made here.
+ */
+static void ioband_group_add_node(struct rb_root *root,
+						struct ioband_group *gp)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* walk down to the empty slot where the new node belongs */
+	while (*link) {
+		struct ioband_group *cur;
+
+		parent = *link;
+		cur = container_of(parent, struct ioband_group, c_group_node);
+		if (gp->c_id < cur->c_id)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	/* hook the node in, then let the rbtree code rebalance */
+	rb_link_node(&gp->c_group_node, parent, link);
+	rb_insert_color(&gp->c_group_node, root);
+}
+
+static int ioband_group_init(struct ioband_group *gp,
+ struct ioband_group *head, struct ioband_device *dp, int id, char *param)
+{
+ unsigned long flags;
+ int r;
+
+ INIT_LIST_HEAD(&gp->c_list);
+ bio_list_init(&gp->c_blocked_bios);
+ bio_list_init(&gp->c_prio_bios);
+ gp->c_id = id; /* should be verified */
+
...