OpenVZ Forum


Home » Mailing lists » Devel » [PATCH 0/2] dm-ioband: I/O bandwidth controller v1.2.0: Introduction
[PATCH 0/2] dm-ioband: I/O bandwidth controller v1.2.0: Introduction [message #31597] Fri, 04 July 2008 03:40 Go to next message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Hi everyone,

This is the dm-ioband version 1.2.0 release.

Dm-ioband is an I/O bandwidth controller implemented as a device-mapper
driver, which gives specified bandwidth to each job running on the same
physical device.

- Can be applied to the kernel 2.6.26-rc5-mm3.
- Changes from 1.1.0 (posted on June 2, 2008):
  - Dynamic policy switching
    A user can change the bandwidth control policy while the dm-ioband
    device is active. It is useful when the dm-ioband device is used
    as a root device.
  - I/O smoothing take #1
    This feature makes I/O requests of each group issued smoothly.
    Once a certain group has used up its tokens, all I/O requests to
    the group will be blocked until all the other groups used up theirs.
    This feature is to minimize this blocking time.
    We are now testing on the 2nd step and we will release it soon,
    which will improve the smoothness significantly.
  - Replace simple_strtol() with strict_strtol().
  - Fix I/O errors encountered after "dmsetup resume."

Thanks,
Ryo Tsuruta
Linux Block I/O Bandwidth Control Project
http://people.valinux.co.jp/~ryov/bwctl/
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
[PATCH 1/2] dm-ioband: I/O bandwidth controller v1.2.0: Source code and patch [message #31598 is a reply to message #31597] Fri, 04 July 2008 03:41 Go to previous messageGo to next message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Here is the patch of dm-ioband.

Based on 2.6.26-rc5-mm3
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/Kconfig linux-2.6.26-rc5-mm3/drivers/md/Kconfig
--- linux-2.6.26-rc5-mm3.orig/drivers/md/Kconfig	2008-06-25 15:58:50.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/Kconfig	2008-07-04 12:21:10.000000000 +0900
@@ -271,4 +271,17 @@ config DM_UEVENT
 	---help---
 	Generate udev events for DM events.
 
+config DM_IOBAND
+	tristate "I/O bandwidth control (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	This device-mapper target allows to define how the
+	available bandwidth of a storage device should be
+	shared between processes, cgroups, the partitions or the LUNs.
+
+	Information on how to use dm-ioband is available in:
+	   <file:Documentation/device-mapper/ioband.txt>.
+
+	If unsure, say N.
+
 endif # MD
diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/Makefile linux-2.6.26-rc5-mm3/drivers/md/Makefile
--- linux-2.6.26-rc5-mm3.orig/drivers/md/Makefile	2008-06-25 15:58:50.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/Makefile	2008-07-04 12:21:10.000000000 +0900
@@ -7,6 +7,7 @@ dm-mod-objs	:= dm.o dm-table.o dm-target
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-raid1.o
+dm-ioband-objs	:= dm-ioband-ctl.o dm-ioband-policy.o dm-ioband-type.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
@@ -36,6 +37,7 @@ obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipa
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o dm-log.o
 obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
+obj-$(CONFIG_DM_IOBAND)		+= dm-ioband.o
 
 quiet_cmd_unroll = UNROLL  $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -uprN linux-2.6.26-rc5-mm3.orig/drivers/md/dm-ioband-ctl.c linux-2.6.26-rc5-mm3/drivers/md/dm-ioband-ctl.c
--- linux-2.6.26-rc5-mm3.orig/drivers/md/dm-ioband-ctl.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.26-rc5-mm3/drivers/md/dm-ioband-ctl.c	2008-07-04 12:21:10.000000000 +0900
@@ -0,0 +1,1315 @@
+/*
+ * Copyright (C) 2008 VA Linux Systems Japan K.K.
+ * Authors: Hirokazu Takahashi <taka@valinux.co.jp>
+ *          Ryo Tsuruta <ryov@valinux.co.jp>
+ *
+ *  I/O bandwidth control
+ *
+ * This file is released under the GPL.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/raid/md.h>
+#include <linux/rbtree.h>
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "dm-ioband.h"
+
+#define DM_MSG_PREFIX "ioband"
+#define POLICY_PARAM_START 6
+#define POLICY_PARAM_DELIM "=:,"
+
+static LIST_HEAD(ioband_device_list);
+/* to protect ioband_device_list */
+static DEFINE_SPINLOCK(ioband_devicelist_lock);
+
+static void suspend_ioband_device(struct ioband_device *, unsigned long, int);
+static void resume_ioband_device(struct ioband_device *);
+static void ioband_conduct(struct work_struct *);
+static void ioband_hold_bio(struct ioband_group *, struct bio *);
+static struct bio *ioband_pop_bio(struct ioband_group *);
+static int ioband_set_param(struct ioband_group *, char *, char *);
+static int ioband_group_attach(struct ioband_group *, int, char *);
+static int ioband_group_type_select(struct ioband_group *, char *);
+
+long ioband_debug;	/* just for debugging */
+
+static void do_nothing(void) {}
+
+static int policy_init(struct ioband_device *dp, char *name,
+						int argc, char **argv)
+{
+	struct policy_type *p;
+	struct ioband_group *gp;
+	unsigned long flags;
+	int r;
+
+	for (p = dm_ioband_policy_type; p->p_name; p++) {
+		if (!strcmp(name, p->p_name))
+			break;
+	}
+	if (!p->p_name)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dp->g_lock, flags);
+	if (dp->g_policy == p) {
+		/* do nothing if the same policy is already set */
+		spin_unlock_irqrestore(&dp->g_lock, flags);
+		return 0;
+	}
+
+	suspend_ioband_device(dp, flags, 1);
+	list_for_each_entry(gp, &dp->g_groups, c_list)
+		dp->g_group_dtr(gp);
+
+	/* switch to the new policy */
+	dp->g_policy = p;
+	r = p->p_policy_init(dp, argc, argv);
+	if (!dp->g_hold_bio)
+		dp->g_hold_bio = ioband_hold_bio;
+	if (!dp->g_pop_bio)
+		dp->g_pop_bio = ioband_pop_bio;
+
+	list_for_each_entry(gp, &dp->g_groups, c_list)
+		dp->g_group_ctr(gp, NULL);
+	resume_ioband_device(dp);
+	spin_unlock_irqrestore(&dp->g_lock, flags);
+	return r;
+}
+
+static struct ioband_device *alloc_ioband_device(char *name,
+					int io_throttle, int io_limit)
+
+{
+	struct ioband_device *dp, *new;
+	unsigned long flags;
+
+	new = kzalloc(sizeof(struct ioband_device), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	list_for_each_entry(dp, &ioband_device_list, g_list) {
+		if (!strcmp(dp->g_name, name)) {
+			dp->g_ref++;
+			spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+			kfree(new);
+			return dp;
+		}
+	}
+
+	/*
+	 * Prepare its own workqueue as generic_make_request() may
+	 * potentially block the workqueue when submitting BIOs.
+	 */
+	new->g_ioband_wq = create_workqueue("kioband");
+	if (!new->g_ioband_wq) {
+		spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+		kfree(new);
+		return NULL;
+	}
+
+	INIT_WORK(&new->g_conductor, ioband_conduct);
+	INIT_LIST_HEAD(&new->g_groups);
+	INIT_LIST_HEAD(&new->g_list);
+	spin_lock_init(&new->g_lock);
+	mutex_init(&new->g_lock_device);
+	bio_list_init(&new->g_urgent_bios);
+	new->g_io_throttle = io_throttle;
+	new->g_io_limit[0] = io_limit;
+	new->g_io_limit[1] = io_limit;
+	new->g_issued[0] = 0;
+	new->g_issued[1] = 0;
+	new->g_blocked = 0;
+	new->g_ref = 1;
+	new->g_flags = 0;
+	strlcpy(new->g_name, name, sizeof(new->g_name));
+	new->g_policy = NULL;
+	new->g_hold_bio = NULL;
+	new->g_pop_bio = NULL;
+	init_waitqueue_head(&new->g_waitq);
+	init_waitqueue_head(&new->g_waitq_suspend);
+	init_waitqueue_head(&new->g_waitq_flush);
+	list_add_tail(&new->g_list, &ioband_device_list);
+
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+	return new;
+}
+
+static void release_ioband_device(struct ioband_device *dp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	dp->g_ref--;
+	if (dp->g_ref > 0) {
+		spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+		return;
+	}
+	list_del(&dp->g_list);
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+	destroy_workqueue(dp->g_ioband_wq);
+	kfree(dp);
+}
+
+static int is_ioband_device_flushed(struct ioband_device *dp,
+						int wait_completion)
+{
+	struct ioband_group *gp;
+
+	if (wait_completion && dp->g_issued[0] + dp->g_issued[1] > 0)
+		return 0;
+	if (dp->g_blocked || waitqueue_active(&dp->g_waitq))
+		return 0;
+	list_for_each_entry(gp, &dp->g_groups, c_list)
+		if (waitqueue_active(&gp->c_waitq))
+			return 0;
+	return 1;
+}
+
+static void suspend_ioband_device(struct ioband_device *dp,
+				unsigned long flags, int wait_completion)
+{
+	struct ioband_group *gp;
+
+	/* block incoming bios */
+	set_device_suspended(dp);
+
+	/* wake up all blocked processes and go down all ioband groups */
+	wake_up_all(&dp->g_waitq);
+	list_for_each_entry(gp, &dp->g_groups, c_list) {
+		if (!is_group_down(gp)) {
+			set_group_down(gp);
+			set_group_need_up(gp);
+		}
+		wake_up_all(&gp->c_waitq);
+	}
+
+	/* flush the already mapped bios */
+	spin_unlock_irqrestore(&dp->g_lock, flags);
+	queue_work(dp->g_ioband_wq, &dp->g_conductor);
+	flush_workqueue(dp->g_ioband_wq);
+
+	/* wait for all processes to wake up and bios to release */
+	spin_lock_irqsave(&dp->g_lock, flags);
+	wait_event_lock_irq(dp->g_waitq_flush,
+			is_ioband_device_flushed(dp, wait_completion),
+			dp->g_lock, do_nothing());
+}
+
+static void resume_ioband_device(struct ioband_device *dp)
+{
+	struct ioband_group *gp;
+
+	/* go up ioband groups */
+	list_for_each_entry(gp, &dp->g_groups, c_list) {
+		if (group_need_up(gp)) {
+			clear_group_need_up(gp);
+			clear_group_down(gp);
+		}
+	}
+
+	/* accept incoming bios */
+	wake_up_all(&dp->g_waitq_suspend);
+	clear_device_suspended(dp);
+}
+
+static struct ioband_group *ioband_group_find(
+					struct ioband_group *head, int id)
+{
+	struct rb_node *node = head->c_group_root.rb_node;
+
+	while (node) {
+		struct ioband_group *p =
+			container_of(node, struct ioband_group, c_group_node);
+
+		if (p->c_id == id || id == IOBAND_ID_ANY)
+			return p;
+		node = (id < p->c_id) ? node->rb_left : node->rb_right;
+	}
+	return NULL;
+}
+
+static void ioband_group_add_node(struct rb_root *root,
+						struct ioband_group *gp)
+{
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+	struct ioband_group *p;
+
+	while (*new) {
+		p = container_of(*new, struct ioband_group, c_group_node);
+		parent = *new;
+		new = (gp->c_id < p->c_id) ?
+					&(*new)->rb_left : &(*new)->rb_right;
+	}
+
+	rb_link_node(&gp->c_group_node, parent, new);
+	rb_insert_color(&gp->c_group_node, root);
+}
+
+static int ioband_group_init(struct ioband_group *gp,
+    struct ioband_group *head, struct ioband_device *dp, int id, char *param)
+{
+	unsigned long flags;
+	int r;
+
+	INIT_LIST_HEAD(&gp->c_list);
+	bio_list_init(&gp->c_blocked_bios);
+	bio_list_init(&gp->c_prio_bios);
+	gp->c_id = id;	/* should be verified */
+	
...

[PATCH 2/2] dm-ioband: I/O bandwidth controller v1.2.0: Document [message #31599 is a reply to message #31598] Fri, 04 July 2008 03:42 Go to previous message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Here is the document of dm-ioband.

Based on 2.6.26-rc5-mm3
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -uprN linux-2.6.26-rc5-mm3.orig/Documentation/device-mapper/ioband.txt linux-2.6.26-rc5-mm3/Documentation/device-mapper/ioband.txt
--- linux-2.6.26-rc5-mm3.orig/Documentation/device-mapper/ioband.txt	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.26-rc5-mm3/Documentation/device-mapper/ioband.txt	2008-07-04 12:21:10.000000000 +0900
@@ -0,0 +1,933 @@
+                     Block I/O bandwidth control: dm-ioband
+
+            -------------------------------------------------------
+
+   Table of Contents
+
+   [1]What's dm-ioband all about?
+
+   [2]Differences from the CFQ I/O scheduler
+
+   [3]How dm-ioband works.
+
+   [4]Setup and Installation
+
+   [5]Getting started
+
+   [6]Command Reference
+
+   [7]Examples
+
+What's dm-ioband all about?
+
+     dm-ioband is an I/O bandwidth controller implemented as a device-mapper
+   driver. Several jobs using the same physical device have to share the
+   bandwidth of the device. dm-ioband gives bandwidth to each job according
+   to its weight, which each job can set its own value to.
+
+     At this time, a job is a group of processes with the same pid or pgrp or
+   uid. There is also a plan to make it support cgroup. A job can also be a
+   virtual machine such as KVM or Xen.
+
+     +------+ +------+ +------+   +------+ +------+ +------+
+     |cgroup| |cgroup| | the  |   | pid  | | pid  | | the  |  jobs
+     |  A   | |  B   | |others|   |  X   | |  Y   | |others|
+     +--|---+ +--|---+ +--|---+   +--|---+ +--|---+ +--|---+
+     +--V----+---V---+----V---+   +--V----+---V---+----V---+
+     | group | group | default|   | group | group | default|  ioband groups
+     |       |       |  group |   |       |       |  group |
+     +-------+-------+--------+   +-------+-------+--------+
+     |        ioband1         |   |       ioband2          |  ioband devices
+     +-----------|------------+   +-----------|------------+
+     +-----------V--------------+-------------V------------+
+     |                          |                          |
+     |          sdb1            |           sdb2           |  physical devices
+     +--------------------------+--------------------------+
+
+
+   --------------------------------------------------------------------------
+
+Differences from the CFQ I/O scheduler
+
+     Dm-ioband is flexible to configure the bandwidth settings.
+
+     Dm-ioband can work with any type of I/O scheduler such as the NOOP
+   scheduler, which is often chosen for high-end storages, since it is
+   implemented outside the I/O scheduling layer. It allows both of partition
+   based bandwidth control and job --- a group of processes --- based
+   control. In addition, it can set different configuration on each physical
+   device to control its bandwidth.
+
+     Meanwhile the current implementation of the CFQ scheduler has 8 IO
+   priority levels and all jobs whose processes have the same IO priority
+   share the bandwidth assigned to this level between them. And IO priority
+   is an attribute of a process so that it equally effects to all block
+   devices.
+
+   --------------------------------------------------------------------------
+
+How dm-ioband works.
+
+     Every ioband device has one ioband group, which by default is called the
+   default group.
+
+     Ioband devices can also have extra ioband groups in them. Each ioband
+   group has a job to support and a weight. Proportional to the weight,
+   dm-ioband gives tokens to the group.
+
+     A group passes on I/O requests that its job issues to the underlying
+   layer so long as it has tokens left, while requests are blocked if there
+   aren't any tokens left in the group. Tokens are refilled once all of
+   groups that have requests on a given physical device use up their tokens.
+
+     There are two policies for token consumption. One is that a token is
+   consumed for each I/O request. The other is that a token is consumed for
+   each I/O sector, for example, one I/O request which consists of
+   4Kbytes(512bytes * 8 sectors) read consumes 8 tokens. A user can choose
+   either policy.
+
+     With this approach, a job running on an ioband group with large weight
+   is guaranteed a wide I/O bandwidth.
+
+   --------------------------------------------------------------------------
+
+Setup and Installation
+
+     Build a kernel with these options enabled:
+
+     CONFIG_MD
+     CONFIG_BLK_DEV_DM
+     CONFIG_DM_IOBAND
+
+
+     If compiled as module, use modprobe to load dm-ioband.
+
+     # make modules
+     # make modules_install
+     # depmod -a
+     # modprobe dm-ioband
+
+
+     "dmsetup targets" command shows all available device-mapper targets.
+   "ioband" is displayed if dm-ioband has been loaded.
+
+     # dmsetup targets
+     ioband           v1.2.0
+
+
+   --------------------------------------------------------------------------
+
+Getting started
+
+     The following is a brief description how to control the I/O bandwidth of
+   disks. In this description, we'll take one disk with two partitions as an
+   example target.
+
+   --------------------------------------------------------------------------
+
+  Create and map ioband devices
+
+     Create two ioband devices "ioband1" and "ioband2". "ioband1" is mapped
+   to "/dev/sda1" and has a weight of 40. "ioband2" is mapped to "/dev/sda2"
+   and has a weight of 10. "ioband1" can use 80% --- 40/(40+10)*100 --- of
+   the bandwidth of the physical disk "/dev/sda" while "ioband2" can use 20%.
+
+    # echo "0 $(blockdev --getsize /dev/sda1) ioband /dev/sda1 1 0 0 none" \
+        "weight 0 :40" | dmsetup create ioband1
+    # echo "0 $(blockdev --getsize /dev/sda2) ioband /dev/sda2 1 0 0 none" \
+        "weight 0 :10" | dmsetup create ioband2
+
+
+     If the commands are successful then the device files
+   "/dev/mapper/ioband1" and "/dev/mapper/ioband2" will have been created.
+
+   --------------------------------------------------------------------------
+
+  Additional bandwidth control
+
+     In this example two extra ioband groups are created on "ioband1". The
+   first group consists of all the processes with user-id 1000 and the second
+   group consists of all the processes with user-id 2000. Their weights are
+   30 and 20 respectively.
+
+    # dmsetup message ioband1 0 type user
+    # dmsetup message ioband1 0 attach 1000
+    # dmsetup message ioband1 0 attach 2000
+    # dmsetup message ioband1 0 weight 1000:30
+    # dmsetup message ioband1 0 weight 2000:20
+
+
+     Now the processes in the user-id 1000 group can use 30% ---
+   30/(30+20+40+10)*100 --- of the bandwidth of the physical disk.
+
+   Table 1. Weight assignments
+
+   +----------------------------------------------------------------+
+   | ioband device |          ioband group          | ioband weight |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | user id 1000                   | 30            |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | user id 2000                   | 20            |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | default group(the other users) | 40            |
+   |---------------+--------------------------------+---------------|
+   | ioband2       | default group                  | 10            |
+   +----------------------------------------------------------------+
+
+   --------------------------------------------------------------------------
+
+  Remove the ioband devices
+
+     Remove the ioband devices when no longer used.
+
+     # dmsetup remove ioband1
+     # dmsetup remove ioband2
+
+
+   --------------------------------------------------------------------------
+
+Command Reference
+
+  Create an ioband device
+
+   SYNOPSIS
+
+           dmsetup create IOBAND_DEVICE
+
+   DESCRIPTION
+
+             Create an ioband device with the given name IOBAND_DEVICE.
+           Generally, dmsetup reads a table from standard input. Each line of
+           the table specifies a single target and is of the form:
+
+             start_sector num_sectors "ioband" device_file ioband_device_id \
+                 io_throttle io_limit ioband_group_type policy token_base \
+                 :weight [ioband_group_id:weight...]
+
+
+                start_sector, num_sectors
+
+                          The sector range of the underlying device where
+                        dm-ioband maps.
+
+                ioband
+
+                          Specify the string "ioband" as a target type.
+
+                device_file
+
+                          Underlying device name.
+
+                ioband_device_id
+
+                          The ID number for an ioband device. The same ID
+                        must be set among the ioband devices that share the
+                        same bandwidth, which means they work on the same
+                        physical disk.
+
+                io_throttle
+
+                          Dm-ioband starts to control the bandwidth when the
+                        number of BIOs in progress exceeds this value. If 0
+                        is specified, dm-ioband uses the default value.
+
+                io_limit
+
+                          Dm-ioband blocks all I/O requests for the
+                        IOBAND_DEVICE when the number of BIOs in progress
+                        exceeds this value. If 0 is specified, dm-ioband uses
+                        the default value.
+
+                ioband_group_type
+
+                       
...

Previous Topic: [PATCH 13/15] Revert "netns: Fix device renaming for sysfs"
Next Topic: [PATCH 04/15] sysfs: Implement __sysfs_get_dentry
Goto Forum:
  


Current Time: Thu Jan 26 22:08:03 GMT 2023

Total time taken to generate the page: 0.00792 seconds