OpenVZ Forum


Home » Mailing lists » Devel » [PATCH 0/2] dm-ioband: I/O bandwidth controller v1.0.0: Introduction
[PATCH 0/2] dm-ioband: I/O bandwidth controller v1.0.0: Introduction [message #30253] Mon, 19 May 2008 08:22 Go to next message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Hi everyone,

This is dm-ioband version 1.0.0 release.

Dm-ioband is an I/O bandwidth controller implemented as a device-mapper
driver, which gives specified bandwidth to each job running on the same
physical device.

- Can be applied to the kernel 2.6.26-rc2-mm1.
- Changes from 0.0.4 (24th April):
    - Performance tuning
      - A new bandwidth control policy is added. This policy controls
        bandwidth based on the number of I/O sectors instead of the
	number of I/O requests.
	I think it would be useful for some applications such as video
	streaming or when you want to use high-end storages.
       - Control read and write requests separately. So even if a
         write I/O queue in an elevator is full, read I/O requests
	 won't be blocked.
      - More accurate bandwidth control.
    - Code cleanups. Remove the "LINUX_VERSION_CODE" and "inline" stuff.
    - Fix panic on "dmsetup suspend."

For more details, please refer to:
http://people.valinux.co.jp/~ryov/dm-ioband/

Thanks,
Ryo Tsuruta
_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
[PATCH 1/2] dm-ioband: I/O bandwidth controller v1.0.0: Source code and patch [message #30254 is a reply to message #30253] Mon, 19 May 2008 08:23 Go to previous messageGo to next message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Here is the patch of dm-ioband.

Based on 2.6.26-rc2-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -uprN linux-2.6.26-rc2-mm1.orig/drivers/md/Kconfig linux-2.6.26-rc2-mm1/drivers/md/Kconfig
--- linux-2.6.26-rc2-mm1.orig/drivers/md/Kconfig	2008-05-16 16:20:23.000000000 +0900
+++ linux-2.6.26-rc2-mm1/drivers/md/Kconfig	2008-05-19 14:22:37.000000000 +0900
@@ -271,4 +271,17 @@ config DM_UEVENT
 	---help---
 	Generate udev events for DM events.
 
+config DM_IOBAND
+	tristate "I/O bandwidth control (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	This device-mapper target allows you to define how the
+	available bandwidth of a storage device should be
+	shared between processes, cgroups, partitions or LUNs.
+
+	Information on how to use dm-ioband is available in:
+	   <file:Documentation/device-mapper/ioband.txt>.
+
+	If unsure, say N.
+
 endif # MD
diff -uprN linux-2.6.26-rc2-mm1.orig/drivers/md/Makefile linux-2.6.26-rc2-mm1/drivers/md/Makefile
--- linux-2.6.26-rc2-mm1.orig/drivers/md/Makefile	2008-05-16 16:20:23.000000000 +0900
+++ linux-2.6.26-rc2-mm1/drivers/md/Makefile	2008-05-19 14:22:37.000000000 +0900
@@ -7,6 +7,7 @@ dm-mod-objs	:= dm.o dm-table.o dm-target
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-raid1.o
+dm-ioband-objs	:= dm-ioband-ctl.o dm-ioband-policy.o dm-ioband-type.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
@@ -36,6 +37,7 @@ obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipa
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o dm-log.o
 obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
+obj-$(CONFIG_DM_IOBAND)		+= dm-ioband.o
 
 quiet_cmd_unroll = UNROLL  $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -uprN linux-2.6.26-rc2-mm1.orig/drivers/md/dm-ioband-ctl.c linux-2.6.26-rc2-mm1/drivers/md/dm-ioband-ctl.c
--- linux-2.6.26-rc2-mm1.orig/drivers/md/dm-ioband-ctl.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.26-rc2-mm1/drivers/md/dm-ioband-ctl.c	2008-05-19 14:22:37.000000000 +0900
@@ -0,0 +1,1108 @@
+/*
+ * Copyright (C) 2008 VA Linux Systems Japan K.K.
+ * Authors: Hirokazu Takahashi <taka@valinux.co.jp>
+ *          Ryo Tsuruta <ryov@valinux.co.jp>
+ *
+ *  I/O bandwidth control
+ *
+ * This file is released under the GPL.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/raid/md.h>
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "dm-ioband.h"
+
+#define DM_MSG_PREFIX "ioband"
+#define POLICY_PARAM_START 6
+#define POLICY_PARAM_DELIM "=:,"
+
+static LIST_HEAD(ioband_device_list);
+/* to protect ioband_device_list */
+static DEFINE_SPINLOCK(ioband_devicelist_lock);
+
+static void ioband_conduct(struct work_struct *);
+static void ioband_hold_bio(struct ioband_group *, struct bio *);
+static struct bio *ioband_pop_bio(struct ioband_group *);
+static int ioband_set_param(struct ioband_group *, char *, char *);
+static int ioband_group_attach(struct ioband_group *, int, char *);
+static int ioband_group_type_select(struct ioband_group *, char *);
+
+long ioband_debug;	/* just for debugging */
+
+/*
+ * Bind the policy called "name" to the ioband device and run its
+ * initializer.
+ *
+ * The policy table is walked until an entry with a matching p_name is
+ * found or the terminating entry (p_name == NULL) is reached; in the
+ * latter case the terminating entry itself is used.
+ * NOTE(review): this relies on the terminating entry carrying a valid
+ * p_policy_init -- confirm against the table definition.
+ *
+ * If g_hold_bio/g_pop_bio are still unset after the initializer has run,
+ * the generic ioband_hold_bio()/ioband_pop_bio() are installed.
+ *
+ * Returns whatever the policy initializer returned.
+ */
+static int policy_init(struct ioband_device *dp, char *name,
+							int argc, char **argv)
+{
+	struct policy_type *type = dm_ioband_policy_type;
+	int ret;
+
+	while (type->p_name && strcmp(name, type->p_name) != 0)
+		type++;
+
+	dp->g_policy = type;
+	ret = type->p_policy_init(dp, argc, argv);
+
+	/* Fall back to the generic hooks for anything left unset. */
+	if (dp->g_hold_bio == NULL)
+		dp->g_hold_bio = ioband_hold_bio;
+	if (dp->g_pop_bio == NULL)
+		dp->g_pop_bio = ioband_pop_bio;
+
+	return ret;
+}
+
+/*
+ * Look up the ioband device called "name", creating it when it does not
+ * exist yet.
+ *
+ * A candidate device is allocated and set up before the list lock is
+ * taken. Under ioband_devicelist_lock the list is searched for an
+ * existing device with the same name; only if none is found, and the
+ * candidate could be allocated and its policy initialised successfully,
+ * is the candidate appended to ioband_device_list. An unused candidate is
+ * torn down again after the lock has been dropped.
+ *
+ * Returns the existing or newly registered device, or NULL if no device
+ * with that name exists and a new one could not be set up.
+ */
+static struct ioband_device *alloc_ioband_device(char *name, char *policy,
+			int io_throttle, int io_limit, int argc, char **argv)
+{
+	struct ioband_device *found = NULL;
+	struct ioband_device *cur;
+	struct ioband_device *fresh;
+	unsigned long flags;
+
+	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+	if (fresh) {
+		/*
+		 * Prepare its own workqueue as generic_make_request() may
+		 * potentially block the workqueue when submitting BIOs.
+		 */
+		fresh->g_ioband_wq = create_workqueue("kioband");
+		if (!fresh->g_ioband_wq) {
+			kfree(fresh);
+			fresh = NULL;
+		}
+	}
+
+	if (fresh) {
+		INIT_WORK(&fresh->g_conductor, ioband_conduct);
+		INIT_LIST_HEAD(&fresh->g_groups);
+		INIT_LIST_HEAD(&fresh->g_list);
+		spin_lock_init(&fresh->g_lock);
+		fresh->g_io_throttle = io_throttle;
+		fresh->g_io_limit[0] = io_limit;
+		fresh->g_io_limit[1] = io_limit;
+		fresh->g_issued[0] = 0;
+		fresh->g_issued[1] = 0;
+		fresh->g_blocked = 0;
+		fresh->g_ref = 0;
+		fresh->g_flags = 0;
+		strlcpy(fresh->g_name, name, sizeof(fresh->g_name));
+		fresh->g_policy = NULL;
+		fresh->g_hold_bio = NULL;
+		fresh->g_pop_bio = NULL;
+		init_waitqueue_head(&fresh->g_waitq);
+	}
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	list_for_each_entry(cur, &ioband_device_list, g_list) {
+		if (strcmp(cur->g_name, name) == 0) {
+			found = cur;
+			break;
+		}
+	}
+	/* Register the candidate only when the name is not taken yet. */
+	if (!found && fresh && policy_init(fresh, policy, argc, argv) == 0) {
+		found = fresh;
+		fresh = NULL;
+		list_add_tail(&found->g_list, &ioband_device_list);
+	}
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+
+	/* The candidate was not needed or could not be initialised. */
+	if (fresh) {
+		destroy_workqueue(fresh->g_ioband_wq);
+		kfree(fresh);
+	}
+
+	return found;
+}
+
+/*
+ * Unregister and free an ioband device, unless groups are still attached.
+ *
+ * Under ioband_devicelist_lock: if dp->g_groups is non-empty nothing is
+ * done; otherwise dp is unlinked from the device list. The workqueue is
+ * destroyed and dp is freed only after the lock has been released.
+ */
+static void release_ioband_device(struct ioband_device *dp)
+{
+	unsigned long flags;
+	int in_use;
+
+	spin_lock_irqsave(&ioband_devicelist_lock, flags);
+	in_use = !list_empty(&dp->g_groups);
+	if (!in_use)
+		list_del(&dp->g_list);
+	spin_unlock_irqrestore(&ioband_devicelist_lock, flags);
+
+	if (in_use)
+		return;
+
+	destroy_workqueue(dp->g_ioband_wq);
+	kfree(dp);
+}
+
+/*
+ * Search the groups chained on head->c_group_list for one whose c_id
+ * equals "id". With IOBAND_ID_ANY every entry matches.
+ *
+ * The walk does not stop at the first hit, so the last matching entry in
+ * list order is returned. Returns NULL when nothing matches.
+ */
+static struct ioband_group *ioband_group_find(struct ioband_group *head,
+									int id)
+{
+	struct ioband_group *match = NULL;
+	struct ioband_group *cur;
+
+	list_for_each_entry(cur, &head->c_group_list, c_group_list) {
+		if (id != IOBAND_ID_ANY && cur->c_id != id)
+			continue;
+		match = cur;
+	}
+	return match;
+}
+
+/*
+ * Initialise an ioband group and attach it to the ioband device "dp".
+ *
+ * gp:    group to initialise
+ * head:  group whose c_group_list the new group is chained onto, or NULL
+ *        when gp is not chained to another group
+ * dp:    ioband device the group is attached to
+ * id:    group id; must not already be present on head's chain
+ * param: passed through unchanged to the device's g_group_ctr hook
+ *
+ * Returns 0 on success, -EEXIST if a group with the same id is already
+ * chained on "head", or the error returned by g_group_ctr. On failure the
+ * group is not left linked on any list and dp->g_ref is unchanged.
+ */
+static int ioband_group_init(struct ioband_group *gp,
+     struct ioband_group *head, struct ioband_device *dp, int id, char *param)
+{
+	unsigned long flags;
+	int r;
+
+	INIT_LIST_HEAD(&gp->c_list);
+	bio_list_init(&gp->c_blocked_bios);
+	bio_list_init(&gp->c_prio_bios);
+	gp->c_id = id;	/* should be verified */
+	gp->c_blocked = 0;
+	gp->c_prio_blocked = 0;
+	memset(gp->c_stat, 0, sizeof(gp->c_stat));
+	init_waitqueue_head(&gp->c_waitq);
+	gp->c_flags = 0;
+
+	INIT_LIST_HEAD(&gp->c_group_list);
+
+	gp->c_banddev = dp;
+
+	spin_lock_irqsave(&dp->g_lock, flags);
+	if (head && ioband_group_find(head, id)) {
+		spin_unlock_irqrestore(&dp->g_lock, flags);
+		DMWARN("ioband_group: id=%d already exists.", id);
+		return -EEXIST;
+	}
+
+	dp->g_ref++;
+	list_add_tail(&gp->c_list, &dp->g_groups);
+
+	r = dp->g_group_ctr(gp, param);
+	if (r) {
+		/* Undo the attach and drop the lock before bailing out. */
+		list_del(&gp->c_list);
+		dp->g_ref--;
+		spin_unlock_irqrestore(&dp->g_lock, flags);
+		return r;
+	}
+
+	if (head) {
+		list_add_tail(&gp->c_group_list, &head->c_group_list);
+		gp->c_dev = head->c_dev;
+		gp->c_target = head->c_target;
+	}
+
+	spin_unlock_irqrestore(&dp->g_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Detach a group from its device and free it.
+ *
+ * The group is removed from both the device's group list (c_list) and the
+ * group chain (c_group_list), the device's g_ref count is decremented,
+ * the device's g_group_dtr hook is called and finally the group memory is
+ * released.
+ * NOTE(review): no lock is taken here; the visible caller
+ * ioband_group_destroy_all() holds dp->g_lock -- confirm for other callers.
+ */
+static void ioband_group_release(struct ioband_group *gp)
+{
+	struct ioband_device *banddev = gp->c_banddev;
+
+	list_del(&gp->c_group_list);
+	list_del(&gp->c_list);
+	banddev->g_ref -= 1;
+	banddev->g_group_dtr(gp);
+	kfree(gp);
+}
+
+/*
+ * Release every group chained on gp->c_group_list and then gp itself,
+ * all while holding the owning device's g_lock.
+ */
+static void ioband_group_destroy_all(struct ioband_group *gp)
+{
+	struct ioband_device *banddev = gp->c_banddev;
+	struct ioband_group *victim;
+	unsigned long flags;
+
+	spin_lock_irqsave(&banddev->g_lock, flags);
+	for (;;) {
+		/* Each release unlinks the group, so the chain shrinks. */
+		victim = ioband_group_find(gp, IOBAND_ID_ANY);
+		if (!victim)
+			break;
+		ioband_group_release(victim);
+	}
+	ioband_group_release(gp);
+	spin_unlock_irqrestore(&banddev->g_lock, flags);
+}
+
+/*
+ * Mark a single group as down (under the device's g_lock), then queue the
+ * device's conductor work and flush the device workqueue.
+ */
+static void ioband_group_stop(struct ioband_group *gp)
+{
+	struct ioband_device *banddev = gp->c_banddev;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&banddev->g_lock, irqflags);
+	set_group_down(gp);
+	spin_unlock_irqrestore(&banddev->g_lock, irqflags);
+
+	/* Kick the conductor, then wait for the queued work to finish. */
+	queue_work(banddev->g_ioband_wq, &banddev->g_conductor);
+	flush_workqueue(banddev->g_ioband_wq);
+}
+
+/*
+ * Mark "head" and every group chained on head->c_group_list as down.
+ * When "suspend" is non-zero each of them is additionally marked as
+ * suspended. The flags are changed under the device's g_lock; afterwards
+ * the conductor work is queued and the device workqueue is flushed.
+ */
+static void ioband_group_stop_all(struct ioband_group *head, int suspend)
+{
+	struct ioband_device *banddev = head->c_banddev;
+	struct ioband_group *cur;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&banddev->g_lock, irqflags);
+
+	/* Chained groups first ... */
+	list_for_each_entry(cur, &head->c_group_list, c_group_list) {
+		set_group_down(cur);
+		if (!suspend)
+			continue;
+		set_group_suspended(cur);
+		dprintk(KERN_ERR "ioband suspend: gp(%p)\n", cur);
+	}
+
+	/* ... then the head group itself. */
+	set_group_down(head);
+	if (suspend) {
+		set_group_suspended(head);
+		dprintk(KERN_ERR "ioband suspend: gp(%p)\n", head);
+	}
+
+	spin_unlock_irqrestore(&banddev->g_lock, irqflags);
+
+	queue_work(banddev->g_ioband_wq, &banddev->g_conductor);
+	flush_workqueue(banddev->g_ioband_wq);
+}
+
+static void ioband_group_resume_all(struct ioband_group *head)
+{
+	struct ioband_device *dp = head->c_banddev;
+	struct ioband_group *p;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dp->g_lock, flags);
+	list_for_each_entry(p, &head->c_group_list, c_group_list) {
+		clear_group_down(p);
+		clear_group_suspended(p);
+		dprintk(KERN_ERR "ioband resume: gp(%p)\n", p);
+	}
+	clear_group_down(head);
+	clear_group_suspended(head);
+	dprintk(KERN_ERR "ioband resume: gp(%p)\n", head);
+	spin_unlock_i
...

[PATCH 2/2] dm-ioband: I/O bandwidth controller v1.0.0: Document [message #30255 is a reply to message #30254] Mon, 19 May 2008 08:24 Go to previous message
Ryo Tsuruta is currently offline  Ryo Tsuruta
Messages: 35
Registered: January 2008
Member
Here is the document of dm-ioband.

Based on 2.6.26-rc2-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -uprN linux-2.6.26-rc2-mm1.orig/Documentation/device-mapper/ioband.txt linux-2.6.26-rc2-mm1/Documentation/device-mapper/ioband.txt
--- linux-2.6.26-rc2-mm1.orig/Documentation/device-mapper/ioband.txt	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.26-rc2-mm1/Documentation/device-mapper/ioband.txt	2008-05-19 14:22:37.000000000 +0900
@@ -0,0 +1,899 @@
+                     Block I/O bandwidth control: dm-ioband
+
+            -------------------------------------------------------
+
+   Table of Contents
+
+   [1]What's dm-ioband all about?
+
+   [2]Differences from the CFQ I/O scheduler
+
+   [3]How dm-ioband works.
+
+   [4]Setup and Installation
+
+   [5]Getting started
+
+   [6]Command Reference
+
+   [7]Examples
+
+What's dm-ioband all about?
+
+     dm-ioband is an I/O bandwidth controller implemented as a device-mapper
+   driver. Several jobs using the same physical device have to share the
+   bandwidth of the device. dm-ioband gives bandwidth to each job according
+   to its weight, which can be set individually for each job.
+
+     At this time, a job is a group of processes with the same pid or pgrp or
+   uid. There is also a plan to make it support cgroup. A job can also be a
+   virtual machine such as KVM or Xen.
+
+     +------+ +------+ +------+   +------+ +------+ +------+
+     |cgroup| |cgroup| | the  |   | pid  | | pid  | | the  |  jobs
+     |  A   | |  B   | |others|   |  X   | |  Y   | |others|
+     +--|---+ +--|---+ +--|---+   +--|---+ +--|---+ +--|---+
+     +--V----+---V---+----V---+   +--V----+---V---+----V---+
+     | group | group | default|   | group | group | default|  ioband groups
+     |       |       |  group |   |       |       |  group |
+     +-------+-------+--------+   +-------+-------+--------+
+     |        ioband1         |   |       ioband2          |  ioband devices
+     +-----------|------------+   +-----------|------------+
+     +-----------V--------------+-------------V------------+
+     |                          |                          |
+     |          sdb1            |           sdb2           |  physical devices
+     +--------------------------+--------------------------+
+
+
+   --------------------------------------------------------------------------
+
+Differences from the CFQ I/O scheduler
+
+     Dm-ioband offers flexible configuration of the bandwidth settings.
+
+     Dm-ioband can work with any type of I/O scheduler such as the NOOP
+   scheduler, which is often chosen for high-end storage, since it is
+   implemented outside the I/O scheduling layer. It allows both
+   partition-based bandwidth control and job-based control, where a job is
+   a group of processes. In addition, a different configuration can be set
+   on each physical device to control its bandwidth.
+
+     Meanwhile the current implementation of the CFQ scheduler has 8 IO
+   priority levels and all jobs whose processes have the same IO priority
+   share the bandwidth assigned to this level between them. Moreover, IO
+   priority is an attribute of a process, so it affects all block devices
+   equally.
+
+   --------------------------------------------------------------------------
+
+How dm-ioband works.
+
+     Every ioband device has one ioband group, which by default is called the
+   default group.
+
+     Ioband devices can also have extra ioband groups in them. Each ioband
+   group has a job to support and a weight. Proportional to the weight,
+   dm-ioband gives tokens to the group.
+
+     A group passes on I/O requests that its job issues to the underlying
+   layer so long as it has tokens left, while requests are blocked if there
+   aren't any tokens left in the group. Tokens are refilled once all of the
+   groups that have requests on a given physical device use up their tokens.
+
+     There are two policies for token consumption. One is that a token is
+   consumed for each I/O request. The other is that a token is consumed for
+   each I/O sector, for example, one I/O request which consists of
+   4Kbytes(512bytes * 8 sectors) read consumes 8 tokens. A user can choose
+   either policy.
+
+     With this approach, a job running on an ioband group with large weight
+   is guaranteed a wide I/O bandwidth.
+
+   --------------------------------------------------------------------------
+
+Setup and Installation
+
+     Build a kernel with these options enabled:
+
+     CONFIG_MD
+     CONFIG_BLK_DEV_DM
+     CONFIG_DM_IOBAND
+
+
+     If compiled as a module, use modprobe to load dm-ioband.
+
+     # make modules
+     # make modules_install
+     # depmod -a
+     # modprobe dm-ioband
+
+
+     "dmsetup targets" command shows all available device-mapper targets.
+   "ioband" is displayed if dm-ioband has been loaded.
+
+     # dmsetup targets
+     ioband           v1.0.0
+
+
+   --------------------------------------------------------------------------
+
+Getting started
+
+     The following is a brief description of how to control the I/O
+   bandwidth of disks. In this description, we'll take one disk with two
+   partitions as an example target.
+
+   --------------------------------------------------------------------------
+
+  Create and map ioband devices
+
+     Create two ioband devices "ioband1" and "ioband2". "ioband1" is mapped
+   to "/dev/sda1" and has a weight of 40. "ioband2" is mapped to "/dev/sda2"
+   and has a weight of 10. "ioband1" can use 80% --- 40/(40+10)*100 --- of
+   the bandwidth of the physical disk "/dev/sda" while "ioband2" can use 20%.
+
+    # echo "0 $(blockdev --getsize /dev/sda1) ioband /dev/sda1 1 0 0 none" \
+        "weight 0 :40" | dmsetup create ioband1
+    # echo "0 $(blockdev --getsize /dev/sda2) ioband /dev/sda2 1 0 0 none" \
+        "weight 0 :10" | dmsetup create ioband2
+
+
+     If the commands are successful then the device files
+   "/dev/mapper/ioband1" and "/dev/mapper/ioband2" will have been created.
+
+   --------------------------------------------------------------------------
+
+  Additional bandwidth control
+
+     In this example two extra ioband groups are created on "ioband1". The
+   first group consists of all the processes with user-id 1000 and the second
+   group consists of all the processes with user-id 2000. Their weights are
+   30 and 20 respectively.
+
+    # dmsetup message ioband1 0 type user
+    # dmsetup message ioband1 0 attach 1000
+    # dmsetup message ioband1 0 attach 2000
+    # dmsetup message ioband1 0 weight 1000:30
+    # dmsetup message ioband1 0 weight 2000:20
+
+
+     Now the processes in the user-id 1000 group can use 30% ---
+   30/(30+20+40+10)*100 --- of the bandwidth of the physical disk.
+
+   Table 1. Weight assignments
+
+   +----------------------------------------------------------------+
+   | ioband device |          ioband group          | ioband weight |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | user id 1000                   | 30            |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | user id 2000                   | 20            |
+   |---------------+--------------------------------+---------------|
+   | ioband1       | default group(the other users) | 40            |
+   |---------------+--------------------------------+---------------|
+   | ioband2       | default group                  | 10            |
+   +----------------------------------------------------------------+
+
+   --------------------------------------------------------------------------
+
+  Remove the ioband devices
+
+     Remove the ioband devices when no longer used.
+
+     # dmsetup remove ioband1
+     # dmsetup remove ioband2
+
+
+   --------------------------------------------------------------------------
+
+Command Reference
+
+  Create an ioband device
+
+   SYNOPSIS
+
+           dmsetup create IOBAND_DEVICE
+
+   DESCRIPTION
+
+             Create an ioband device with the given name IOBAND_DEVICE.
+           Generally, dmsetup reads a table from standard input. Each line of
+           the table specifies a single target and is of the form:
+
+             start_sector num_sectors "ioband" device_file ioband_device_id \
+                 io_throttle io_limit ioband_group_type policy token_base \
+                 :weight [ioband_group_id:weight...]
+
+
+                start_sector, num_sectors
+
+                          The sector range of the underlying device where
+                        dm-ioband maps.
+
+                ioband
+
+                          Specify the string "ioband" as a target type.
+
+                device_file
+
+                          Underlying device name.
+
+                ioband_device_id
+
+                          The ID number for an ioband device. The same ID
+                        must be set among the ioband devices that share the
+                        same bandwidth, which means they work on the same
+                        physical disk.
+
+                io_throttle
+
+                          Dm-ioband starts to control the bandwidth when the
+                        number of BIOs in progress exceeds this value. If 0
+                        is specified, dm-ioband uses the default value.
+
+                io_limit
+
+                          Dm-ioband blocks all I/O requests for the
+                        IOBAND_DEVICE when the number of BIOs in progress
+                        exceeds this value. If 0 is specified, dm-ioband uses
+                        the default value.
+
+                ioband_group_type
+
+                       
...

Previous Topic: user namespace semantics (may 16)
Next Topic: [PATCH O/4] BIO tracking take2
Goto Forum:
  


Current Time: Tue Jul 16 18:32:13 GMT 2024

Total time taken to generate the page: 0.02972 seconds