OpenVZ Forum


Home » Mailing lists » Devel » [patch 0/1] [RFC][net namespace] veth ioctl management
[patch 0/1] [RFC][net namespace] veth ioctl management [message #17465] Mon, 19 February 2007 13:37 Go to next message
Daniel Lezcano is currently offline  Daniel Lezcano
Messages: 417
Registered: June 2006
Senior Member
The following patch is a proposed upgrade for the veth pass-through driver.
The temporary proc interface has been replaced by an ioctl. The device is
a misc device. The major number is 10 and the minor number is dynamically allocated.
The minor number should be retrieved from /proc/misc.

The veth configuration is no longer done in one shot and no longer unshares.
The veth creation is done after the network namespace has been unshared and bound with bind_ns.
The ioctl takes the parent dev name, the child dev name and the child namespace identifier.
The veth creation will fail if the namespace relationship is not parent->child.

The set_mac_address callback is set, so the usual commands ip/ifconfig can be used to
assign a mac address to the pair device.

The patch header contains the source code for the userspace tool, vethctl.
The patch needs the lxc patchset for the bind namespace syscall.

-- 
_______________________________________________
Containers mailing list
Containers@lists.osdl.org
https://lists.osdl.org/mailman/listinfo/containers
[patch 1/1] net namespace : veth management interface [message #17466 is a reply to message #17465] Mon, 19 February 2007 13:37 Go to previous message
Daniel Lezcano is currently offline  Daniel Lezcano
Messages: 417
Registered: June 2006
Senior Member
From: Daniel Lezcano <dlezcano@fr.ibm.com>

The veth module has been modified to be managed from userspace via ioctl.
The temporary /proc/veth_ctl interface has been removed.
Refcounting has been added on the module.
Misc dev is used to register the module.
The MAC address is now assigned via ifconfig <interface> hw ether <hwaddr> in
both the child and parent namespaces.

Usage:
	. load veth module
	. retrieve the minor number from /proc/misc
	. mknod /dev/net/veth c 10 <minor>

	. unshare with bind_ns in order to assign an identifier
	. from the parent namespace use the vethctl program below to add/delete
		. (add)    vethctl -I <nsid> -v <parent_ifname> -i <child_ifname> -a
		. (delete) vethctl -v <parent_ifname> -d
	. assign mac address in the child namespace
	. assign mac address in the parent namespace

vethctl.c:

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <net/if.h>

/*
 * ioctl request codes sent to the veth misc device: one to add a device
 * pair, one to delete it.
 * NOTE(review): presumably these must stay identical to the definitions the
 * kernel patch places in include/linux/veth.h - confirm against that header.
 * NOTE(review): the type given to _IOW is a pointer to the struct, not the
 * struct itself, and the magic is wider than one byte - verify both are
 * intended before changing either side.
 */
#define VETH_IOC_MAGIC 0x1234
#define VETH_IOC_ADD   _IOW(VETH_IOC_MAGIC, 0x1, struct veth_ioc_pair*)
#define VETH_IOC_DEL   _IOW(VETH_IOC_MAGIC, 0x2, struct veth_ioc_pair*)

/* Path of the device node that main() opens to issue the ioctls. */
const char *vethname = "/dev/net/veth";

/*
 * Argument block handed to the VETH_IOC_ADD / VETH_IOC_DEL ioctls.
 * main() fills parent from -v, child from -i (add only) and id from -I.
 */
struct veth_ioc_pair {
        char parent[IFNAMSIZ];  /* interface name in the parent namespace */
        char child[IFNAMSIZ];   /* interface name in the child namespace */
        int id;                 /* identifier of the child namespace (-I) */
};

/*
 * Print the command-line synopsis and option summary on stdout, then
 * terminate the process with exit status 1.  This function never returns.
 *
 * name: program name substituted into the synopsis line.
 */
static void usage(const char *name)
{
        /* Fixed help text that follows the synopsis, one entry per line. */
        static const char *const help_lines[] = {
                "\n",
                "  -h            this message\n",
                "\n",
                "  -I <id>       add pass-through device to nsproxy <id>\n",
                "  -v            parent interface name\n",
                "  -i            child interface name\n",
                "  -a            add the interface\n",
                "  -d            delete the interface\n",
                "\n",
                "(C) Copyright IBM Corp. 2007\n",
                "\n",
        };
        size_t idx;

        printf("usage: %s [-h] [-I id] [-i <ifname>] [-v ifname] [-ad]\n", name);

        for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
                fputs(help_lines[idx], stdout);

        exit(1);
}

int main(int argc, char* argv[])
{
        int fd;
        struct veth_ioc_pair cmd;
        char *veth = NULL;
        char *eth = NULL;
        char c;
        int id = -1;
        int add = -1;

        while ((c = getopt(argc, argv, "adi:v:I:")) != EOF) {
                switch (c) {
                case 'I':if (optarg) id = atoi(optarg); break;
                case 'i': eth = optarg; break;
                case 'v': veth = optarg; break;
                case 'a': add = 1; break;
                case 'd': add = 0; break;
                default: usage(argv[0]);
                };
        };

        if (id == -1)
                usage(argv[0]);
        if (add == -1)
                usage(argv[0]);
        if (add) {
                if (!veth || !eth)
                        usage(argv[0]);
        } else {
                if (!veth)
                        usage(argv[0]);
        }

        fd = open(vethname, 0, O_WRONLY);
        if (fd == -1) {
                perror("open");
                return 1;
        }

        strncpy(cmd.parent, veth, sizeof(cmd.parent));
        if (add)
                strncpy(cmd.child, eth, sizeof(cmd.parent));
        cmd.id = id;

        if (ioctl(fd, add?VETH_IOC_ADD:VETH_IOC_DEL, &cmd, sizeof(cmd))) {
                perror("ioctl");
                return 1;
        }

        close(fd);

        return 0;
}
 
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>

---
 drivers/net/veth.c   |  361 ++++++++++++++++++++++++++++++++-------------------
 include/linux/veth.h |   20 ++
 kernel/nsproxy.c     |    2 
 3 files changed, 255 insertions(+), 128 deletions(-)

Index: 2.6.20-lxc2/drivers/net/veth.c
===================================================================
--- 2.6.20-lxc2.orig/drivers/net/veth.c
+++ 2.6.20-lxc2/drivers/net/veth.c
@@ -13,6 +13,11 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/veth.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
 
@@ -24,6 +29,8 @@
 
 #define veth_from_netdev(dev) ((struct veth_struct *)(netdev_priv(dev)))
 
+static struct module *veth_module = THIS_MODULE;
+
 /* ------------------------------------------------------------------- *
  *
  * Device functions
@@ -75,12 +82,22 @@
 	return 0;
 }
 
-static int veth_open(struct net_device *dev)
+static inline int veth_mod_inc_use(void)
+{
+	return try_module_get(veth_module)?0:1;
+}
+
+static inline void veth_mod_dec_use(void)
+{
+	module_put(veth_module);
+}
+
+static int veth_dev_open(struct net_device *dev)
 {
 	return 0;
 }
 
-static int veth_close(struct net_device *dev)
+static int veth_dev_close(struct net_device *dev)
 {
 	return 0;
 }
@@ -95,14 +112,25 @@
 	return &veth_from_netdev(dev)->stats;
 }
 
-int veth_init_dev(struct net_device *dev)
+static int veth_set_address(struct net_device *dev, void *p)
+{
+	struct sockaddr *sa = p;
+
+	if (!is_valid_ether_addr(sa->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN);
+	return 0;
+}
+
+static int veth_init_dev(struct net_device *dev)
 {
 	dev->hard_start_xmit = veth_xmit;
-	dev->open = veth_open;
-	dev->stop = veth_close;
+	dev->open = veth_dev_open;
+	dev->stop = veth_dev_close;
 	dev->destructor = veth_destructor;
 	dev->get_stats = get_stats;
-
+	dev->set_mac_address = veth_set_address;
 	ether_setup(dev);
 
 	dev->tx_queue_len = 0;
@@ -114,6 +142,173 @@
 	dev->init = veth_init_dev;
 }
 
+static int veth_add(struct veth_ioc_pair *veth_pair)
+{
+	struct net_namespace *child_ns;
+	struct net_namespace *parent_ns;
+	struct net_device *parent_dev;
+	struct net_device *child_dev;
+	struct nsproxy *nsproxy;
+	int err;
+
+	err = -ESRCH;
+	nsproxy = find_nsproxy_by_id(veth_pair->id);
+	if (!nsproxy)
+		goto out;
+
+	child_ns = nsproxy->net_ns;
+	put_nsproxy(nsproxy);
+	get_net_ns(child_ns);
+
+	parent_ns = current_net_ns;
+	get_net_ns(parent_ns);
+
+	err = -EINVAL;
+	if (parent_ns != child_ns->parent)
+		goto out_parent_net_ns;
+
+	err = -ENOMEM;
+	parent_dev = alloc_netdev(sizeof(struct veth_struct),
+				  veth_pair->parent, veth_setup);
+	if (!parent_dev)
+		goto out_parent_net_ns;
+
+	push_net_ns(child_ns);
+	child_dev = alloc_netdev(sizeof(struct veth_struct),
+				 veth_pair->child, veth_setup);
+	pop_net_ns(parent_ns);
+	if (!child_dev)
+		goto out_parent_dev;
+
+	veth_from_netdev(parent_dev)->pair = child_dev;
+	veth_from_netdev(child_dev)->pair = parent_dev;
+
+	rtnl_lock();
+
+	err = register_netdevice(parent_dev);
+	if (err)
+		goto out_parent_reg;
+
+	push_net_ns(child_ns);
+	err = register_netdevice(child_dev);
+	pop_net_ns(parent_ns);
+	if (err)
+		goto out_child_reg;
+
+	rtnl_unlock();
+
+	err = -EBUSY;
+	if (veth_mod_inc_use())
+		goto out_child_reg;
+
+	err = 0;
+
+out_parent_net_ns:
+	put_net_ns(parent_ns);
+	put_net_ns(child_ns);
+out:
+	return err;
+
+out_child_reg:
+	unregister_netdevice(parent_dev);
+out_parent_reg:
+	rtnl_unlock();
+	free_netdev(child_dev);
+out_parent_dev:
+	free_netdev(parent_dev);
+	goto out_parent_net_ns;
+}
+
+static int veth_del(struct veth_ioc_pair *veth_pair)
+{
+	struct net_device *child_dev;
+	struct net_namespace *parent_ns, *child_ns;
+	struct net_device *parent_dev;
+
+	parent_dev = dev_get_by_name(veth_pair->parent);
+	if (!parent_dev)
+		return -ENODEV;
+
+	rtnl_lock();
+
+	child_dev = veth_from_netdev(parent_dev)->pair;
+	get_net_ns(child_dev->net_ns);
+	child_ns = child_dev->net_ns;
+
+	dev_close(child_dev);
+	synchronize_net();
+
+	/*
+	 * Now child_dev does not send or receives anything.
+	 * This means child_dev->hard_start_xmit is not called anymore.
+	 */
+	unregister_netdevice(parent_dev);
+	/*
+	 * At this point child_dev has dead pointer to parent_dev.
+	 * But this pointer is not dereferenced.
+	 */
+	parent_ns = push_net_ns(child_ns);
+	unregister_netdevice(child_dev);
+
+	dev_put(parent_dev);
+	rtnl_unlock();
+
+	pop_net_ns(parent_ns);
+	put_net_ns(child_ns);
+
+	veth_mod_dec_use();
+	return 0;
+}
+
+static int veth_open(struct inode *i, struct file *f)
+{
+	if (veth_mod_inc_use())
+		return -EBUSY;
+	return 0;
+}
+
+static int veth_release(struct inode *i, struct file *f)
+{
+	veth_mod_dec_use();
+	return 0;
+}
+
+static int veth_ioctl(struct inode *inode, struct file *file,
+		      unsigned int cmd, unsigned long arg)
+{
+	struct veth_ioc_pair *veth_pair;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	veth_pair = kmalloc(sizeof(*veth_pair), GFP_KERNEL);
+	if (!veth_pair)
+		return -ENOMEM;
+
+	if (copy_from_user(veth_pair, (void*)arg, sizeof(*veth_pair))) {
+		kfree(veth_pair);
+		return -EFAULT;
+	}
+
+	switch (cmd) {
+	case VETH_IOC_ADD:
+		err = veth_add(veth_pair);
+		break;
+
+	case VETH_IOC_DEL:
+		err = veth_del(veth_pair);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+	kfree(veth_pair);
+
+	return err;
+}
+
 static inline int is_veth_dev(struct net_device *dev)
 {
 	return dev->init == veth_init_dev;
@@ -246,123 +441,6 @@
 
 /* ------------------------------------------------------------------- *
  *
- * Temporary interface to create veth devices
- *
- * ------------------------------------------------------------------- */
-
-#ifdef CONFIG_PROC_FS
-
-static int veth_debug_open(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-static char *parse_addr(char *s, char *addr)
-{
-	int i, v;
-
-	for (i = 0; i < ETH_ALEN; i++) {
-		if (!isxdigit(*s))
-			return NULL;
-		*addr = 0;
-		v = isdigit(*s) ? *s - '0' : toupper(*s) - 'A' + 10;
-		s++;
-		if (isxdigit(*s)) {
-
...

Previous Topic: Re: [ckrm-tech] [RFC][PATCH][2/4] Add RSS accounting and control
Next Topic: Re: [patch 0/1] [RFC][net namespace] veth ioctl management
Goto Forum:
  


Current Time: Thu Oct 09 01:57:57 GMT 2025

Total time taken to generate the page: 0.07886 seconds