From: Daniel Lezcano <dlezcano@fr.ibm.com>
The veth module has been modified to be managed from userspace via ioctl.
The temporary /proc/veth_ctl interface has been removed.
Refcounting has been added on the module.
Misc dev is used to register the module.
The MAC address is now assigned via ifconfig <interface> hw ether <hwaddr> in
both the child and the parent namespace.
Usage:
. load veth module
. retrieve the minor number from /proc/misc
. mknod /dev/net/veth c 10 <minor>
. unshare with bind_ns in order to assign an identifier
. from the parent namespace use the vethctl program below to add/delete
. (add) vethctl -I <nsid> -v <parent_ifname> -i <child_ifname> -a
. (delete) vethctl -v <parent_ifname> -d
. assign mac address in the child namespace
. assign mac address in the parent namespace
vethctl.c:
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
/*
 * ioctl request codes sent to the veth control device by main().
 *
 * NOTE(review): the kernel side of this patch takes VETH_IOC_ADD/VETH_IOC_DEL
 * from include/linux/veth.h, which is not shown here; these definitions
 * presumably have to stay identical to that header -- confirm before
 * changing any value.
 * NOTE(review): 0x1234 looks wider than the 8-bit "type" field of the Linux
 * _IOC encoding, and the size argument is a pointer type although main()
 * passes the address of the structure itself -- verify against the kernel
 * header.
 */
#define VETH_IOC_MAGIC 0x1234
#define VETH_IOC_ADD _IOW(VETH_IOC_MAGIC, 0x1, struct veth_ioc_pair*)
#define VETH_IOC_DEL _IOW(VETH_IOC_MAGIC, 0x2, struct veth_ioc_pair*)
/* Path of the device node opened by main(). */
const char *vethname = "/dev/net/veth";
/* Argument block handed to the VETH_IOC_ADD / VETH_IOC_DEL ioctls. */
struct veth_ioc_pair {
/* Parent interface name (-v option). */
char parent[IFNAMSIZ];
/* Child interface name (-i option); main() only fills it in for an add. */
char child[IFNAMSIZ];
/* Identifier given with the -I option. */
int id;
};
/*
 * Print the command-line help on stdout, using @name as the program name
 * in the synopsis line, then terminate the process with exit status 1.
 * This function never returns.
 */
static void usage(const char *name)
{
	/* Fixed part of the help text, one entry per output line. */
	static const char *const help[] = {
		"\n",
		" -h this message\n",
		"\n",
		" -I <id> add pass-through device to nsproxy <id>\n",
		" -v parent interface name\n",
		" -i child interface name\n",
		" -a add the interface\n",
		" -d delete the interface\n",
		"\n",
		"(C) Copyright IBM Corp. 2007\n",
		"\n",
	};
	size_t line;

	printf("usage: %s [-h] [-I id] [-i <ifname>] [-v ifname] [-ad]\n", name);
	for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
		fputs(help[line], stdout);

	exit(1);
}
/*
 * vethctl entry point: add or delete a veth device pair through the
 * control device named by vethname.
 *
 *   add:    vethctl -I <id> -v <parent_ifname> -i <child_ifname> -a
 *   delete: vethctl -v <parent_ifname> -d
 *
 * Returns 0 on success and 1 on any failure.  Invalid or incomplete
 * command lines end in usage(), which exits with status 1.
 */
int main(int argc, char* argv[])
{
	struct veth_ioc_pair cmd;
	const char *veth = NULL;	/* -v: parent interface name */
	const char *eth = NULL;		/* -i: child interface name */
	int have_id = 0;		/* set once -I has been parsed */
	int id = -1;
	int add = -1;			/* 1 = add, 0 = delete, -1 = not chosen */
	int ret = 0;
	int fd;
	int c;	/* must be int: getopt() returns -1, which an unsigned char never equals */

	while ((c = getopt(argc, argv, "adhi:v:I:")) != -1) {
		switch (c) {
		case 'I': {
			char *end;
			long val;

			/* strtol() instead of atoi() so that garbage is rejected. */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (errno || end == optarg || *end != '\0' ||
			    val < INT_MIN || val > INT_MAX) {
				fprintf(stderr, "%s: invalid id '%s'\n",
					argv[0], optarg);
				return 1;
			}
			id = (int)val;
			have_id = 1;
			break;
		}
		case 'i':
			eth = optarg;
			break;
		case 'v':
			veth = optarg;
			break;
		case 'a':
			add = 1;
			break;
		case 'd':
			add = 0;
			break;
		default:	/* includes -h */
			usage(argv[0]);
		}
	}

	if (add == -1 || !veth)
		usage(argv[0]);
	/* Only an add needs the id and the child name; a delete is "-v <name> -d". */
	if (add && (!have_id || !eth))
		usage(argv[0]);

	/* Refuse names that do not fit, rather than passing a truncated one. */
	if (strlen(veth) >= sizeof(cmd.parent) ||
	    (add && strlen(eth) >= sizeof(cmd.child))) {
		fprintf(stderr, "%s: interface name too long (max %zu characters)\n",
			argv[0], sizeof(cmd.parent) - 1);
		return 1;
	}

	/* Zero the whole request so no uninitialised stack bytes are sent. */
	memset(&cmd, 0, sizeof(cmd));
	snprintf(cmd.parent, sizeof(cmd.parent), "%s", veth);
	if (add)
		snprintf(cmd.child, sizeof(cmd.child), "%s", eth);
	cmd.id = id;

	/* The access mode is the second argument; no mode is needed without O_CREAT. */
	fd = open(vethname, O_WRONLY);
	if (fd == -1) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, add ? VETH_IOC_ADD : VETH_IOC_DEL, &cmd)) {
		perror("ioctl");
		ret = 1;
	}

	close(fd);
	return ret;
}
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
drivers/net/veth.c | 361 ++++++++++++++++++++++++++++++++-------------------
include/linux/veth.h | 20 ++
kernel/nsproxy.c | 2
3 files changed, 255 insertions(+), 128 deletions(-)
Index: 2.6.20-lxc2/drivers/net/veth.c
===================================================================
--- 2.6.20-lxc2.orig/drivers/net/veth.c
+++ 2.6.20-lxc2/drivers/net/veth.c
@@ -13,6 +13,11 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/veth.h>
#include <net/dst.h>
#include <net/xfrm.h>
@@ -24,6 +29,8 @@
#define veth_from_netdev(dev) ((struct veth_struct *)(netdev_priv(dev)))
+/* This module; the veth_mod_inc_use()/veth_mod_dec_use() helpers refcount it. */
+static struct module *veth_module = THIS_MODULE;
+
/* ------------------------------------------------------------------- *
*
* Device functions
@@ -75,12 +82,22 @@
return 0;
}
-static int veth_open(struct net_device *dev)
+/*
+ * Try to take a reference on the veth module.
+ *
+ * Returns 0 when try_module_get() succeeded and 1 when it did not.
+ */
+static inline int veth_mod_inc_use(void)
+{
+	if (try_module_get(veth_module))
+		return 0;
+
+	return 1;
+}
+
+/* Drop a module reference previously taken with veth_mod_inc_use(). */
+static inline void veth_mod_dec_use(void)
+{
+ module_put(veth_module);
+}
+
+/* Net device ->open hook installed by veth_init_dev(): does nothing, returns 0. */
+static int veth_dev_open(struct net_device *dev)
{
return 0;
}
-static int veth_close(struct net_device *dev)
+/* Net device ->stop hook installed by veth_init_dev(): does nothing, returns 0. */
+static int veth_dev_close(struct net_device *dev)
{
return 0;
}
@@ -95,14 +112,25 @@
return &veth_from_netdev(dev)->stats;
}
-int veth_init_dev(struct net_device *dev)
+/*
+ * ->set_mac_address hook: install the hardware address carried in @p
+ * (a struct sockaddr) as the address of @dev.
+ *
+ * Returns 0 on success, or -EADDRNOTAVAIL when is_valid_ether_addr()
+ * rejects the supplied address; @dev is left untouched in that case.
+ */
+static int veth_set_address(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (is_valid_ether_addr(addr->sa_data)) {
+		memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+static int veth_init_dev(struct net_device *dev)
{
dev->hard_start_xmit = veth_xmit;
- dev->open = veth_open;
- dev->stop = veth_close;
+ dev->open = veth_dev_open;
+ dev->stop = veth_dev_close;
dev->destructor = veth_destructor;
dev->get_stats = get_stats;
-
+ dev->set_mac_address = veth_set_address;
ether_setup(dev);
dev->tx_queue_len = 0;
@@ -114,6 +142,173 @@
dev->init = veth_init_dev;
}
+/*
+ * Create a veth pair: one device named veth_pair->parent in the caller's
+ * network namespace and one named veth_pair->child in the namespace of the
+ * nsproxy identified by veth_pair->id.
+ *
+ * A module reference is held for every existing pair; veth_del() drops it.
+ *
+ * Returns 0 on success, or:
+ *   -ESRCH   no nsproxy with that id
+ *   -EINVAL  the caller's namespace is not the parent of the target one
+ *   -EBUSY   the module reference could not be taken
+ *   -ENOMEM  a device could not be allocated
+ *   the register_netdevice() error otherwise.
+ */
+static int veth_add(struct veth_ioc_pair *veth_pair)
+{
+	struct net_namespace *child_ns;
+	struct net_namespace *parent_ns;
+	struct net_device *parent_dev;
+	struct net_device *child_dev;
+	struct nsproxy *nsproxy;
+	int err;
+
+	nsproxy = find_nsproxy_by_id(veth_pair->id);
+	if (!nsproxy)
+		return -ESRCH;
+
+	/*
+	 * Pin the child namespace *before* dropping the nsproxy reference,
+	 * so it cannot go away between the two calls.
+	 */
+	child_ns = nsproxy->net_ns;
+	get_net_ns(child_ns);
+	put_nsproxy(nsproxy);
+
+	parent_ns = current_net_ns;
+	get_net_ns(parent_ns);
+
+	err = -EINVAL;
+	if (parent_ns != child_ns->parent)
+		goto out_put_ns;
+
+	/*
+	 * Take the per-pair module reference while there is still nothing
+	 * to undo.  Taking it after both devices are registered (and RTNL
+	 * is released) would require a full unregister on failure.
+	 */
+	err = -EBUSY;
+	if (veth_mod_inc_use())
+		goto out_put_ns;
+
+	err = -ENOMEM;
+	parent_dev = alloc_netdev(sizeof(struct veth_struct),
+				  veth_pair->parent, veth_setup);
+	if (!parent_dev)
+		goto out_mod_put;
+
+	/* The child device is allocated in the context of the child namespace. */
+	push_net_ns(child_ns);
+	child_dev = alloc_netdev(sizeof(struct veth_struct),
+				 veth_pair->child, veth_setup);
+	pop_net_ns(parent_ns);
+	if (!child_dev)
+		goto out_free_parent;
+
+	veth_from_netdev(parent_dev)->pair = child_dev;
+	veth_from_netdev(child_dev)->pair = parent_dev;
+
+	rtnl_lock();
+
+	err = register_netdevice(parent_dev);
+	if (err)
+		goto out_unlock;
+
+	push_net_ns(child_ns);
+	err = register_netdevice(child_dev);
+	pop_net_ns(parent_ns);
+	if (err)
+		goto out_unregister_parent;
+
+	rtnl_unlock();
+
+	/* Success: err is 0 and the module reference stays with the pair. */
+	goto out_put_ns;
+
+out_unregister_parent:
+	/*
+	 * NOTE(review): veth_init_dev() installs veth_destructor as
+	 * dev->destructor; if that destructor frees the device, the
+	 * free_netdev(parent_dev) below is a double free on this path.
+	 * veth_destructor is not visible in this patch -- confirm.
+	 */
+	unregister_netdevice(parent_dev);
+out_unlock:
+	rtnl_unlock();
+	free_netdev(child_dev);
+out_free_parent:
+	free_netdev(parent_dev);
+out_mod_put:
+	veth_mod_dec_use();
+out_put_ns:
+	put_net_ns(parent_ns);
+	put_net_ns(child_ns);
+	return err;
+}
+
+/*
+ * Destroy the veth pair whose parent-side device is named
+ * veth_pair->parent, and drop the module reference taken by veth_add().
+ *
+ * Returns 0 on success, -ENODEV when no device has that name and -EINVAL
+ * when the named device is not a veth device.
+ */
+static int veth_del(struct veth_ioc_pair *veth_pair)
+{
+	struct net_device *child_dev;
+	struct net_namespace *parent_ns, *child_ns;
+	struct net_device *parent_dev;
+
+	parent_dev = dev_get_by_name(veth_pair->parent);
+	if (!parent_dev)
+		return -ENODEV;
+
+	/*
+	 * The name comes from userspace and may designate any device.  Only
+	 * a veth device has a struct veth_struct behind netdev_priv(), so
+	 * refuse everything else before touching ->pair (same test as
+	 * is_veth_dev()).
+	 */
+	if (parent_dev->init != veth_init_dev) {
+		dev_put(parent_dev);
+		return -EINVAL;
+	}
+
+	rtnl_lock();
+
+	child_dev = veth_from_netdev(parent_dev)->pair;
+	get_net_ns(child_dev->net_ns);
+	child_ns = child_dev->net_ns;
+
+	dev_close(child_dev);
+	synchronize_net();
+
+	/*
+	 * Now child_dev neither sends nor receives anything.
+	 * This means child_dev->hard_start_xmit is not called anymore.
+	 */
+	unregister_netdevice(parent_dev);
+	/*
+	 * At this point child_dev has a dead pointer to parent_dev,
+	 * but this pointer is not dereferenced.
+	 */
+	parent_ns = push_net_ns(child_ns);
+	unregister_netdevice(child_dev);
+
+	/* Drop the dev_get_by_name() reference before RTNL is released. */
+	dev_put(parent_dev);
+	rtnl_unlock();
+
+	pop_net_ns(parent_ns);
+	put_net_ns(child_ns);
+
+	veth_mod_dec_use();
+	return 0;
+}
+
+/*
+ * open() handler taking (inode, file): takes a module reference for the
+ * lifetime of the open file.
+ *
+ * Returns 0 on success, -EBUSY when the reference cannot be obtained.
+ */
+static int veth_open(struct inode *i, struct file *f)
+{
+	return veth_mod_inc_use() ? -EBUSY : 0;
+}
+
+/*
+ * release() counterpart of veth_open(): drops the module reference taken
+ * there.  Always returns 0.
+ */
+static int veth_release(struct inode *i, struct file *f)
+{
+ veth_mod_dec_use();
+ return 0;
+}
+
+/*
+ * ioctl handler: copies a struct veth_ioc_pair from the user pointer @arg
+ * and dispatches VETH_IOC_ADD to veth_add() and VETH_IOC_DEL to veth_del().
+ *
+ * Returns the result of the dispatched operation, or:
+ *   -EPERM   the caller lacks CAP_NET_ADMIN
+ *   -ENOMEM  the request buffer could not be allocated
+ *   -EFAULT  the request could not be copied from userspace
+ *   -EINVAL  @cmd is not a known request.
+ */
+static int veth_ioctl(struct inode *inode, struct file *file,
+		      unsigned int cmd, unsigned long arg)
+{
+	struct veth_ioc_pair *veth_pair;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	veth_pair = kmalloc(sizeof(*veth_pair), GFP_KERNEL);
+	if (!veth_pair)
+		return -ENOMEM;
+
+	if (copy_from_user(veth_pair, (void __user *)arg, sizeof(*veth_pair))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * The names come straight from userspace and are later used as C
+	 * strings (alloc_netdev(), dev_get_by_name()); make sure they are
+	 * terminated inside their buffers.
+	 * NOTE(review): assumes parent/child are char arrays in
+	 * include/linux/veth.h, as in the userspace definition -- confirm.
+	 */
+	veth_pair->parent[sizeof(veth_pair->parent) - 1] = '\0';
+	veth_pair->child[sizeof(veth_pair->child) - 1] = '\0';
+
+	switch (cmd) {
+	case VETH_IOC_ADD:
+		err = veth_add(veth_pair);
+		break;
+
+	case VETH_IOC_DEL:
+		err = veth_del(veth_pair);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+out:
+	kfree(veth_pair);
+
+	return err;
+}
+
static inline int is_veth_dev(struct net_device *dev)
{
return dev->init == veth_init_dev;
@@ -246,123 +441,6 @@
/* ------------------------------------------------------------------- *
*
- * Temporary interface to create veth devices
- *
- * ------------------------------------------------------------------- */
-
-#ifdef CONFIG_PROC_FS
-
-static int veth_debug_open(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-static char *parse_addr(char *s, char *addr)
-{
- int i, v;
-
- for (i = 0; i < ETH_ALEN; i++) {
- if (!isxdigit(*s))
- return NULL;
- *addr = 0;
- v = isdigit(*s) ? *s - '0' : toupper(*s) - 'A' + 10;
- s++;
- if (isxdigit(*s)) {
-
...