OpenVZ Forum


Home » Mailing lists » Devel » [patch 0/5][RFC - ipv4/udp checkpoint/restart] dumping/restoring the IPV4/UDP sockets
[patch 3/5][RFC - ipv4/udp checkpoint/restart] : c/r the socket information and options [message #18771 is a reply to message #18768] Wed, 06 June 2007 12:18 Go to previous messageGo to previous message
Daniel Lezcano is currently offline  Daniel Lezcano
Messages: 417
Registered: June 2006
Senior Member
From: Daniel Lezcano <dlezcano@fr.ibm.com>

This patch defines a set of netlink attributes to store/retrieve socket 
options.

 * At dump time, a netlink message specifies the inode of the socket to
   be checkpointed. The socket is retrieved with the inode number. A
   new netlink message is built in order to store the socket information.
   The type, state and socket options are stored into it and the netlink
   message is transmitted to the requestor.

 * At restore time, the netlink message contains the type of the socket.
   A new socket is created using this type, and the attributes are browsed
   in order to use their values to restore the different options.

The choice of the C/R is to stick as much as possible to the user/kernel
boundary. For this reason, kernel_{set,get}sockopt are used. That allows
reducing the code and delegating the different checks to the corresponding
function. Unfortunately, some get/set operations are not symmetric, so some
options can be retrieved but not set and vice versa. For this reason, there
are a few helpers, and the option definitions contain a GET|SET|BOTH flag.


Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/linux/af_inet_cr.h |   61 ++++
 net/ipv4/af_inet_cr.c      |  640 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 680 insertions(+), 21 deletions(-)

Index: 2.6.20-cr/net/ipv4/af_inet_cr.c
===================================================================
--- 2.6.20-cr.orig/net/ipv4/af_inet_cr.c
+++ 2.6.20-cr/net/ipv4/af_inet_cr.c
@@ -12,36 +12,644 @@
 #include <net/genetlink.h>
 #include <net/sock.h>
 #include <linux/fs.h>
+#include <linux/syscalls.h>
 #include <linux/af_inet_cr.h>
 
 /*
- * af_inet_cr_nldump : this function is called when a netlink message is received
- * with AF_INET_CR_CMD_DUMP command.
+ * Netlink message policy definition
+ */
+static struct nla_policy af_inet_cr_policy[AF_INET_CR_ATTR_MAX] = {
+	/* socket identification and basic description */
+	[AF_INET_CR_ATTR_INODE]      = { .type = NLA_U32 },
+	[AF_INET_CR_ATTR_SOCK_STATE] = { .type = NLA_U32 },
+	[AF_INET_CR_ATTR_SOCK_TYPE]  = { .type = NLA_U32 },
+
+	/* options carried as flag attributes */
+	[AF_INET_CR_ATTR_SOCKOPT_BROADCAST]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_DEBUG]        = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_DONTROUTE]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_KEEPALIVE]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_OOBINLINE]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_PASSCRED]     = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_REUSEADDR]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_TIMESTAMP]    = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_SNDBUF_ULOCK] = { .type = NLA_FLAG },
+	[AF_INET_CR_ATTR_SOCKOPT_RCVBUF_ULOCK] = { .type = NLA_FLAG },
+
+	/* options carried as 32-bit values */
+	[AF_INET_CR_ATTR_SOCKOPT_RCVBUF]   = { .type = NLA_U32 },
+	[AF_INET_CR_ATTR_SOCKOPT_SNDBUF]   = { .type = NLA_U32 },
+	[AF_INET_CR_ATTR_SOCKOPT_PRIORITY] = { .type = NLA_U32 },
+	[AF_INET_CR_ATTR_SOCKOPT_RCVLOWAT] = { .type = NLA_U32 },
+
+	/*
+	 * options carried as fixed-size payloads: only .len is given, .type
+	 * is left at its zero default
+	 */
+	[AF_INET_CR_ATTR_SOCKOPT_RCVTIMEO]     = { .len = sizeof(struct timeval) },
+	[AF_INET_CR_ATTR_SOCKOPT_SNDTIMEO]     = { .len = sizeof(struct timeval) },
+	[AF_INET_CR_ATTR_SOCKOPT_LINGER]       = { .len = sizeof(struct linger) },
+	[AF_INET_CR_ATTR_SOCKOPT_BINDTODEVICE] = { .len = IFNAMSIZ },
+};
+
+/*
+ * Generic netlink family for the af_inet checkpoint/restart messages.
+ * The id is requested with GENL_ID_GENERATE rather than hard-coded, and
+ * maxattr is one less than the size of the af_inet_cr_policy array.
+ */
+static struct genl_family af_inet_cr_family = {
+	.name    = "af_inet_cr",
+	.id      = GENL_ID_GENERATE,
+	.version = 0x1,
+	.maxattr = AF_INET_CR_ATTR_MAX - 1,
+};
+
+/*
+ * Association between socket options and netlink attributes.
+ *
+ * Each entry gives a socket option name (SO_*), the netlink attribute
+ * (AF_INET_CR_ATTR_SOCKOPT_*) associated with it, a third field that is 0
+ * in every entry here, and a GET/SET/BOTH flag.
+ * NOTE(review): the meaning of the third field is not visible in this
+ * file - confirm against struct af_inet_cr_optattr in af_inet_cr.h.
+ *
+ * af_inet_cr_opt2attr() ignores entries whose flag lacks GET and
+ * af_inet_cr_attr2opt() ignores entries whose flag lacks SET, so entries
+ * marked GET (SO_RCVBUF, SO_SNDBUF) are never set through this table and
+ * the entry marked SET (SO_BINDTODEVICE) is never read through it.
+ */
+struct af_inet_cr_optattr socket_options[] = {
+	{ SO_BROADCAST,    AF_INET_CR_ATTR_SOCKOPT_BROADCAST,    0, BOTH },
+	{ SO_DEBUG,        AF_INET_CR_ATTR_SOCKOPT_DEBUG,        0, BOTH },
+	{ SO_DONTROUTE,    AF_INET_CR_ATTR_SOCKOPT_DONTROUTE,    0, BOTH },
+	{ SO_KEEPALIVE,    AF_INET_CR_ATTR_SOCKOPT_KEEPALIVE,    0, BOTH },
+	{ SO_OOBINLINE,    AF_INET_CR_ATTR_SOCKOPT_OOBINLINE,    0, BOTH },
+	{ SO_PRIORITY,     AF_INET_CR_ATTR_SOCKOPT_PRIORITY,     0, BOTH },
+	{ SO_RCVLOWAT,     AF_INET_CR_ATTR_SOCKOPT_RCVLOWAT,     0, BOTH },
+	{ SO_RCVBUF,       AF_INET_CR_ATTR_SOCKOPT_RCVBUF,       0, GET  },
+	{ SO_SNDBUF,       AF_INET_CR_ATTR_SOCKOPT_SNDBUF,       0, GET  },
+	{ SO_REUSEADDR,    AF_INET_CR_ATTR_SOCKOPT_REUSEADDR,    0, BOTH },
+	{ SO_TIMESTAMP,    AF_INET_CR_ATTR_SOCKOPT_TIMESTAMP,    0, BOTH },
+	{ SO_LINGER,       AF_INET_CR_ATTR_SOCKOPT_LINGER,       0, BOTH },
+	{ SO_RCVTIMEO,     AF_INET_CR_ATTR_SOCKOPT_RCVTIMEO,     0, BOTH },
+	{ SO_SNDTIMEO,     AF_INET_CR_ATTR_SOCKOPT_SNDTIMEO,     0, BOTH },
+	{ SO_BINDTODEVICE, AF_INET_CR_ATTR_SOCKOPT_BINDTODEVICE, 0, SET  },
+};
+
+
+/*
+ * socket_lookup : find a socket from its inode number
+ *
+ * @sb : superblock associated to the sockfs
+ * @ino : the inode number associated with the socket
+ * @sock : filled with the socket attached to the inode, when one is found
+ *
+ * Returns 0 on success, otherwise:
+ *  -ENOENT : no inode was found for this number
+ *  -ENOTSOCK : the inode found is not a socket inode
+ *  -EINVAL : no socket could be obtained from the inode
+ *
+ * NOTE(review): the inode obtained from the lookup is released with iput()
+ * before returning, including on success, so *sock is handed back without
+ * this function keeping the inode pinned - confirm that callers have
+ * another guarantee that the socket stays alive.
+ */
+static inline int socket_lookup(struct super_block *sb, unsigned long ino,
+				struct socket **sock)
+{
+	struct inode *inode = ilookup_unhashed(sb, ino);
+	int err = 0;
+
+	if (!inode)
+		return -ENOENT;
+
+	if (!S_ISSOCK(inode->i_mode)) {
+		err = -ENOTSOCK;
+	} else {
+		*sock = SOCKET_I(inode);
+		if (!*sock)
+			err = -EINVAL;
+	}
+
+	iput(inode);
+	return err;
+}
+
+/*
+ * af_inet_cr_opt2attr : read a socket option and append it to the skbuff
+ *  as a netlink attribute
+ *
+ * @skb : the skbuff to be filled
+ * @sock : the socket to retrieve the option from
+ * @optlevel : the level of the option
+ * @optattr : the correspondence between the option and the attribute
+ *
+ * The attribute encoding is chosen from the type found in
+ * af_inet_cr_policy for optattr->attr:
+ *  - NLA_UNSPEC : the raw option value, always emitted with the length
+ *    given by the policy
+ *  - NLA_U32 / NLA_U8 : the integer option value
+ *  - NLA_FLAG : the attribute is added only when the option is non-zero
+ *
+ * Entries whose get_and_set field lacks GET are skipped and 0 is returned.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary buffer cannot be
+ * allocated, -EINVAL if the policy type is not handled, or the error
+ * returned by kernel_getsockopt() / nla_put*().
+ */
+int af_inet_cr_opt2attr(struct sk_buff *skb, const struct socket *sock, int optlevel,
+			const struct af_inet_cr_optattr *optattr)
+{
+	int attr = optattr->attr;
+	int optname = optattr->optname;
+	int type = af_inet_cr_policy[attr].type;
+	int optlen;
+	int optval = 0;
+	char *optbuf;
+	int ret;
+
+	if (!(optattr->get_and_set & GET))
+		return 0;
+
+	if (type == NLA_UNSPEC) {
+		optlen = af_inet_cr_policy[attr].len;
+		/*
+		 * The attribute is emitted with the policy length whatever
+		 * length kernel_getsockopt() hands back in optlen, so the
+		 * buffer is zeroed: bytes the option handler does not write
+		 * must not carry stale heap content into the message.
+		 */
+		optbuf = kzalloc(optlen, GFP_KERNEL);
+		if (!optbuf)
+			return -ENOMEM;
+		ret = kernel_getsockopt((struct socket *)sock, optlevel,
+					optname, optbuf, &optlen);
+		if (!ret)
+			ret = nla_put(skb, attr, af_inet_cr_policy[attr].len,
+				      optbuf);
+		kfree(optbuf);
+		return ret;
+	}
+
+	/* reject unhandled types before touching the socket */
+	if (type != NLA_U32 && type != NLA_U8 && type != NLA_FLAG)
+		return -EINVAL;
+
+	/* the remaining types all read the option as an int */
+	optlen = sizeof(optval);
+	ret = kernel_getsockopt((struct socket *)sock, optlevel,
+				optname, (void *)&optval, &optlen);
+	if (ret)
+		return ret;
+
+	switch (type) {
+	case NLA_U32:
+		return nla_put_u32(skb, attr, optval);
+	case NLA_U8:
+		return nla_put_u8(skb, attr, optval);
+	default:
+		/* NLA_FLAG: presence of the attribute means "option set" */
+		return optval ? nla_put_flag(skb, attr) : 0;
+	}
+}
+
+/*
+ * af_inet_cr_attr2opt : convert a netlink attribute to a socket option
+ *  and set the option on the socket
+ *
+ * @info : the generic netlink message
+ * @sock : the socket to set the option on
+ * @optlevel : the level of the option
+ * @optattr : the correspondence between the option and the attribute
+ *
+ * The attribute is decoded according to the type found in
+ * af_inet_cr_policy for optattr->attr: NLA_FLAG, NLA_U8 and NLA_U32 are
+ * passed to kernel_setsockopt() as an int, NLA_UNSPEC is passed as a
+ * buffer of the length given by the policy.
+ *
+ * Entries whose get_and_set field lacks SET, and attributes absent from
+ * the message, are skipped and 0 is returned.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary buffer cannot be
+ * allocated, -EINVAL if the policy type is not handled, or the error
+ * returned by kernel_setsockopt().
+ */
+int af_inet_cr_attr2opt(const struct genl_info *info, struct socket *sock,
+			int optlevel, const struct af_inet_cr_optattr *optattr)
+{
+	int optname = optattr->optname;
+	int attr = optattr->attr;
+	int type = af_inet_cr_policy[attr].type;
+	struct nlattr *nla = info->attrs[attr];
+	char *optbuf;
+	int optlen;
+	int optval;
+	int ret;
+
+	if (!(optattr->get_and_set & SET))
+		return 0;
+	if (!nla)
+		return 0;
+
+	switch (type) {
+	case NLA_FLAG:
+		optval = nla_get_flag(nla);
+		break;
+	case NLA_U8:
+		optval = nla_get_u8(nla);
+		break;
+	case NLA_U32:
+		optval = nla_get_u32(nla);
+		break;
+	case NLA_UNSPEC:
+		optlen = af_inet_cr_policy[attr].len;
+		/*
+		 * Zeroed allocation: should the attribute payload be shorter
+		 * than optlen, the bytes nla_memcpy() leaves untouched must
+		 * not reach kernel_setsockopt() as uninitialised memory.
+		 */
+		optbuf = kzalloc(optlen, GFP_KERNEL);
+		if (!optbuf)
+			return -ENOMEM;
+		nla_memcpy(optbuf, nla, optlen);
+		ret = kernel_setsockopt(sock, optlevel, optname,
+					optbuf, optlen);
+		kfree(optbuf);
+		return ret;
+	default:
+		return -EINVAL;
+	}
+
+	/* scalar types: hand the decoded value over as an int */
+	optlen = sizeof(optval);
+	return kernel_setsockopt(sock, optlevel, optname,
+				 (void *)&optval, optlen);
+}
+
+/*
+ * dump_sockopt_sndbuf : add the sndbuf userlock flag attribute to the
+ *  netlink message when SOCK_SNDBUF_LOCK is set in the socket userlocks
+ *
+ * @sock : the socket whose sk_userlocks is inspected
+ * @skb : the skbuff containing the netlink message
+ *
+ * Returns 0 when the lock bit is not set or the flag was added,
+ * otherwise the error returned by nla_put_flag()
+ */
+static inline int dump_sockopt_sndbuf(struct socket *sock,
+				      struct sk_buff *skb)
+{
+	if (!(sock->sk->sk_userlocks & SOCK_SNDBUF_LOCK))
+		return 0;
+
+	return nla_put_flag(skb, AF_INET_CR_ATTR_SOCKOPT_SNDBUF_ULOCK);
+}
+
+/*
+ * dump_sockopt_rcvbuf : retrieve the userlock flag on the rcv option and
+ *  add it to the netlink message
+ *
+ * @sock : the socket to retrieve the userlock
+ * @skb : the skbuff containing the netlink message
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+static inline int dump_sockopt_rcvbuf(struct socket *sock,
+				      struct sk_buff *skb)
+{
+	int ret;
+	struct sock *sk
...

 
Read Message
Read Message
Read Message
Read Message
Read Message
Read Message
Previous Topic: Pid namespaces approaches testing results
Next Topic: Re: checkpointing and restoring processes
Goto Forum:
  


Current Time: Wed Sep 10 21:39:39 GMT 2025

Total time taken to generate the page: 0.10515 seconds