This patch make processing netlink user -> kernel messages synchronious.
This change was inspired by the talk with Alexey Kuznetsov about current
netlink messages processing. He says that he was badly wrong when introduced
asynchronious user -> kernel communication.
The call netlink_unicast is the only path to send message to the kernel
netlink socket. But, unfortunately, it is also used to send data to the
user.
Before this change the user message has been attached to the socket queue
and sk->sk_data_ready was called. The process has been blocked until all
pending messages were processed. The bad thing is that this processing
may occur in the arbitrary process context.
This patch changes nlk->data_ready callback to get 1 skb and force packet
processing right in the netlink_unicast.
Kernel -> user path in netlink_unicast remains untouched.
EINTR processing for in netlink_run_queue was changed. It forces rtnl_lock
drop, but the process remains in the cycle until the message will be fully
processed. So, there is no need to use this kludges now.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
--- ./drivers/connector/connector.c.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./drivers/connector/connector.c 2007-10-05 16:33:04.000000000 +0400
@@ -235,18 +235,6 @@ out:
}
/*
- * Netlink socket input callback - dequeues the skbs and calls the
- * main netlink receiving function.
- */
-static void cn_input(struct sock *sk, int len)
-{
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
- cn_rx_skb(skb);
-}
-
-/*
* Notification routing.
*
* Gets id and checks if there are notification request for it's idx
@@ -442,7 +430,7 @@ static int __devinit cn_init(void)
struct cn_dev *dev = &cdev;
int err;
- dev->input = cn_input;
+ dev->input = cn_rx_skb;
dev->id.idx = cn_idx;
dev->id.val = cn_val;
--- ./drivers/scsi/scsi_netlink.c.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./drivers/scsi/scsi_netlink.c 2007-10-05 16:49:45.000000000 +0400
@@ -64,7 +64,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
err = -EBADMSG;
- goto next_msg;
+ return;
}
hdr = NLMSG_DATA(nlh);
@@ -99,27 +99,6 @@ next_msg:
/**
- * scsi_nl_rcv_msg -
- * Receive handler for a socket. Extracts a received message buffer from
- * the socket, and starts message processing.
- *
- * @sk: socket
- * @len: unused
- *
- **/
-static void
-scsi_nl_rcv(struct sock *sk, int len)
-{
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
- scsi_nl_rcv_msg(skb);
- kfree_skb(skb);
- }
-}
-
-
-/**
* scsi_nl_rcv_event -
* Event handler for a netlink socket.
*
@@ -168,7 +147,7 @@ scsi_netlink_init(void)
}
scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
- SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+ SCSI_NL_GRP_CNT, scsi_nl_rcv_msg, NULL,
THIS_MODULE);
if (!scsi_nl_sock) {
printk(KERN_ERR "%s: register of recieve handler failed\n",
--- ./drivers/scsi/scsi_transport_iscsi.c.nlk6 2007-10-05 17:51:40.000000000 +0400
+++ ./drivers/scsi/scsi_transport_iscsi.c 2007-10-05 17:53:08.000000000 +0400
@@ -1097,61 +1097,49 @@ iscsi_if_recv_msg(struct sk_buff *skb, s
}
/*
- * Get message from skb (based on rtnetlink_rcv_skb). Each message is
- * processed by iscsi_if_recv_msg. Malformed skbs with wrong lengths or
- * invalid creds are discarded silently.
+ * Get message from skb. Each message is processed by iscsi_if_recv_msg.
+ * Malformed skbs with wrong lengths or invalid creds are not processed.
*/
static void
-iscsi_if_rx(struct sock *sk, int len)
+iscsi_if_rx(struct sk_buff *skb)
{
- struct sk_buff *skb;
-
mutex_lock(&rx_queue_mutex);
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if (NETLINK_CREDS(skb)->uid) {
- skb_pull(skb, skb->len);
- goto free_skb;
+ while (skb->len >= NLMSG_SPACE(0)) {
+ int err;
+ uint32_t rlen;
+ struct nlmsghdr *nlh;
+ struct iscsi_uevent *ev;
+
+ nlh = nlmsg_hdr(skb);
+ if (nlh->nlmsg_len < sizeof(*nlh) ||
+ skb->len < nlh->nlmsg_len) {
+ break;
}
- while (skb->len >= NLMSG_SPACE(0)) {
- int err;
- uint32_t rlen;
- struct nlmsghdr *nlh;
- struct iscsi_uevent *ev;
-
- nlh = nlmsg_hdr(skb);
- if (nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len) {
- break;
- }
-
- ev = NLMSG_DATA(nlh);
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
-
- err = iscsi_if_recv_msg(skb, nlh);
- if (err) {
- ev->type = ISCSI_KEVENT_IF_ERROR;
- ev->iferror = err;
- }
- do {
- /*
- * special case for GET_STATS:
- * on success - sending reply and stats from
- * inside of if_recv_msg(),
- * on error - fall through.
- */
- if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
- break;
- err = iscsi_if_send_reply(
- NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
- nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
- } while (err < 0 && err != -ECONNREFUSED);
- skb_pull(skb, rlen);
+ ev = NLMSG_DATA(nlh);
+ rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (rlen > skb->len)
+ rlen = skb->len;
+
+ err = iscsi_if_recv_msg(skb, nlh);
+ if (err) {
+ ev->type = ISCSI_KEVENT_IF_ERROR;
+ ev->iferror = err;
}
-free_skb:
- kfree_skb(skb);
+ do {
+ /*
+ * special case for GET_STATS:
+ * on success - sending reply and stats from
+ * inside of if_recv_msg(),
+ * on error - fall through.
+ */
+ if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
+ break;
+ err = iscsi_if_send_reply(
+ NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
+ nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
+ } while (err < 0 && err != -ECONNREFUSED);
+ skb_pull(skb, rlen);
}
mutex_unlock(&rx_queue_mutex);
}
--- ./fs/ecryptfs/netlink.c.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./fs/ecryptfs/netlink.c 2007-10-05 16:33:04.000000000 +0400
@@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(stru
* it to its desired netlink context element and wake up the process
* that is waiting for a response.
*/
-static void ecryptfs_receive_nl_message(struct sock *sk, int len)
+static void ecryptfs_receive_nl_message(struct sk_buff *skb)
{
- struct sk_buff *skb;
struct nlmsghdr *nlh;
- int rc = 0; /* skb_recv_datagram requires this */
-receive:
- skb = skb_recv_datagram(sk, 0, 0, &rc);
- if (rc == -EINTR)
- goto receive;
- else if (rc < 0) {
- ecryptfs_printk(KERN_ERR, "Error occurred while "
- "receiving eCryptfs netlink message; "
- "rc = [%d]\n", rc);
- return;
- }
nlh = nlmsg_hdr(skb);
if (!NLMSG_OK(nlh, skb->len)) {
ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
--- ./include/linux/connector.h.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./include/linux/connector.h 2007-10-05 16:33:04.000000000 +0400
@@ -153,7 +153,7 @@ struct cn_dev {
u32 seq, groups;
struct sock *nls;
- void (*input) (struct sock * sk, int len);
+ void (*input) (struct sk_buff *skb);
struct cn_queue_dev *cbdev;
};
--- ./include/linux/netlink.h.nlk6 2007-10-05 16:28:17.000000000 +0400
+++ ./include/linux/netlink.h 2007-10-05 16:33:04.000000000 +0400
@@ -175,7 +175,7 @@ struct netlink_skb_parms
extern struct sock *netlink_kernel_create(struct net *net,
int unit,unsigned int groups,
- void (*input)(struct sock *sk, int len),
+ void (*input)(struct sk_buff *skb),
struct mutex *cb_mutex,
struct module *module);
extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
--- ./include/net/netlink.h.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./include/net/netlink.h 2007-10-05 16:33:04.000000000 +0400
@@ -220,9 +220,9 @@ struct nl_info {
u32 pid;
};
-extern unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen,
- int (*cb)(struct sk_buff *,
- struct nlmsghdr *));
+extern int netlink_rcv_skb(struct sk_buff *skb,
+ int (*cb)(struct sk_buff *,
+ struct nlmsghdr *));
extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb,
u32 pid, unsigned int group, int report,
gfp_t flags);
--- ./kernel/audit.c.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./kernel/audit.c 2007-10-05 16:50:01.000000000 +0400
@@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_
}
/* Receive messages from netlink socket. */
-static void audit_receive(struct sock *sk, int length)
+static void audit_receive(struct sk_buff *skb)
{
- struct sk_buff *skb;
- unsigned int qlen;
-
mutex_lock(&audit_cmd_mutex);
-
- for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- audit_receive_skb(skb);
- kfree_skb(skb);
- }
+ audit_receive_skb(skb);
mutex_unlock(&audit_cmd_mutex);
}
--- ./net/core/rtnetlink.c.nlk6 2007-10-05 16:28:09.000000000 +0400
+++ ./net/core/rtnetlink.c 2007-10-05 16:50:29.000000000 +0400
@@ -1312,15 +1312,11 @@ static int rtnetlink_rcv_msg(struct sk_b
return doit(skb, nlh, (void *)&rta_buf[0]);
}
-static void rtnetlink_rcv(struct sock *sk, int len)
+static void rtnetlink_rcv(struct sk_buff *skb)
{
- unsigned int qlen = 0;
-
- do {
- rtnl_lock();
- qlen = netlink_run_queue(sk, qlen, &rtnetlink_rcv_msg);
- rtnl_unlock();
- } while (qlen);
+ rtnl_lock();
+ netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
+ rtnl_unlock();
}
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
--- ./net/decnet/netfilter/dn_rtmsg.c.nlk6 2007-10-05 16:27:16.000000000 +0400
+++ ./net/decnet/netfilter/dn_rtmsg.c 2007-10-05 16:50:45.000000000 +0400
@@ -115,17 +115,6 @@ static
...