Make FIBs per-namespace and adds additional key (net namespace) to lookups in
routing cache.
Signed-off-by: Dmitry Mishin <dim@openvz.org>
---
include/linux/net_namespace.h | 12 ++++
include/net/flow.h | 3 +
include/net/ip_fib.h | 48 +++++++++++++---
net/core/fib_rules.c | 43 ++++++++++++--
net/core/net_namespace.c | 9 +++
net/ipv4/fib_frontend.c | 121 +++++++++++++++++++++++++++++++++++-------
net/ipv4/fib_hash.c | 13 +++-
net/ipv4/fib_rules.c | 86 +++++++++++++++++++++++++----
net/ipv4/fib_semantics.c | 100 +++++++++++++++++++++++-----------
net/ipv4/fib_trie.c | 18 ++++--
net/ipv4/route.c | 29 +++++++++-
11 files changed, 391 insertions(+), 91 deletions(-)
--- linux-2.6.19-rc6-mm2.orig/include/linux/net_namespace.h
+++ linux-2.6.19-rc6-mm2/include/linux/net_namespace.h
@@ -11,6 +11,18 @@ struct net_namespace {
struct net_device *dev_base_p, **dev_tail_p;
struct net_device *loopback_dev_p;
struct pcpu_lstats *pcpu_lstats_p;
+#ifndef CONFIG_IP_MULTIPLE_TABLES
+ struct fib_table *fib4_local_table, *fib4_main_table;
+#else
+ struct list_head fib_rules_ops_list;
+ struct fib_rules_ops *fib4_rules_ops;
+#endif
+ struct hlist_head *fib4_tables;
+ struct hlist_head *fib4_hash, *fib4_laddrhash;
+ unsigned fib4_hash_size, fib4_info_cnt;
+#ifdef CONFIG_IP_FIB_TRIE
+ int fib4_trie_last_dflt;
+#endif
unsigned int hash;
};
--- linux-2.6.19-rc6-mm2.orig/include/net/flow.h
+++ linux-2.6.19-rc6-mm2/include/net/flow.h
@@ -82,6 +82,9 @@ struct flowi {
#define fl_mh_type uli_u.mht.type
#endif
__u32 secid; /* used by xfrm; see secid.txt */
+#ifdef CONFIG_NET_NS
+ struct net_namespace *net_ns;
+#endif
} __attribute__((__aligned__(BITS_PER_LONG/8)));
#define FLOW_DIR_IN 0
--- linux-2.6.19-rc6-mm2.orig/include/net/ip_fib.h
+++ linux-2.6.19-rc6-mm2/include/net/ip_fib.h
@@ -18,6 +18,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
+#include <linux/net_namespace.h>
#include <net/fib_rules.h>
struct fib_config {
@@ -171,14 +172,21 @@ struct fib_table {
#ifndef CONFIG_IP_MULTIPLE_TABLES
-extern struct fib_table *ip_fib_local_table;
-extern struct fib_table *ip_fib_main_table;
+#ifndef CONFIG_NET_NS
+extern struct fib_table *ip_fib_local_table_static;
+extern struct fib_table *ip_fib_main_table_static;
+#define ip_fib_local_table_ns() ip_fib_local_table_static
+#define ip_fib_main_table_ns() ip_fib_main_table_static
+#else
+#define ip_fib_local_table_ns() (current_net_ns->fib4_local_table)
+#define ip_fib_main_table_ns() (current_net_ns->fib4_main_table)
+#endif
static inline struct fib_table *fib_get_table(u32 id)
{
if (id != RT_TABLE_LOCAL)
- return ip_fib_main_table;
- return ip_fib_local_table;
+ return ip_fib_main_table_ns();
+ return ip_fib_local_table_ns();
}
static inline struct fib_table *fib_new_table(u32 id)
@@ -188,21 +196,29 @@ static inline struct fib_table *fib_new_
static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
{
- if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) &&
- ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res))
+ struct fib_table *tb;
+
+ tb = ip_fib_local_table_ns();
+ if (!tb->tb_lookup(tb, flp, res))
+ return 0;
+ tb = ip_fib_main_table_ns();
+ if (tb->tb_lookup(tb, flp, res))
return -ENETUNREACH;
return 0;
}
static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
{
+ struct fib_table *tb;
+
+ tb = ip_fib_main_table_ns();
if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
- ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res);
+ tb->tb_select_default(ip_fib_main_table_ns(), flp, res);
}
#else /* CONFIG_IP_MULTIPLE_TABLES */
-#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
-#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
+#define ip_fib_local_table_ns() fib_get_table(RT_TABLE_LOCAL)
+#define ip_fib_main_table_ns() fib_get_table(RT_TABLE_MAIN)
extern int fib_lookup(struct flowi *flp, struct fib_result *res);
@@ -215,6 +231,10 @@ extern void fib_select_default(const str
/* Exported by fib_frontend.c */
extern struct nla_policy rtm_ipv4_policy[];
extern void ip_fib_init(void);
+#ifdef CONFIG_NET_NS
+extern int ip_fib_struct_init(struct net_namespace *);
+extern void ip_fib_struct_cleanup(void);
+#endif
extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
@@ -230,6 +250,9 @@ extern int ip_fib_check_default(__be32 g
extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
extern int fib_sync_up(struct net_device *dev);
extern __be32 __fib_res_prefsrc(struct fib_result *res);
+#ifdef CONFIG_NET_NS
+extern void fib_hashtable_destroy(void);
+#endif
/* Exported by fib_hash.c */
extern struct fib_table *fib_hash_init(u32 id);
@@ -237,7 +260,12 @@ extern struct fib_table *fib_hash_init(u
#ifdef CONFIG_IP_MULTIPLE_TABLES
extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
-extern void __init fib4_rules_init(void);
+#ifdef CONFIG_NET_NS
+extern int fib4_rules_init(struct net_namespace *);
+extern void fib4_rules_cleanup(void);
+#else
+extern int fib4_rules_init(void);
+#endif
#ifdef CONFIG_NET_CLS_ROUTE
extern u32 fib_rules_tclass(struct fib_result *res);
--- linux-2.6.19-rc6-mm2.orig/net/core/fib_rules.c
+++ linux-2.6.19-rc6-mm2/net/core/fib_rules.c
@@ -11,9 +11,15 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/net_ns.h>
#include <net/fib_rules.h>
-static LIST_HEAD(rules_ops);
+#ifndef CONFIG_NET_NS
+static struct list_head rules_ops_static;
+#define rules_ops_ns() rules_ops_static
+#else
+#define rules_ops_ns() (current_net_ns->fib_rules_ops_list)
+#endif
static DEFINE_SPINLOCK(rules_mod_lock);
static void notify_rule_change(int event, struct fib_rule *rule,
@@ -22,10 +28,12 @@ static void notify_rule_change(int event
static struct fib_rules_ops *lookup_rules_ops(int family)
{
+ struct list_head *ops_list;
struct fib_rules_ops *ops;
+ ops_list = &rules_ops_ns();
rcu_read_lock();
- list_for_each_entry_rcu(ops, &rules_ops, list) {
+ list_for_each_entry_rcu(ops, ops_list, list) {
if (ops->family == family) {
if (!try_module_get(ops->owner))
ops = NULL;
@@ -47,6 +55,7 @@ static void rules_ops_put(struct fib_rul
int fib_rules_register(struct fib_rules_ops *ops)
{
int err = -EEXIST;
+ struct list_head *ops_list;
struct fib_rules_ops *o;
if (ops->rule_size < sizeof(struct fib_rule))
@@ -57,12 +66,13 @@ int fib_rules_register(struct fib_rules_
ops->action == NULL)
return -EINVAL;
+ ops_list = &rules_ops_ns();
spin_lock(&rules_mod_lock);
- list_for_each_entry(o, &rules_ops, list)
+ list_for_each_entry(o, ops_list, list)
if (ops->family == o->family)
goto errout;
- list_add_tail_rcu(&ops->list, &rules_ops);
+ list_add_tail_rcu(&ops->list, ops_list);
err = 0;
errout:
spin_unlock(&rules_mod_lock);
@@ -85,10 +95,12 @@ static void cleanup_ops(struct fib_rules
int fib_rules_unregister(struct fib_rules_ops *ops)
{
int err = 0;
+ struct list_head *ops_list;
struct fib_rules_ops *o;
+ ops_list = &rules_ops_ns();
spin_lock(&rules_mod_lock);
- list_for_each_entry(o, &rules_ops, list) {
+ list_for_each_entry(o, ops_list, list) {
if (o == ops) {
list_del_rcu(&o->list);
cleanup_ops(ops);
@@ -131,6 +143,14 @@ int fib_rules_lookup(struct fib_rules_op
rcu_read_lock();
+ err = -EINVAL;
+ if (ops->rules_list->next == NULL) {
+ if (net_ratelimit())
+ printk(" *** NULL head, ops %p, list %p\n",
+ ops, ops->rules_list);
+ goto out;
+ }
+
list_for_each_entry_rcu(rule, ops->rules_list, list) {
if (!fib_rule_match(rule, ops, fl, flags))
continue;
@@ -141,6 +161,12 @@ int fib_rules_lookup(struct fib_rules_op
arg->rule = rule;
goto out;
}
+ if (rule->list.next == NULL) {
+ if (net_ratelimit())
+ printk(" *** NULL, ops %p, list %p, item %p\n",
+ ops, ops->rules_list, rule);
+ goto out;
+ }
}
err = -ENETUNREACH;
@@ -439,19 +465,21 @@ static int fib_rules_event(struct notifi
void *ptr)
{
struct net_device *dev = ptr;
+ struct list_head *ops_list;
struct fib_rules_ops *ops;
ASSERT_RTNL();
rcu_read_lock();
+ ops_list = &rules_ops_ns();
switch (event) {
case NETDEV_REGISTER:
- list_for_each_entry(ops, &rules_ops, list)
+ list_for_each_entry(ops, ops_list, list)
attach_rules(ops->rules_list, dev);
break;
case NETDEV_UNREGISTER:
- list_for_each_entry(ops, &rules_ops, list)
+ list_for_each_entry(ops, ops_list, list)
detach_rules(ops->rules_list, dev);
break;
}
@@ -467,6 +495,7 @@ static struct notifier_block fib_rules_n
static int __init fib_rules_init(void)
{
+ INIT_LIST_HEAD(&rules_ops_ns());
return register_netdevice_notifier(&fib_rules_notifier);
}
--- linux-2.6.19-rc6-mm2.orig/net/core/net_namespace.c
+++ linux-2.6.19-rc6-mm2/net/core/net_namespace.c
@@ -11,6 +11,7 @@
#include <linux/net_namespace.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <net/ip_fib.h>
struct net_namespace init_net_ns = {
.kref = {
@@ -44,11 +45,18 @@ static struct net_namespace *clone_net_n
ns->dev_tail_p = &ns->dev_base_p;
ns->hash = net_random();
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ INIT_LIST_HEAD(&ns->fib_rules_ops_list);
+#endif
+ if (ip_fib_struct_init(ns))
+ goto out_fib4;
if (loopback_init(ns))
g
...