ipv6: fix memory leak with multiple tables during netns destruction
[pandora-kernel.git] / net / ipv6 / ip6_fib.c
index 93718f3..10135b1 100644 (file)
@@ -153,11 +153,23 @@ static __inline__ struct fib6_node * node_alloc(void)
        return fn;
 }
 
-static __inline__ void node_free(struct fib6_node * fn)
+static void node_free_immediate(struct fib6_node *fn)
 {
        kmem_cache_free(fib6_node_kmem, fn);
 }
 
+static void node_free_rcu(struct rcu_head *head)
+{
+       struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
+
+       kmem_cache_free(fib6_node_kmem, fn);
+}
+
+static void node_free(struct fib6_node *fn)
+{
+       call_rcu(&fn->rcu, node_free_rcu);
+}
+
 static __inline__ void rt6_release(struct rt6_info *rt)
 {
        if (atomic_dec_and_test(&rt->rt6i_ref))
@@ -529,9 +541,9 @@ insert_above:
 
                if (in == NULL || ln == NULL) {
                        if (in)
-                               node_free(in);
+                               node_free_immediate(in);
                        if (ln)
-                               node_free(ln);
+                               node_free_immediate(ln);
                        return NULL;
                }
 
@@ -658,7 +670,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
        rt->dst.rt6_next = iter;
        *ins = rt;
-       rt->rt6i_node = fn;
+       rcu_assign_pointer(rt->rt6i_node, fn);
        atomic_inc(&rt->rt6i_ref);
        inet6_rt_notify(RTM_NEWROUTE, rt, info);
        info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -743,7 +755,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
                                   root, and then (in st_failure) stale node
                                   in main tree.
                                 */
-                               node_free(sfn);
+                               node_free_immediate(sfn);
                                goto st_failure;
                        }
 
@@ -862,14 +874,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 
                        if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
 #ifdef CONFIG_IPV6_SUBTREES
-                               if (fn->subtree)
-                                       fn = fib6_lookup_1(fn->subtree, args + 1);
+                               if (fn->subtree) {
+                                       struct fib6_node *sfn;
+                                       sfn = fib6_lookup_1(fn->subtree,
+                                                           args + 1);
+                                       if (!sfn)
+                                               goto backtrack;
+                                       fn = sfn;
+                               }
 #endif
-                               if (!fn || fn->fn_flags & RTN_RTINFO)
+                               if (fn->fn_flags & RTN_RTINFO)
                                        return fn;
                        }
                }
-
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
                if (fn->fn_flags & RTN_ROOT)
                        break;
 
@@ -1161,8 +1181,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 
 int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
+       struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+                                   lockdep_is_held(&rt->rt6i_table->tb6_lock));
        struct net *net = info->nl_net;
-       struct fib6_node *fn = rt->rt6i_node;
        struct rt6_info **rtp;
 
 #if RT6_DEBUG >= 2
@@ -1268,9 +1289,9 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
                        if (w->leaf && fn->fn_flags&RTN_RTINFO) {
                                int err;
 
-                               if (w->count < w->skip) {
-                                       w->count++;
-                                       continue;
+                               if (w->skip) {
+                                       w->skip--;
+                                       goto skip;
                                }
 
                                err = w->func(w);
@@ -1280,6 +1301,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
                                w->count++;
                                continue;
                        }
+skip:
                        w->state = FWS_U;
                case FWS_U:
                        if (fn == w->root)
@@ -1339,7 +1361,10 @@ static int fib6_clean_node(struct fib6_walker_t *w)
                        res = fib6_del(rt, &info);
                        if (res) {
 #if RT6_DEBUG >= 2
-                               printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+                               pr_debug("%s: del failed: rt=%p@%p err=%d\n",
+                                        __func__, rt,
+                                        rcu_access_pointer(rt->rt6i_node),
+                                        res);
 #endif
                                continue;
                        }
@@ -1464,27 +1489,28 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
-void fib6_run_gc(unsigned long expires, struct net *net)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
 {
-       if (expires != ~0UL) {
+       unsigned long now;
+
+       if (force) {
                spin_lock_bh(&fib6_gc_lock);
-               gc_args.timeout = expires ? (int)expires :
-                       net->ipv6.sysctl.ip6_rt_gc_interval;
-       } else {
-               if (!spin_trylock_bh(&fib6_gc_lock)) {
-                       mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
-                       return;
-               }
-               gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+       } else if (!spin_trylock_bh(&fib6_gc_lock)) {
+               mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+               return;
        }
+       gc_args.timeout = expires ? (int)expires :
+                         net->ipv6.sysctl.ip6_rt_gc_interval;
 
        gc_args.more = icmp6_dst_gc();
 
        fib6_clean_all(net, fib6_age, 0, NULL);
+       now = jiffies;
+       net->ipv6.ip6_rt_last_gc = now;
 
        if (gc_args.more)
                mod_timer(&net->ipv6.ip6_fib_timer,
-                         round_jiffies(jiffies
+                         round_jiffies(now
                                        + net->ipv6.sysctl.ip6_rt_gc_interval));
        else
                del_timer(&net->ipv6.ip6_fib_timer);
@@ -1493,7 +1519,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 
 static void fib6_gc_timer_cb(unsigned long arg)
 {
-       fib6_run_gc(0, (struct net *)arg);
+       fib6_run_gc(0, (struct net *)arg, true);
 }
 
 static int __net_init fib6_net_init(struct net *net)
@@ -1551,13 +1577,22 @@ out_timer:
 
 static void fib6_net_exit(struct net *net)
 {
+       unsigned int i;
+
        rt6_ifdown(net, NULL);
        del_timer_sync(&net->ipv6.ip6_fib_timer);
 
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       kfree(net->ipv6.fib6_local_tbl);
-#endif
-       kfree(net->ipv6.fib6_main_tbl);
+       for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+               struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+               struct hlist_node *node, *tmp;
+               struct fib6_table *tb;
+
+               hlist_for_each_entry_safe(tb, node, tmp, head, tb6_hlist) {
+                       hlist_del(&tb->tb6_hlist);
+                       kfree(tb);
+               }
+       }
+
        kfree(net->ipv6.fib_table_hash);
        kfree(net->ipv6.rt6_stats);
 }