7aafaeddfcbffcfc5dcf6bc444600e7f1b429a08
[pandora-kernel.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/* Compile-time debug verbosity for this file: 0 = silent,
 * 1 = NEIGH_PRINTK1 enabled, 2 = NEIGH_PRINTK2 also enabled.
 * The PRINTKn macros default to no-ops and are re-pointed at
 * printk() below according to NEIGH_DEBUG.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/* Proxy-neighbour hash: 16 buckets (indices masked with 0xF). */
#define PNEIGH_HASHMASK		0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86  *      However, the same lock is used to protect other entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
96    the list of neighbour tables; the list is used only in process context.
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
/* Output handler installed on dead/unusable entries: drop the packet
 * and report the network as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
106
/* Final teardown for an entry already unlinked from the hash table:
 * run the per-protocol cleanup hook (if any), send an RTM_DELNEIGH
 * netlink notification, and drop the table's reference.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
115
/*
 * Pick a random reachability interval, uniformly distributed over
 * (1/2)*base ... (3/2)*base.  This matches the default IPv6 behaviour
 * and is deliberately not configurable.  A zero base yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;
	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/* Synchronously shrink the table under pressure: walk every hash bucket
 * and unlink entries that are unreferenced (refcnt == 1, i.e. only the
 * table holds them) and not NUD_PERMANENT.
 *
 * Returns 1 if at least one entry was reclaimed, 0 otherwise.
 * Takes tbl->lock for writing; called from neigh_alloc().
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink from the bucket, then release
				 * outside n->lock via the common path.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Remember when we last flushed so neigh_alloc() can rate-limit
	 * forced GC attempts.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/* Unlink every entry belonging to @dev (all entries if @dev is NULL)
 * from the hash table.  Entries still referenced elsewhere cannot be
 * freed yet: they are neutered (queue purged, output redirected to
 * neigh_blackhole, state forced to NOARP/NONE) so the final
 * neigh_release() can destroy them safely later.
 *
 * Caller must hold tbl->lock for writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
253
/* Flush all cached entries for @dev, e.g. after its link-layer address
 * changed.  Takes tbl->lock for writing around neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
261
/* Device is going down: flush its neighbour entries and proxy entries
 * under tbl->lock, then stop the proxy timer and drop any queued proxy
 * packets.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Timer and queue teardown must happen outside tbl->lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
274
/* Allocate and minimally initialise a new neighbour entry.
 *
 * Before allocating, run forced GC if the table is over gc_thresh3, or
 * over gc_thresh2 with no flush in the last 5 seconds; return NULL if
 * GC cannot bring us back under gc_thresh3.
 *
 * The returned entry is NOT yet hashed: dead == 1 marks it as outside
 * the table until neigh_create() links it in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not in the hash table yet */
out:
	return n;

out_entries:
	/* Undo the speculative entries increment on failure. */
	atomic_dec(&tbl->entries);
	goto out;
}
315
/* Allocate a hash table with 2^shift buckets.  Small bucket arrays come
 * from kzalloc(); larger ones from whole zeroed pages.  The per-table
 * hash seed is randomised and forced odd (hence non-zero).
 * Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
	ret->hash_rnd |= 1;	/* keep the seed odd, thus never zero */
	return ret;
}
341
342 static void neigh_hash_free_rcu(struct rcu_head *head)
343 {
344         struct neigh_hash_table *nht = container_of(head,
345                                                     struct neigh_hash_table,
346                                                     rcu);
347         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
348         struct neighbour __rcu **buckets = nht->hash_buckets;
349
350         if (size <= PAGE_SIZE)
351                 kfree(buckets);
352         else
353                 free_pages((unsigned long)buckets, get_order(size));
354         kfree(nht);
355 }
356
/* Replace the current hash table with one of 2^new_shift buckets,
 * rehashing every entry with the new table's random seed.  The old
 * table is freed via RCU after readers are done.  On allocation
 * failure the old table is kept and returned.
 *
 * Caller must hold tbl->lock for writing.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* Hash values use the top bits. */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
397
/* Look up the entry for (@pkey, @dev) under RCU-bh.  On a match, try to
 * take a reference; if the entry is concurrently dying (refcnt already
 * zero) NULL is returned instead.  Caller owns the reference on success.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			/* hits counts key matches, even when the refcnt
			 * grab loses the race with teardown.
			 */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
427
/* As neigh_lookup(), but match on key and network namespace only,
 * ignoring the device (hashes with dev == NULL).  Caller owns the
 * returned reference on success.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			/* Entry may be dying; inc_not_zero guards that. */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
458
/* Create (or find) the entry for (@pkey, @dev).
 *
 * Allocates and initialises a new entry outside the table lock (protocol
 * constructor, device setup hook), then inserts it under tbl->lock,
 * growing the hash table first if needed.  If another CPU inserted a
 * matching entry in the meantime, that entry is returned instead and
 * the new one is released.
 *
 * Returns a referenced neighbour, or ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the entry starts out unconfirmed. */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms were torn down while we were setting up: bail out. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Lost a race with a concurrent insert?  Return the winner. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564                 struct net *net, const void *pkey, struct net_device *dev)
565 {
566         int key_len = tbl->key_len;
567         u32 hash_val = pneigh_hash(pkey, key_len);
568
569         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570                                  net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
/* Find a proxy entry, optionally creating it when @creat is non-zero.
 * Creation sleeps (GFP_KERNEL) and requires the RTNL; the new entry is
 * linked into the bucket under tbl->lock after the per-protocol
 * pconstructor succeeds.  Returns the entry or NULL.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	/* key is stored inline after the struct. */
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the refs taken above. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
619
620
/* Remove and free the proxy entry exactly matching (@pkey, @dev, @net).
 * The entry is unlinked under tbl->lock, then destroyed outside it.
 * Returns 0 on success, -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
647
/* Remove all proxy entries for @dev (every entry if @dev is NULL) from
 * all buckets and free them.  Called from neigh_ifdown() with tbl->lock
 * held for writing.  Always returns -ENOENT (callers ignore it).
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
/* Drop a reference on @parms; destroy it when the count hits zero. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
679
/* RCU callback: return the neighbour's memory to the table's slab cache
 * once all RCU readers are done with it.
 */
static void neigh_destroy_rcu(struct rcu_head *head)
{
	struct neighbour *neigh = container_of(head, struct neighbour, rcu);

	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/*
 *	neighbour must already be out of the table;
 *
 *	Invoked by the last neigh_release(): drops the timer reference,
 *	purges the pending-packet queue, releases the device and parms
 *	references, and frees the entry via RCU.
 */
void neigh_destroy(struct neighbour *neigh)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* dead == 0 means the entry is still hashed -- refcounting bug;
	 * leak it rather than corrupt the table.
	 */
	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A pending timer would hold its own reference, so we could not
	 * have reached refcnt == 0 with one armed.
	 */
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
715
/* Neighbour state is suspicious;
   disable fast path.

   Point output at the generic (resolving) ops->output handler.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;
}
727
/* Neighbour state is OK;
   enable fast path.

   Point output at the optimised ops->connected_output handler.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
739
/* Periodic garbage collector (delayed work).  Refreshes the randomised
 * reachable_time for every parms block about every 300 seconds, then
 * sweeps all buckets, dropping unreferenced entries that are FAILED or
 * stale past gc_staletime.  Re-arms itself at base_reachable_time/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			/* Permanent entries and entries owned by the
			 * state-machine timer are never collected here.
			 */
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				/* NOTE(review): plain assignment on an
				 * __rcu chain; rcu_assign_pointer() would
				 * be the sparse-clean form -- confirm
				 * against upstream before changing.
				 */
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
816
817 static __inline__ int neigh_max_probes(struct neighbour *n)
818 {
819         struct neigh_parms *p = n->parms;
820         return (n->nud_state & NUD_PROBE) ?
821                 p->ucast_probes :
822                 p->ucast_probes + p->app_probes + p->mcast_probes;
823 }
824
/* Resolution failed: report every queued packet as unreachable and drop
 * the rest of the queue.  Drops/retakes neigh->lock around each
 * error_report call since that callback may re-enter the cache.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
}
848
/* Send one solicitation for @neigh, using a copy of the oldest queued
 * skb (if any) as context.  Drops neigh->lock before calling into the
 * protocol's solicit handler and does not retake it.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
861
/* Called when a timer expires for a neighbour entry. */

/* NUD state-machine tick: advances the entry between the timed states
 * (REACHABLE, DELAY, PROBE, INCOMPLETE), fails it once the probe
 * budget is exhausted, re-arms the timer while a timed state remains,
 * and fires a probe when appropriate.  Runs in timer context and
 * drops the timer's reference on the entry before returning.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* default: look again in one second */

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed; sleep until confirmation expires. */
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Recently used but no longer confirmed: enter DELAY
			 * to give passive confirmation a chance first. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* Confirmation arrived while delaying: back to
			 * REACHABLE without ever probing. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		/* Probe budget exhausted: give up on this neighbour. */
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp wakeups to at most twice per second. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* releases neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);	/* drop the timer's reference */
}
945
/* Slow path of neigh_event_send(): decide whether the caller may use
 * the entry to transmit right now.
 *
 * Returns 0 when the entry is usable as-is (CONNECTED/DELAY/PROBE, or
 * a STALE entry that is moved to DELAY here).  Returns 1 when
 * resolution has been started — the skb, if any, was queued for later
 * transmission — or when the entry went straight to FAILED and the
 * skb was dropped.
 *
 * Locking: takes neigh->lock with BHs disabled; on the common exit
 * path either neigh_probe() or write_unlock() releases the lock and
 * local_bh_enable() restores BHs (the two halves of write_unlock_bh).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the probe counter so
			 * only mcast/app probes remain after ucast ones. */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Bound the unresolved queue; drop the oldest. */
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = __skb_dequeue(&neigh->arp_queue);
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);	/* releases neigh->lock */
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1005 EXPORT_SYMBOL(__neigh_event_send);
1006
1007 static void neigh_update_hhs(struct neighbour *neigh)
1008 {
1009         struct hh_cache *hh;
1010         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1011                 = NULL;
1012
1013         if (neigh->dev->header_ops)
1014                 update = neigh->dev->header_ops->cache_update;
1015
1016         if (update) {
1017                 hh = &neigh->hh;
1018                 if (hh->hh_len) {
1019                         write_seqlock_bh(&hh->hh_lock);
1020                         update(hh, neigh->dev, neigh->ha);
1021                         write_sequnlock_bh(&hh->hh_lock);
1022                 }
1023         }
1024 }
1025
1026
1027
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows the existing lladdr to be
				overridden if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it differs.
				It also allows the current state to be
				retained if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows the existing
				NTF_ROUTER flag to be overridden.
	NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
				to be a router.

   Caller MUST hold a reference count on the entry.
 */
1048
/* Apply a state/address update to a neighbour entry per the flag
 * semantics documented above.  Returns 0 on success, -EPERM when a
 * non-administrative update targets a NOARP/PERMANENT entry, or
 * -EINVAL when no address is supplied and none is known.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch static entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Transition to an invalid state (NONE/FAILED/...):
		 * stop the timer and tear down the resolved state. */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but mark it
				 * suspicious instead of overriding. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the seqlock so lockless
		 * readers see a consistent value. */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1194 EXPORT_SYMBOL(neigh_update);
1195
1196 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1197                                  u8 *lladdr, void *saddr,
1198                                  struct net_device *dev)
1199 {
1200         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1201                                                  lladdr || !dev->addr_len);
1202         if (neigh)
1203                 neigh_update(neigh, lladdr, NUD_STALE,
1204                              NEIGH_UPDATE_F_OVERRIDE);
1205         return neigh;
1206 }
1207 EXPORT_SYMBOL(neigh_event_ns);
1208
1209 /* called with read_lock_bh(&n->lock); */
1210 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1211 {
1212         struct net_device *dev = dst->dev;
1213         __be16 prot = dst->ops->protocol;
1214         struct hh_cache *hh = &n->hh;
1215
1216         write_lock_bh(&n->lock);
1217
1218         /* Only one thread can come in here and initialize the
1219          * hh_cache entry.
1220          */
1221         if (!hh->hh_len)
1222                 dev->header_ops->cache(n, hh, prot);
1223
1224         write_unlock_bh(&n->lock);
1225 }
1226
1227 /* This function can be used in contexts, where only old dev_queue_xmit
1228  * worked, f.e. if you want to override normal output path (eql, shaper),
1229  * but resolution is not made yet.
1230  */
1231
1232 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1233 {
1234         struct net_device *dev = skb->dev;
1235
1236         __skb_pull(skb, skb_network_offset(skb));
1237
1238         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1239                             skb->len) < 0 &&
1240             dev->header_ops->rebuild(skb))
1241                 return 0;
1242
1243         return dev_queue_xmit(skb);
1244 }
1245 EXPORT_SYMBOL(neigh_compat_output);
1246
/* Slow and careful. */

/* Output path for entries that may still need resolution: kick the
 * state machine (queueing the skb while resolution is in progress),
 * build the hardware header from a consistent snapshot of neigh->ha,
 * and transmit.  Returns the dev_queue_xmit() result when sent, 0
 * when the skb was queued pending resolution, or -EINVAL on drop.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	/* Non-zero return means the skb was queued (or dropped) for us. */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Seqlock retry loop: re-read neigh->ha if it changed
		 * underneath us while building the header. */
		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1289
1290 /* As fast as possible without hh cache */
1291
1292 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1293 {
1294         struct net_device *dev = neigh->dev;
1295         unsigned int seq;
1296         int err;
1297
1298         __skb_pull(skb, skb_network_offset(skb));
1299
1300         do {
1301                 seq = read_seqbegin(&neigh->ha_lock);
1302                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1303                                       neigh->ha, NULL, skb->len);
1304         } while (read_seqretry(&neigh->ha_lock, seq));
1305
1306         if (err >= 0)
1307                 err = dev_queue_xmit(skb);
1308         else {
1309                 err = -EINVAL;
1310                 kfree_skb(skb);
1311         }
1312         return err;
1313 }
1314 EXPORT_SYMBOL(neigh_connected_output);
1315
/* Output path for neighbours that need no resolution or header help at
 * all: pass the packet straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1321
/* Proxy-queue timer handler: re-process queued packets whose
 * randomized delay has elapsed and re-arm the timer for the earliest
 * packet still waiting.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* 0 == nothing left to schedule */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due (or overdue): replay through proxy_redo if
			 * the device is still up, otherwise drop. */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1355
1356 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1357                     struct sk_buff *skb)
1358 {
1359         unsigned long now = jiffies;
1360         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1361
1362         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1363                 kfree_skb(skb);
1364                 return;
1365         }
1366
1367         NEIGH_CB(skb)->sched_next = sched_next;
1368         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1369
1370         spin_lock(&tbl->proxy_queue.lock);
1371         if (del_timer(&tbl->proxy_timer)) {
1372                 if (time_before(tbl->proxy_timer.expires, sched_next))
1373                         sched_next = tbl->proxy_timer.expires;
1374         }
1375         skb_dst_drop(skb);
1376         dev_hold(skb->dev);
1377         __skb_queue_tail(&tbl->proxy_queue, skb);
1378         mod_timer(&tbl->proxy_timer, sched_next);
1379         spin_unlock(&tbl->proxy_queue.lock);
1380 }
1381 EXPORT_SYMBOL(pneigh_enqueue);
1382
1383 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1384                                                       struct net *net, int ifindex)
1385 {
1386         struct neigh_parms *p;
1387
1388         for (p = &tbl->parms; p; p = p->next) {
1389                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1390                     (!p->dev && !ifindex))
1391                         return p;
1392         }
1393
1394         return NULL;
1395 }
1396
/* Clone the table's default parameters for a device, let the driver
 * adjust them via ndo_neigh_setup, and link the new block into the
 * table's parms list.  Returns the new block, or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Template: the table's default (ifindex 0) parameter block. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Give the driver a chance to veto or customize. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		/* Pin the device and namespace for the block's lifetime. */
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next         = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1432
/* RCU callback scheduled by neigh_parms_release(): drop the list's
 * reference only after a grace period, so lockless readers still
 * traversing the parms list have finished with the block.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1440
/* Unlink a device's parms block from its table.  The reference drop
 * is deferred through RCU (neigh_rcu_free_parms) so readers walking
 * the list without tbl->lock remain safe; parms->dead marks the block
 * as going away for anyone who still sees it.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	/* The table's built-in default block is never released. */
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1463
/* Final teardown of a parms block: release the namespace reference
 * taken at allocation and free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1469
1470 static struct lock_class_key neigh_table_proxy_queue_class;
1471
/* One-time initialization of a neighbour table: entry slab cache,
 * per-cpu statistics, /proc entry, neighbour and proxy hash tables,
 * locks, periodic GC work and the proxy timer.  Panics on allocation
 * failure, as tables are created during subsystem init.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (!tbl->kmem_cachep)
		tbl->kmem_cachep =
			kmem_cache_create(tbl->id, tbl->entry_size, 0,
					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					  NULL);
	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash; presumably 1 << 3 buckets, grown on
	 * demand — confirm against neigh_hash_alloc(). */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1516
1517 void neigh_table_init(struct neigh_table *tbl)
1518 {
1519         struct neigh_table *tmp;
1520
1521         neigh_table_init_no_netlink(tbl);
1522         write_lock(&neigh_tbl_lock);
1523         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1524                 if (tmp->family == tbl->family)
1525                         break;
1526         }
1527         tbl->next       = neigh_tables;
1528         neigh_tables    = tbl;
1529         write_unlock(&neigh_tbl_lock);
1530
1531         if (unlikely(tmp)) {
1532                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1533                        "family %d\n", tbl->family);
1534                 dump_stack();
1535         }
1536 }
1537 EXPORT_SYMBOL(neigh_table_init);
1538
/* Tear down a neighbour table (module unload path): stop deferred
 * work and timers, flush all queues and entries, unlink the table
 * from the global list, then free the hashes, stats, /proc entry and
 * slab cache.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash table after a grace period; no new lookups can
	 * start since the table is off the global list. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	kmem_cache_destroy(tbl->kmem_cachep);
	tbl->kmem_cachep = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1577
/* RTM_DELNEIGH netlink handler: delete the neighbour (or proxy) entry
 * described by the message.  Deleting a regular entry is implemented
 * as an administrative update to NUD_FAILED.  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found: drop the list lock before doing
		 * the real work.  Every path below exits via "goto out",
		 * so the loop is never resumed without the lock. */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1641
/* RTM_NEWNEIGH netlink handler: create or update a neighbour (or
 * proxy) entry from the message attributes, honouring NLM_F_CREATE /
 * NLM_F_EXCL / NLM_F_REPLACE semantics.  Runs under RTNL.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be long enough. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found: drop the list lock; every path
		 * below leaves via "goto out". */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without REPLACE, an existing lladdr wins. */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE only kicks resolution; no state change. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1739
/* Emit one neigh_parms set as a nested NDTA_PARMS attribute on @skb.
 *
 * The NLA_PUT* macros jump to the nla_put_failure label when the skb
 * runs out of tailroom.  Returns the resulting skb length on success,
 * -ENOBUFS if the nest could not be started, or -EMSGSIZE on overflow
 * (the partial nest is cancelled).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* Only device-bound parms carry an ifindex. */
	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1773
/* Build an RTM_NEWNEIGHTBL message describing @tbl: name, thresholds,
 * GC interval, run-time configuration (NDTA_CONFIG), per-CPU statistics
 * summed into NDTA_STATS, and the table's default parameters.
 *
 * Holds tbl->lock (read, BH disabled) across the dump so the values
 * emitted are mutually consistent.  Returns the final message length,
 * or -EMSGSIZE if @skb filled up (the partial message is cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	/* NLA_PUT* jump to nla_put_failure on skb overflow. */
	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed; sample its
		 * random seed and size under rcu_read_lock_bh().
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-CPU counters into a single ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	/* The table-default parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1857
/* Build an RTM_NEWNEIGHTBL message carrying just the table name and a
 * single (typically per-device) parms set.  Used when dumping the
 * non-default parms entries of a table.
 *
 * Returns the message length, or -EMSGSIZE on overflow (message
 * cancelled).
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1889
/* Validation policy for top-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1898
/* Validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1914
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC thresholds
 * and interval and, via a nested NDTA_PARMS attribute, one neigh_parms
 * set (selected by NDTPA_IFINDEX; 0 or absent means the table default).
 *
 * The table is located by its NDTA_NAME (mandatory), optionally
 * restricted by family.  Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every parameter attribute that was supplied. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2039
/* RTM_GETNEIGHTBL dump: one message per table, followed by one per
 * non-default parms set belonging to the caller's netns.
 *
 * cb->args[0] holds the table index and cb->args[1] the parms index to
 * resume from on the next invocation of this multipart dump.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms.next skips the table-default parms, which was
		 * already included in the message above.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Only the first resumed table honours the saved offset. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2088
/* Build one RTM_NEWNEIGH message describing @neigh.
 *
 * The link-layer address is included only while the entry is in a
 * NUD_VALID state; state, address snapshot and cache timestamps are
 * sampled under neigh->lock so they are consistent with each other.
 *
 * Returns the message length, or -EMSGSIZE on skb overflow (message
 * cancelled).
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		/* Can't use NLA_PUT here: the lock must be dropped
		 * before jumping to nla_put_failure.
		 */
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2138
/* Announce a neighbour change on both the in-kernel netevent chain
 * and the rtnetlink multicast group.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2144
/* Dump @tbl's neighbour entries (for the caller's netns) into @skb,
 * walking the RCU-protected hash table under rcu_read_lock_bh().
 *
 * cb->args[1]/[2] carry the hash bucket and in-bucket index to resume
 * from.  Returns the skb length, or -1 when the skb filled mid-dump.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h < (1 << nht->hash_shift); h++) {
		if (h < s_h)
			continue;
		/* The saved in-bucket index only applies to bucket s_h. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2187
/* RTM_GETNEIGH dump: iterate all tables matching the requested family
 * and dump each via neigh_dump_table().  cb->args[0] is the table
 * index to resume from; the per-table cursor (args[1..]) is cleared
 * whenever the dump advances to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2211
/* Invoke @cb(entry, @cookie) on every neighbour in @tbl.  The hash
 * table is walked under rcu_read_lock_bh(); tbl->lock is additionally
 * taken (read) to keep the table from being resized during the walk.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2233
2234 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every neighbour in @tbl and unlink-and-release those for which
 * @cb returns non-zero.  The unlink uses rcu_assign_pointer so that
 * concurrent RCU readers still see a consistent chain.
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			/* Decide under the entry's own lock, then unlink
			 * (or advance) before dropping it.
			 */
			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2268
2269 #ifdef CONFIG_PROC_FS
2270
/* seq_file helper: return the first neighbour entry visible to this
 * iterator, scanning from bucket 0.  Entries are skipped when they
 * belong to another netns, when the protocol sub-iterator rejects
 * them, or (with NEIGH_SEQ_SKIP_NOARP) when they are pure NOARP
 * entries.  state->bucket is left at the bucket where the scan
 * stopped.  Caller holds rcu_read_lock_bh() (see neigh_seq_start()).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2309
/* seq_file helper: advance from @n to the next visible neighbour,
 * applying the same netns / sub-iterator / SKIP_NOARP filtering as
 * neigh_get_first() and moving on to later buckets as needed.
 *
 * When @pos is non-NULL it is decremented for each entry returned,
 * which is how neigh_get_idx() counts down to a target position.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		/* The sub-iterator may have more to yield for this entry. */
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2357
2358 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2359 {
2360         struct neighbour *n = neigh_get_first(seq);
2361
2362         if (n) {
2363                 --(*pos);
2364                 while (*pos) {
2365                         n = neigh_get_next(seq, n, pos);
2366                         if (!n)
2367                                 break;
2368                 }
2369         }
2370         return *pos ? NULL : n;
2371 }
2372
2373 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2374 {
2375         struct neigh_seq_state *state = seq->private;
2376         struct net *net = seq_file_net(seq);
2377         struct neigh_table *tbl = state->tbl;
2378         struct pneigh_entry *pn = NULL;
2379         int bucket = state->bucket;
2380
2381         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2382         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2383                 pn = tbl->phash_buckets[bucket];
2384                 while (pn && !net_eq(pneigh_net(pn), net))
2385                         pn = pn->next;
2386                 if (pn)
2387                         break;
2388         }
2389         state->bucket = bucket;
2390
2391         return pn;
2392 }
2393
2394 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2395                                             struct pneigh_entry *pn,
2396                                             loff_t *pos)
2397 {
2398         struct neigh_seq_state *state = seq->private;
2399         struct net *net = seq_file_net(seq);
2400         struct neigh_table *tbl = state->tbl;
2401
2402         do {
2403                 pn = pn->next;
2404         } while (pn && !net_eq(pneigh_net(pn), net));
2405
2406         while (!pn) {
2407                 if (++state->bucket > PNEIGH_HASHMASK)
2408                         break;
2409                 pn = tbl->phash_buckets[state->bucket];
2410                 while (pn && !net_eq(pneigh_net(pn), net))
2411                         pn = pn->next;
2412                 if (pn)
2413                         break;
2414         }
2415
2416         if (pn && pos)
2417                 --(*pos);
2418
2419         return pn;
2420 }
2421
2422 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2423 {
2424         struct pneigh_entry *pn = pneigh_get_first(seq);
2425
2426         if (pn) {
2427                 --(*pos);
2428                 while (*pos) {
2429                         pn = pneigh_get_next(seq, pn, pos);
2430                         if (!pn)
2431                                 break;
2432                 }
2433         }
2434         return *pos ? NULL : pn;
2435 }
2436
2437 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2438 {
2439         struct neigh_seq_state *state = seq->private;
2440         void *rc;
2441         loff_t idxpos = *pos;
2442
2443         rc = neigh_get_idx(seq, &idxpos);
2444         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2445                 rc = pneigh_get_idx(seq, &idxpos);
2446
2447         return rc;
2448 }
2449
/* seq_file start callback for protocol neighbour tables (ARP, NDISC).
 * Initialises the iterator state, takes rcu_read_lock_bh() — held
 * until neigh_seq_stop() — and positions at *pos.  Position 0 yields
 * SEQ_START_TOKEN so the show callback can print a header line.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2465
2466 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2467 {
2468         struct neigh_seq_state *state;
2469         void *rc;
2470
2471         if (v == SEQ_START_TOKEN) {
2472                 rc = neigh_get_first(seq);
2473                 goto out;
2474         }
2475
2476         state = seq->private;
2477         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2478                 rc = neigh_get_next(seq, v, NULL);
2479                 if (rc)
2480                         goto out;
2481                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2482                         rc = pneigh_get_first(seq);
2483         } else {
2484                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2485                 rc = pneigh_get_next(seq, v, NULL);
2486         }
2487 out:
2488         ++(*pos);
2489         return rc;
2490 }
2491 EXPORT_SYMBOL(neigh_seq_next);
2492
/* seq_file stop callback: drop the rcu_bh lock taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2499
2500 /* statistics via seq_file */
2501
2502 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2503 {
2504         struct neigh_table *tbl = seq->private;
2505         int cpu;
2506
2507         if (*pos == 0)
2508                 return SEQ_START_TOKEN;
2509
2510         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2511                 if (!cpu_possible(cpu))
2512                         continue;
2513                 *pos = cpu+1;
2514                 return per_cpu_ptr(tbl->stats, cpu);
2515         }
2516         return NULL;
2517 }
2518
2519 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2520 {
2521         struct neigh_table *tbl = seq->private;
2522         int cpu;
2523
2524         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2525                 if (!cpu_possible(cpu))
2526                         continue;
2527                 *pos = cpu+1;
2528                 return per_cpu_ptr(tbl->stats, cpu);
2529         }
2530         return NULL;
2531 }
2532
/* seq_file ->stop for the stats file: nothing to release, the iterator
 * takes no locks (per-CPU counters are read without synchronization).
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2537
/* Emit one row of /proc/net/stat/<tbl>.  v is SEQ_START_TOKEN for the
 * column-header line, otherwise the per-CPU neigh_statistics pointer
 * handed out by ->start/->next.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	/* entries is table-wide; every other counter is per-CPU */
	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2571
/* Iterator over per-CPU neighbour-cache statistics (/proc/net/stat/<tbl>) */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2578
2579 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2580 {
2581         int ret = seq_open(file, &neigh_stat_seq_ops);
2582
2583         if (!ret) {
2584                 struct seq_file *sf = file->private_data;
2585                 sf->private = PDE(inode)->data;
2586         }
2587         return ret;
2588 };
2589
/* Plain seq_file file_operations for /proc/net/stat/<tbl> */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2597
2598 #endif /* CONFIG_PROC_FS */
2599
/* Worst-case netlink message size for a single neighbour entry:
 * ndmsg header plus NDA_DST and NDA_LLADDR (both bounded by
 * MAX_ADDR_LEN), NDA_CACHEINFO and NDA_PROBES.  Must stay in sync with
 * what neigh_fill_info() emits -- __neigh_notify() treats -EMSGSIZE
 * from it as a bug here.
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2608
2609 static void __neigh_notify(struct neighbour *n, int type, int flags)
2610 {
2611         struct net *net = dev_net(n->dev);
2612         struct sk_buff *skb;
2613         int err = -ENOBUFS;
2614
2615         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2616         if (skb == NULL)
2617                 goto errout;
2618
2619         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2620         if (err < 0) {
2621                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2622                 WARN_ON(err == -EMSGSIZE);
2623                 kfree_skb(skb);
2624                 goto errout;
2625         }
2626         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2627         return;
2628 errout:
2629         if (err < 0)
2630                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2631 }
2632
#ifdef CONFIG_ARPD
/* Ask the user-space resolver (arpd) to resolve this neighbour by
 * multicasting an RTM_GETNEIGH request to RTNLGRP_NEIGH listeners.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2640
2641 #ifdef CONFIG_SYSCTL
2642
/* 18 tunables plus the zero terminator */
#define NEIGH_VARS_MAX 19

/* Template for the net/<proto>/neigh/<dev>/ sysctl directory.  A copy is
 * kmemdup()ed per neigh_parms in neigh_sysctl_register(), which then
 * patches each entry's .data pointer by index -- the [i] comments below
 * must stay in step with the t->neigh_vars[i] assignments there.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
	char *dev_name;
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		{	/* [0] -> p->mcast_probes */
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [1] -> p->ucast_probes */
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [2] -> p->app_probes */
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [3] -> p->retrans_time (USER_HZ units) */
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [4] -> p->base_reachable_time */
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [5] -> p->delay_probe_time */
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [6] -> p->gc_staletime */
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [7] -> p->queue_len */
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [8] -> p->proxy_qlen */
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [9] -> p->anycast_delay */
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [10] -> p->proxy_delay */
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [11] -> p->locktime */
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [12] -> p->retrans_time, millisecond view */
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{	/* [13] -> p->base_reachable_time, millisecond view */
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{	/* [14] table-wide; present only in the "default" dir */
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [15] table-wide */
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [16] table-wide */
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [17] table-wide */
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2762
/* neigh_sysctl_register - expose one neigh_parms under sysctl.
 * @dev:     device the parms belong to, or NULL for the per-protocol
 *           "default" directory (which also carries the table-wide knobs)
 * @p:       neighbour parameters whose fields back the sysctl entries
 * @p_name:  protocol path component ("ipv4", "ipv6", ...)
 * @handler: optional proc_handler override for the four time-valued
 *           entries (retrans/reachable, plain and _ms variants)
 *
 * Registers net/<p_name>/neigh/<dev-or-default>/ backed by a kmemdup()ed
 * copy of neigh_sysctl_template.  The numeric indices below must match
 * the template's entry order.  Returns 0 on success, -ENOBUFS on any
 * allocation or registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	/* "proto" and "default" are placeholders, patched below */
	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Point each template entry at the matching neigh_parms field. */
	t->neigh_vars[0].data  = &p->mcast_probes;
	t->neigh_vars[1].data  = &p->ucast_probes;
	t->neigh_vars[2].data  = &p->app_probes;
	t->neigh_vars[3].data  = &p->retrans_time;
	t->neigh_vars[4].data  = &p->base_reachable_time;
	t->neigh_vars[5].data  = &p->delay_probe_time;
	t->neigh_vars[6].data  = &p->gc_staletime;
	t->neigh_vars[7].data  = &p->queue_len;
	t->neigh_vars[8].data  = &p->proxy_qlen;
	t->neigh_vars[9].data  = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	t->neigh_vars[12].data  = &p->retrans_time;
	t->neigh_vars[13].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: per-device directories do
		 * not get the table-wide gc_* entries ([14]..[17]).
		 */
		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* NOTE(review): (p + 1) assumes gc_interval and the three
		 * gc_thresh ints sit directly after the default parms
		 * inside struct neigh_table -- confirm against the struct
		 * layout before touching this.
		 */
		t->neigh_vars[14].data = (int *)(p + 1);
		t->neigh_vars[15].data = (int *)(p + 1) + 1;
		t->neigh_vars[16].data = (int *)(p + 1) + 2;
		t->neigh_vars[17].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[3].proc_handler = handler;
		t->neigh_vars[3].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[4].proc_handler = handler;
		t->neigh_vars[4].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[12].proc_handler = handler;
		t->neigh_vars[12].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[13].proc_handler = handler;
		t->neigh_vars[13].extra1 = dev;
	}

	/* Own copy of the name: dev may go away while the table lives */
	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2852
2853 void neigh_sysctl_unregister(struct neigh_parms *p)
2854 {
2855         if (p->sysctl_table) {
2856                 struct neigh_sysctl_table *t = p->sysctl_table;
2857                 p->sysctl_table = NULL;
2858                 unregister_sysctl_table(t->sysctl_header);
2859                 kfree(t->dev_name);
2860                 kfree(t);
2861         }
2862 }
2863 EXPORT_SYMBOL(neigh_sysctl_unregister);
2864
2865 #endif  /* CONFIG_SYSCTL */
2866
/* Register the address-family-independent (PF_UNSPEC) rtnetlink handlers
 * for neighbour entries (new/del/dump) and neighbour tables (dump/set).
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

/* Must run after core networking is up but before protocol init */
subsys_initcall(neigh_init);
2881