Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/* Debug verbosity for this file: level >= 1 enables NEIGH_PRINTK1,
 * level >= 2 additionally enables NEIGH_PRINTK2. */
#define NEIGH_DEBUG 1

/* Unconditional log helper, and a no-op used for disabled debug levels. */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
/* Both levels default to no-ops; re-enabled below according to NEIGH_DEBUG. */
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/* The proxy-neighbour hash tables have PNEIGH_HASHMASK + 1 (16) buckets;
 * see pneigh_hash() and pneigh_ifdown(). */
#define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
116 /*
117  * It is random distribution in the interval (1/2)*base...(3/2)*base.
118  * It corresponds to default IPv6 settings and is not overridable,
119  * because it is really reasonable choice.
120  */
121
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	/* uniform in [base/2, 3*base/2) */
	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/* Synchronously walk the whole hash table and evict every entry that is
 * unreferenced (refcnt == 1, i.e. only the table holds it) and not
 * NUD_PERMANENT.  Called from neigh_alloc() when the table exceeds its
 * gc thresholds.  Returns 1 if at least one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink via rcu_assign_pointer(): lockless
				 * readers may still be traversing this chain.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record the flush time so neigh_alloc() can rate-limit forced GC. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/* Unlink and kill every entry whose device is @dev (all entries when
 * @dev is NULL).  Called with tbl->lock write-held — see
 * neigh_changeaddr() and neigh_ifdown().
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink from the chain first, then neutralise the
			 * entry under its own lock. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				/* Demote to a state that generates no
				 * further resolution traffic. */
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
253
/* Flush all cached entries for @dev from @tbl, e.g. after the device's
 * address changed.  Wraps neigh_flush_dev() in the table write lock.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
261
/* @dev is going down: flush its neighbour and proxy-neighbour entries,
 * then stop the proxy timer and drop any queued proxy packets.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* quiesce the proxy machinery after dropping the table lock */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
274
/* Allocate and minimally initialise a new cache entry for @tbl.  May run
 * a synchronous forced GC when the table is over its gc thresholds;
 * returns NULL if the table is still over gc_thresh3 afterwards or the
 * slab allocation fails.  The entry comes back with refcnt 1 and
 * dead = 1 — it only becomes live once neigh_create() hashes it in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* entries = table population *before* this allocation */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated        = n->used = now;
	n->nud_state      = NUD_NONE;
	n->output         = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms          = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl            = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead           = 1;	/* not hashed yet; neigh_create() clears this */
out:
	return n;

out_entries:
	/* undo the optimistic increment above */
	atomic_dec(&tbl->entries);
	goto out;
}
315
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
317 {
318         size_t size = (1 << shift) * sizeof(struct neighbour *);
319         struct neigh_hash_table *ret;
320         struct neighbour __rcu **buckets;
321
322         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323         if (!ret)
324                 return NULL;
325         if (size <= PAGE_SIZE)
326                 buckets = kzalloc(size, GFP_ATOMIC);
327         else
328                 buckets = (struct neighbour __rcu **)
329                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330                                            get_order(size));
331         if (!buckets) {
332                 kfree(ret);
333                 return NULL;
334         }
335         ret->hash_buckets = buckets;
336         ret->hash_shift = shift;
337         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338         ret->hash_rnd |= 1;
339         return ret;
340 }
341
342 static void neigh_hash_free_rcu(struct rcu_head *head)
343 {
344         struct neigh_hash_table *nht = container_of(head,
345                                                     struct neigh_hash_table,
346                                                     rcu);
347         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
348         struct neighbour __rcu **buckets = nht->hash_buckets;
349
350         if (size <= PAGE_SIZE)
351                 kfree(buckets);
352         else
353                 free_pages((unsigned long)buckets, get_order(size));
354         kfree(nht);
355 }
356
/* Rehash @tbl into a new table of 2^new_shift buckets.  The caller must
 * write-hold tbl->lock.  The new table is published with
 * rcu_assign_pointer() and the old one is freed only after an RCU grace
 * period, so lockless readers never see freed memory.  On allocation
 * failure the old table is kept and returned.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* high bits select the bucket, as in neigh_lookup() */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* push-front into the new bucket */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
397
/* Look up the entry for (@pkey, @dev) and take a reference on it.
 * Lockless: runs under rcu_read_lock_bh() against the RCU-published
 * hash table.  Returns NULL on a miss, or when the matching entry is
 * already being released (its refcnt already reached zero).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			/* refcnt may concurrently hit zero while the entry
			 * is being freed; treat that as a miss */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
427
/* Like neigh_lookup(), but matches on (@net, @pkey) only, ignoring the
 * device.  Takes a reference on the returned entry; NULL on miss or if
 * the matching entry is concurrently being released.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			/* see neigh_lookup(): refcnt may already be zero */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
458
/* Create a new entry for (@pkey, @dev) in @tbl and hash it in, running
 * the protocol constructor and the device's neigh_setup hook first.
 * If another CPU inserted a matching entry while we were setting up,
 * that entry is returned (with a reference taken) and ours is released.
 * Returns an ERR_PTR on allocation/constructor failure or when the
 * entry's parms are already dead.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* backdate the confirmation stamp so the fresh entry does not
	 * start out looking recently confirmed */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* grow the hash table once entries outnumber buckets */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* refuse to hash a new entry onto parms already marked dead */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* re-check for a concurrent insert made while we were unlocked */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;	/* the entry is live from here on */
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564                 struct net *net, const void *pkey, struct net_device *dev)
565 {
566         int key_len = tbl->key_len;
567         u32 hash_val = pneigh_hash(pkey, key_len);
568
569         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570                                  net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
/* Find the proxy entry for (@net, @pkey, @dev); when absent and @creat
 * is non-zero, allocate and insert a new one.  The creation path
 * requires the RTNL (asserted below) and may sleep (GFP_KERNEL).
 * Returns NULL on a plain miss (!creat) or on allocation/constructor
 * failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* creation below is serialised by the RTNL */
	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* constructor failed: undo the references taken above */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	/* push-front into the hash chain */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
619
620
/* Remove and free the proxy entry matching (@net, @pkey, @dev) exactly
 * (no wildcard-device matching here, unlike __pneigh_lookup_1()).
 * Returns 0 on success, -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			/* unlink under the lock, destroy after dropping it */
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
647
/* Drop every proxy entry pointing at @dev (or all entries when @dev is
 * NULL).  Runs with tbl->lock write-held by the caller — see
 * neigh_ifdown().  Always returns -ENOENT; the only caller ignores it.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
674 static inline void neigh_parms_put(struct neigh_parms *parms)
675 {
676         if (atomic_dec_and_test(&parms->refcnt))
677                 neigh_parms_destroy(parms);
678 }
679
680 static void neigh_destroy_rcu(struct rcu_head *head)
681 {
682         struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684         kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685 }
/*
 *	Final teardown of a neighbour entry.  The entry must already be
 *	out of the table (dead != 0); the memory itself is returned via
 *	an RCU callback so lockless readers stay safe.
 */
void neigh_destroy(struct neighbour *neigh)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* a still-armed timer here would mean a missing reference */
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
715
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* route transmits through ops->output (cf. neigh_connect, which
	 * installs the connected_output fast path) */
	neigh->output = neigh->ops->output;
}
727
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* connected_output is the fast path used while the entry is valid */
	neigh->output = neigh->ops->connected_output;
}
739
740 static void neigh_periodic_work(struct work_struct *work)
741 {
742         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
743         struct neighbour *n;
744         struct neighbour __rcu **np;
745         unsigned int i;
746         struct neigh_hash_table *nht;
747
748         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
749
750         write_lock_bh(&tbl->lock);
751         nht = rcu_dereference_protected(tbl->nht,
752                                         lockdep_is_held(&tbl->lock));
753
754         /*
755          *      periodically recompute ReachableTime from random function
756          */
757
758         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
759                 struct neigh_parms *p;
760                 tbl->last_rand = jiffies;
761                 for (p = &tbl->parms; p; p = p->next)
762                         p->reachable_time =
763                                 neigh_rand_reach_time(p->base_reachable_time);
764         }
765
766         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
767                 np = &nht->hash_buckets[i];
768
769                 while ((n = rcu_dereference_protected(*np,
770                                 lockdep_is_held(&tbl->lock))) != NULL) {
771                         unsigned int state;
772
773                         write_lock(&n->lock);
774
775                         state = n->nud_state;
776                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
777                                 write_unlock(&n->lock);
778                                 goto next_elt;
779                         }
780
781                         if (time_before(n->used, n->confirmed))
782                                 n->used = n->confirmed;
783
784                         if (atomic_read(&n->refcnt) == 1 &&
785                             (state == NUD_FAILED ||
786                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
787                                 *np = n->next;
788                                 n->dead = 1;
789                                 write_unlock(&n->lock);
790                                 neigh_cleanup_and_release(n);
791                                 continue;
792                         }
793                         write_unlock(&n->lock);
794
795 next_elt:
796                         np = &n->next;
797                 }
798                 /*
799                  * It's fine to release lock here, even if hash table
800                  * grows while we are preempted.
801                  */
802                 write_unlock_bh(&tbl->lock);
803                 cond_resched();
804                 write_lock_bh(&tbl->lock);
805         }
806         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
807          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
808          * base_reachable_time.
809          */
810         schedule_delayed_work(&tbl->gc_work,
811                               tbl->parms.base_reachable_time >> 1);
812         write_unlock_bh(&tbl->lock);
813 }
814
815 static __inline__ int neigh_max_probes(struct neighbour *n)
816 {
817         struct neigh_parms *p = n->parms;
818         return (n->nud_state & NUD_PROBE) ?
819                 p->ucast_probes :
820                 p->ucast_probes + p->app_probes + p->mcast_probes;
821 }
822
/* Entry has gone NUD_FAILED: feed its queued packets to the protocol's
 * error_report handler, then flush the queue.  Called and returns with
 * neigh->lock write-held, but drops it around each error_report() call
 * (hence the sparse annotations below).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* drop anything left (state may have changed while unlocked) */
	skb_queue_purge(&neigh->arp_queue);
}
846
/* Send one solicitation for @neigh, using a copy of the oldest queued
 * skb as the template.  Enters with neigh->lock write-held and releases
 * it before calling into the protocol's solicit handler.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
859
860 /* Called when a timer expires for a neighbour entry. */
861
/* Per-neighbour NUD state-machine timer (softirq context).
 *
 * Drives the transitions REACHABLE -> DELAY/STALE when the confirmation
 * window expires, DELAY -> REACHABLE (late confirmation) or -> PROBE,
 * and PROBE/INCOMPLETE -> FAILED once neigh_max_probes() probes have
 * gone unanswered.  Re-arms the timer while the entry stays in a timed
 * state and emits a netlink notification when "notify" is set.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER)) {
		/* Can happen transiently on SMP when the timer races with
		 * a state change; only warn on UP where it is unexpected.
		 */
#ifndef CONFIG_SMP
		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed: sleep until the window ends. */
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Recently used but unconfirmed: enter DELAY. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			/* Idle and unconfirmed: demote to STALE. */
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* No confirmation: start active probing. */
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		/* Out of probes: give up and flush the pending queue. */
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/2 in the future. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);	/* timer was idle: take a ref for it */
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);	/* drop the reference held by this timer run */
}
947
/* Kick resolution for a neighbour that is not currently usable.
 *
 * Returns 0 when the caller may transmit immediately, 1 when the skb
 * was queued on arp_queue (ownership passes to the queue) or dropped
 * because the entry went to NUD_FAILED.  Called with BHs enabled;
 * takes neigh->lock itself.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the probe counter
			 * with ucast_probes so only mcast/app probes
			 * remain, and fire the first probe right away.
			 */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe this neighbour: fail at once. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue the packet, evicting the oldest one when
			 * the unresolved queue is already full.
			 */
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = __skb_dequeue(&neigh->arp_queue);
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases the lock itself; either way BHs are
	 * re-enabled only after the lock has been dropped.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
1007 EXPORT_SYMBOL(__neigh_event_send);
1008
1009 static void neigh_update_hhs(struct neighbour *neigh)
1010 {
1011         struct hh_cache *hh;
1012         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1013                 = NULL;
1014
1015         if (neigh->dev->header_ops)
1016                 update = neigh->dev->header_ops->cache_update;
1017
1018         if (update) {
1019                 hh = &neigh->hh;
1020                 if (hh->hh_len) {
1021                         write_seqlock_bh(&hh->hh_lock);
1022                         update(hh, neigh->dev, neigh->ha);
1023                         write_sequnlock_bh(&hh->hh_lock);
1024                 }
1025         }
1026 }
1027
1028
1029
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows the existing lladdr to be
				overridden if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE marks the existing "connected"
				lladdr as suspect instead of overriding
				it when the proposed one differs.
				It also allows the current state to be
				retained if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows the existing
				NTF_ROUTER flag to be overridden.
	NEIGH_UPDATE_F_ISROUTER indicates that the neighbour is known
				to be a router.

   Caller MUST hold a reference count on the entry.
 */
1050
/* Apply a state/address update to @neigh per the flag rules documented
 * above.  Returns 0 on success, -EPERM when a non-admin update touches a
 * NOARP/PERMANENT entry, -EINVAL when no usable lladdr is available.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Transition to an invalid state: stop the timer; if the
		 * entry just failed while resolving, flush its queue too.
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address, mark it suspect. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		/* Restart the timer for states that need one. */
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new hardware address under the seqlock and
		 * refresh any cached hardware headers derived from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		/* The entry just became valid: drain packets that queued
		 * up during resolution.  The lock is dropped around each
		 * transmit; re-check NUD_VALID every iteration.
		 */
		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
1196 EXPORT_SYMBOL(neigh_update);
1197
1198 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1199                                  u8 *lladdr, void *saddr,
1200                                  struct net_device *dev)
1201 {
1202         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1203                                                  lladdr || !dev->addr_len);
1204         if (neigh)
1205                 neigh_update(neigh, lladdr, NUD_STALE,
1206                              NEIGH_UPDATE_F_OVERRIDE);
1207         return neigh;
1208 }
1209 EXPORT_SYMBOL(neigh_event_ns);
1210
/* Takes n->lock itself (write_lock_bh below); the old note that this is
 * "called with read_lock_bh(&n->lock)" appears stale — verify callers. */
/* Populate the neighbour's cached hardware header for @dst's protocol
 * using the device's header_ops->cache callback.  hh_len != 0 marks the
 * cache as initialized, so the work is done at most once per entry.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1228
1229 /* This function can be used in contexts, where only old dev_queue_xmit
1230  * worked, f.e. if you want to override normal output path (eql, shaper),
1231  * but resolution is not made yet.
1232  */
1233
1234 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1235 {
1236         struct net_device *dev = skb->dev;
1237
1238         __skb_pull(skb, skb_network_offset(skb));
1239
1240         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1241                             skb->len) < 0 &&
1242             dev->header_ops->rebuild(skb))
1243                 return 0;
1244
1245         return dev_queue_xmit(skb);
1246 }
1247 EXPORT_SYMBOL(neigh_compat_output);
1248
1249 /* Slow and careful. */
1250
/* Slow-path transmit: trigger/await address resolution, then build the
 * link-layer header and transmit.  The header copy retries under the
 * ha_lock seqlock so a concurrent address change cannot produce a torn
 * header.  Returns the dev_queue_xmit() result, 0 when the skb was
 * queued by neigh_event_send(), or -EINVAL when the skb is dropped.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	/* neigh_event_send() returns 0 when the entry is usable now. */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Initialise the cached hardware header on first use. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1290 EXPORT_SYMBOL(neigh_resolve_output);
1291
1292 /* As fast as possible without hh cache */
1293
1294 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1295 {
1296         struct net_device *dev = neigh->dev;
1297         unsigned int seq;
1298         int err;
1299
1300         __skb_pull(skb, skb_network_offset(skb));
1301
1302         do {
1303                 seq = read_seqbegin(&neigh->ha_lock);
1304                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1305                                       neigh->ha, NULL, skb->len);
1306         } while (read_seqretry(&neigh->ha_lock, seq));
1307
1308         if (err >= 0)
1309                 err = dev_queue_xmit(skb);
1310         else {
1311                 err = -EINVAL;
1312                 kfree_skb(skb);
1313         }
1314         return err;
1315 }
1316 EXPORT_SYMBOL(neigh_connected_output);
1317
/* Output path for neighbours that need no link-layer resolution at all:
 * hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1322 EXPORT_SYMBOL(neigh_direct_output);
1323
/* Timer handler for the proxy queue: replay every queued request whose
 * scheduled time has arrived via tbl->proxy_redo(), drop the rest-of-life
 * ones whose device went down, and re-arm the timer for the earliest
 * still-pending entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* jiffies until the earliest pending skb */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due (or overdue): process it now. */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1357
1358 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1359                     struct sk_buff *skb)
1360 {
1361         unsigned long now = jiffies;
1362         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1363
1364         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1365                 kfree_skb(skb);
1366                 return;
1367         }
1368
1369         NEIGH_CB(skb)->sched_next = sched_next;
1370         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1371
1372         spin_lock(&tbl->proxy_queue.lock);
1373         if (del_timer(&tbl->proxy_timer)) {
1374                 if (time_before(tbl->proxy_timer.expires, sched_next))
1375                         sched_next = tbl->proxy_timer.expires;
1376         }
1377         skb_dst_drop(skb);
1378         dev_hold(skb->dev);
1379         __skb_queue_tail(&tbl->proxy_queue, skb);
1380         mod_timer(&tbl->proxy_timer, sched_next);
1381         spin_unlock(&tbl->proxy_queue.lock);
1382 }
1383 EXPORT_SYMBOL(pneigh_enqueue);
1384
1385 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1386                                                       struct net *net, int ifindex)
1387 {
1388         struct neigh_parms *p;
1389
1390         for (p = &tbl->parms; p; p = p->next) {
1391                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1392                     (!p->dev && !ifindex))
1393                         return p;
1394         }
1395
1396         return NULL;
1397 }
1398
/* Clone the table's default parms for @dev and link the copy onto the
 * table's parms list.  The device gets a chance to adjust the values via
 * ndo_neigh_setup().  Returns the new parms (refcount 1, holding a
 * device and a net reference) or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* ifindex 0 selects the table-wide default parms as template. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Let the driver veto or tune the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
1433 EXPORT_SYMBOL(neigh_parms_alloc);
1434
1435 static void neigh_rcu_free_parms(struct rcu_head *head)
1436 {
1437         struct neigh_parms *parms =
1438                 container_of(head, struct neigh_parms, rcu_head);
1439
1440         neigh_parms_put(parms);
1441 }
1442
/* Unlink @parms from @tbl's list and schedule the final put after an RCU
 * grace period (readers may still be traversing the list).  The table's
 * built-in default parms (&tbl->parms) are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
1464 EXPORT_SYMBOL(neigh_parms_release);
1465
/* Final destructor, invoked when the parms refcount reaches zero:
 * release the net reference taken in neigh_parms_alloc() and free.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1471
1472 static struct lock_class_key neigh_table_proxy_queue_class;
1473
/* Initialise a neighbour table's internals (caches, hashes, stats, proc
 * entry, gc work and proxy timer) without registering it on the global
 * table list.  Panics on allocation failure — this runs at protocol
 * init time where recovery is not possible.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (!tbl->kmem_cachep)
		tbl->kmem_cachep =
			kmem_cache_create(tbl->id, tbl->entry_size, 0,
					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					  NULL);
	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash: 2^3 = 8 buckets; grows on demand later. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1517 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1518
/* Initialise @tbl and register it on the global neigh_tables list.
 * Registering two tables for the same address family is a bug; it is
 * reported (with a stack dump) but the table is linked in anyway.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Detect a pre-existing table for this family before linking. */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
1539 EXPORT_SYMBOL(neigh_table_init);
1540
/* Tear down a neighbour table: stop its gc work and proxy timer, drop
 * all entries, unlink it from the global list and free its hashes,
 * stats, proc entry and slab cache.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash table after readers are done with it. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	kmem_cache_destroy(tbl->kmem_cachep);
	tbl->kmem_cachep = NULL;

	return 0;
}
1578 EXPORT_SYMBOL(neigh_table_clear);
1579
/* RTM_DELNEIGH handler: delete a neighbour (or proxy) entry described by
 * the netlink message.  A normal entry is "deleted" by forcing it to
 * NUD_FAILED with admin override.  Returns 0 or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the family's table; it cannot go away under RTNL,
		 * so the list lock can be dropped here.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1643
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * from the netlink message, honouring NLM_F_CREATE / NLM_F_EXCL /
 * NLM_F_REPLACE semantics.  Returns 0 or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the family's table; safe to drop the list lock
		 * because tables are never removed while RTNL is held.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			/* Proxy entries live in a separate hash; create on
			 * demand and just refresh the flags.
			 */
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without REPLACE, an existing lladdr wins. */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE only kicks resolution, no state change. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1741
/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute.
 * The NLA_PUT* macros jump to the nla_put_failure label below when the
 * skb runs out of tailroom.
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* per-device parms carry the ifindex; table defaults do not */
	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	/* roll back the partially written nested attribute */
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1775
/* Build one RTM_NEWNEIGHTBL message describing @tbl (thresholds,
 * configuration, aggregated per-CPU statistics and the default parms).
 * tbl->lock is read-held (BH off) across the fill so the values form a
 * consistent snapshot.  Returns the message length or -EMSGSIZE.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	/* NLA_PUT* macros jump to nla_put_failure on insufficient room */
	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash table itself is RCU-managed, not covered by
		 * tbl->lock, so dereference it under rcu_read_lock_bh */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* sum the per-CPU counters into one ndt_stats blob */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	/* the table's default parms must not be bound to a device */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1859
/* Build an RTM_NEWNEIGHTBL message carrying only the table name and a
 * single (per-device) parms set — used when dumping the additional
 * parms instances attached to a table.  Returns the message length or
 * -EMSGSIZE.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* lock keeps the parms values consistent while we copy them */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1891
/* Validation policy for top-level RTM_SETNEIGHTBL attributes */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1900
/* Validation policy for the attributes nested inside NDTA_PARMS */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1916
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC thresholds,
 * GC interval and/or one of its parms sets.  The table is located by
 * its NDTA_NAME (and optionally the family in the header); lock order
 * is neigh_tbl_lock (read) -> tbl->lock (write, BH off).
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* the table name attribute is mandatory */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		/* family 0 (AF_UNSPEC) matches any table */
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every parms attribute that was supplied */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2041
/* RTM_GETNEIGHTBL dump handler: emit one message per table plus one per
 * additional (per-device) parms instance.  Resume state for a partial
 * dump is kept in cb->args[0] (table index) and cb->args[1] (parms
 * index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms itself was covered above; walk the extra
		 * per-device parms chained behind it */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* skip count only applies to the table we resumed in */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2090
/* Build an RTM_NEWNEIGH message describing @neigh.  The entry's lock is
 * read-held while snapshotting its state, hardware address and cache
 * timestamps.  Returns the message length or -EMSGSIZE.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* copy the hw address under the lock so it is coherent */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* must drop neigh->lock before cancelling */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* timestamps exported as clock ticks relative to "now" */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2140
/* Broadcast a neighbour change: first to in-kernel netevent listeners,
 * then to userspace as an RTM_NEWNEIGH netlink message.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2146
/* Dump the entries of one neighbour table into @skb, resuming from the
 * bucket/index saved in cb->args[1]/args[2] by a previous partial dump.
 * Entries are traversed under RCU.  Returns skb->len when the table is
 * exhausted or -1 when the skb filled up mid-dump.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h < (1 << nht->hash_shift); h++) {
		if (h < s_h)
			continue;
		/* the saved index only applies to the resumed bucket */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* record where to resume next time */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2189
/* RTM_GETNEIGH dump handler: iterate all neighbour tables (filtered by
 * family unless AF_UNSPEC) and dump each via neigh_dump_table().  The
 * table index is resumed from cb->args[0].
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* moved on to a new table: clear its bucket/index state */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2213
/* Invoke @cb on every neighbour in @tbl, passing @cookie through.
 * tbl->lock is read-held to prevent a concurrent hash-table resize;
 * entries themselves are traversed under RCU (BH off).
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2235
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Call @cb on every entry; any entry for which @cb returns non-zero is
 * unlinked from its hash chain, marked dead and released.  Unlinking
 * uses rcu_assign_pointer so concurrent RCU readers stay safe.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* splice n out of the chain; np stays put
				 * so the next iteration sees n's successor */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* release only after dropping n->lock */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2270
2271 #ifdef CONFIG_PROC_FS
2272
2273 static struct neighbour *neigh_get_first(struct seq_file *seq)
2274 {
2275         struct neigh_seq_state *state = seq->private;
2276         struct net *net = seq_file_net(seq);
2277         struct neigh_hash_table *nht = state->nht;
2278         struct neighbour *n = NULL;
2279         int bucket = state->bucket;
2280
2281         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2282         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2283                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2284
2285                 while (n) {
2286                         if (!net_eq(dev_net(n->dev), net))
2287                                 goto next;
2288                         if (state->neigh_sub_iter) {
2289                                 loff_t fakep = 0;
2290                                 void *v;
2291
2292                                 v = state->neigh_sub_iter(state, n, &fakep);
2293                                 if (!v)
2294                                         goto next;
2295                         }
2296                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2297                                 break;
2298                         if (n->nud_state & ~NUD_NOARP)
2299                                 break;
2300 next:
2301                         n = rcu_dereference_bh(n->next);
2302                 }
2303
2304                 if (n)
2305                         break;
2306         }
2307         state->bucket = bucket;
2308
2309         return n;
2310 }
2311
/* Advance the seq-file walk from entry @n to the next visible
 * neighbour, continuing into later hash buckets as needed.  When @pos
 * is non-NULL it is decremented once for a successful advance (used by
 * neigh_get_idx during repositioning).  Same visibility filters as
 * neigh_get_first.  Caller holds rcu_read_lock_bh().
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* the sub-iterator may have further positions within n */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2359
2360 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2361 {
2362         struct neighbour *n = neigh_get_first(seq);
2363
2364         if (n) {
2365                 --(*pos);
2366                 while (*pos) {
2367                         n = neigh_get_next(seq, n, pos);
2368                         if (!n)
2369                                 break;
2370                 }
2371         }
2372         return *pos ? NULL : n;
2373 }
2374
2375 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2376 {
2377         struct neigh_seq_state *state = seq->private;
2378         struct net *net = seq_file_net(seq);
2379         struct neigh_table *tbl = state->tbl;
2380         struct pneigh_entry *pn = NULL;
2381         int bucket = state->bucket;
2382
2383         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2384         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2385                 pn = tbl->phash_buckets[bucket];
2386                 while (pn && !net_eq(pneigh_net(pn), net))
2387                         pn = pn->next;
2388                 if (pn)
2389                         break;
2390         }
2391         state->bucket = bucket;
2392
2393         return pn;
2394 }
2395
2396 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2397                                             struct pneigh_entry *pn,
2398                                             loff_t *pos)
2399 {
2400         struct neigh_seq_state *state = seq->private;
2401         struct net *net = seq_file_net(seq);
2402         struct neigh_table *tbl = state->tbl;
2403
2404         pn = pn->next;
2405         while (!pn) {
2406                 if (++state->bucket > PNEIGH_HASHMASK)
2407                         break;
2408                 pn = tbl->phash_buckets[state->bucket];
2409                 while (pn && !net_eq(pneigh_net(pn), net))
2410                         pn = pn->next;
2411                 if (pn)
2412                         break;
2413         }
2414
2415         if (pn && pos)
2416                 --(*pos);
2417
2418         return pn;
2419 }
2420
2421 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2422 {
2423         struct pneigh_entry *pn = pneigh_get_first(seq);
2424
2425         if (pn) {
2426                 --(*pos);
2427                 while (*pos) {
2428                         pn = pneigh_get_next(seq, pn, pos);
2429                         if (!pn)
2430                                 break;
2431                 }
2432         }
2433         return *pos ? NULL : pn;
2434 }
2435
2436 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2437 {
2438         struct neigh_seq_state *state = seq->private;
2439         void *rc;
2440         loff_t idxpos = *pos;
2441
2442         rc = neigh_get_idx(seq, &idxpos);
2443         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2444                 rc = pneigh_get_idx(seq, &idxpos);
2445
2446         return rc;
2447 }
2448
/* Common seq_file ->start() for protocol neighbour /proc files (e.g.
 * ARP).  Takes rcu_read_lock_bh() — released in neigh_seq_stop() — and
 * snapshots the hash table pointer for the duration of the walk.
 * Position 0 yields SEQ_START_TOKEN (header line).
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is internal iterator state, never a caller flag */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2464
/* seq_file ->next(): walk the neighbour entries first, then — unless
 * NEIGH_SEQ_NEIGH_ONLY — continue into the proxy-neighbour table.  The
 * NEIGH_SEQ_IS_PNEIGH flag (set by pneigh_get_first) records which
 * phase @v belongs to.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* neighbour entries exhausted; switch to proxies */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* NEIGH_ONLY walks must never reach the pneigh phase */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2491
/* seq_file ->stop(): drop the RCU read lock taken in neigh_seq_start() */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2498
2499 /* statistics via seq_file */
2500
2501 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2502 {
2503         struct neigh_table *tbl = seq->private;
2504         int cpu;
2505
2506         if (*pos == 0)
2507                 return SEQ_START_TOKEN;
2508
2509         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2510                 if (!cpu_possible(cpu))
2511                         continue;
2512                 *pos = cpu+1;
2513                 return per_cpu_ptr(tbl->stats, cpu);
2514         }
2515         return NULL;
2516 }
2517
2518 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2519 {
2520         struct neigh_table *tbl = seq->private;
2521         int cpu;
2522
2523         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2524                 if (!cpu_possible(cpu))
2525                         continue;
2526                 *pos = cpu+1;
2527                 return per_cpu_ptr(tbl->stats, cpu);
2528         }
2529         return NULL;
2530 }
2531
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
	/* Nothing to release: the per-CPU stats walk takes no locks. */
}
2536
2537 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2538 {
2539         struct neigh_table *tbl = seq->private;
2540         struct neigh_statistics *st = v;
2541
2542         if (v == SEQ_START_TOKEN) {
2543                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2544                 return 0;
2545         }
2546
2547         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2548                         "%08lx %08lx  %08lx %08lx %08lx\n",
2549                    atomic_read(&tbl->entries),
2550
2551                    st->allocs,
2552                    st->destroys,
2553                    st->hash_grows,
2554
2555                    st->lookups,
2556                    st->hits,
2557
2558                    st->res_failed,
2559
2560                    st->rcv_probes_mcast,
2561                    st->rcv_probes_ucast,
2562
2563                    st->periodic_gc_runs,
2564                    st->forced_gc_runs,
2565                    st->unres_discards
2566                    );
2567
2568         return 0;
2569 }
2570
/* seq_file iterator callbacks for the per-table neighbour statistics. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2577
2578 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2579 {
2580         int ret = seq_open(file, &neigh_stat_seq_ops);
2581
2582         if (!ret) {
2583                 struct seq_file *sf = file->private_data;
2584                 sf->private = PDE(inode)->data;
2585         }
2586         return ret;
2587 };
2588
/* file_operations backing the seq_file neighbour statistics interface. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2596
2597 #endif /* CONFIG_PROC_FS */
2598
2599 static inline size_t neigh_nlmsg_size(void)
2600 {
2601         return NLMSG_ALIGN(sizeof(struct ndmsg))
2602                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2603                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2604                + nla_total_size(sizeof(struct nda_cacheinfo))
2605                + nla_total_size(4); /* NDA_PROBES */
2606 }
2607
2608 static void __neigh_notify(struct neighbour *n, int type, int flags)
2609 {
2610         struct net *net = dev_net(n->dev);
2611         struct sk_buff *skb;
2612         int err = -ENOBUFS;
2613
2614         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2615         if (skb == NULL)
2616                 goto errout;
2617
2618         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2619         if (err < 0) {
2620                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2621                 WARN_ON(err == -EMSGSIZE);
2622                 kfree_skb(skb);
2623                 goto errout;
2624         }
2625         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2626         return;
2627 errout:
2628         if (err < 0)
2629                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2630 }
2631
2632 #ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
	/* Emit an RTM_GETNEIGH request event for @n so a userspace
	 * resolver (arpd) can take over neighbour solicitation.
	 */
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2638 #endif /* CONFIG_ARPD */
2639
2640 #ifdef CONFIG_SYSCTL
2641
2642 #define NEIGH_VARS_MAX 19
2643
/* Template for the per-device (and "default") neigh sysctl tables.  A
 * private copy is kmemdup()ed by neigh_sysctl_register(), which wires
 * each entry's .data pointer by array index — the entry order here must
 * stay in sync with those index assignments.  The bracketed comments
 * below give the index and the field it is bound to.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
	char *dev_name;		/* kstrdup()ed dir name, freed on unregister */
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		{	/* [0] -> p->mcast_probes */
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [1] -> p->ucast_probes */
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [2] -> p->app_probes */
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [3] -> p->retrans_time (USER_HZ ticks) */
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [4] -> p->base_reachable_time (seconds) */
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [5] -> p->delay_probe_time */
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [6] -> p->gc_staletime */
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [7] -> p->queue_len */
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [8] -> p->proxy_qlen */
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [9] -> p->anycast_delay */
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [10] -> p->proxy_delay */
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [11] -> p->locktime */
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{	/* [12] -> p->retrans_time, millisecond view */
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{	/* [13] -> p->base_reachable_time, millisecond view */
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Entries [14]-[17] are table-wide gc knobs; they are
		 * zeroed out (table terminated early) for per-device
		 * registrations in neigh_sysctl_register().
		 */
		{	/* [14] */
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{	/* [15] */
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [16] */
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{	/* [17] */
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},	/* [18] sentinel */
	},
};
2761
2762 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2763                           char *p_name, proc_handler *handler)
2764 {
2765         struct neigh_sysctl_table *t;
2766         const char *dev_name_source = NULL;
2767
2768 #define NEIGH_CTL_PATH_ROOT     0
2769 #define NEIGH_CTL_PATH_PROTO    1
2770 #define NEIGH_CTL_PATH_NEIGH    2
2771 #define NEIGH_CTL_PATH_DEV      3
2772
2773         struct ctl_path neigh_path[] = {
2774                 { .procname = "net",     },
2775                 { .procname = "proto",   },
2776                 { .procname = "neigh",   },
2777                 { .procname = "default", },
2778                 { },
2779         };
2780
2781         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2782         if (!t)
2783                 goto err;
2784
2785         t->neigh_vars[0].data  = &p->mcast_probes;
2786         t->neigh_vars[1].data  = &p->ucast_probes;
2787         t->neigh_vars[2].data  = &p->app_probes;
2788         t->neigh_vars[3].data  = &p->retrans_time;
2789         t->neigh_vars[4].data  = &p->base_reachable_time;
2790         t->neigh_vars[5].data  = &p->delay_probe_time;
2791         t->neigh_vars[6].data  = &p->gc_staletime;
2792         t->neigh_vars[7].data  = &p->queue_len;
2793         t->neigh_vars[8].data  = &p->proxy_qlen;
2794         t->neigh_vars[9].data  = &p->anycast_delay;
2795         t->neigh_vars[10].data = &p->proxy_delay;
2796         t->neigh_vars[11].data = &p->locktime;
2797         t->neigh_vars[12].data  = &p->retrans_time;
2798         t->neigh_vars[13].data  = &p->base_reachable_time;
2799
2800         if (dev) {
2801                 dev_name_source = dev->name;
2802                 /* Terminate the table early */
2803                 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2804         } else {
2805                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2806                 t->neigh_vars[14].data = (int *)(p + 1);
2807                 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2808                 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2809                 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2810         }
2811
2812
2813         if (handler) {
2814                 /* RetransTime */
2815                 t->neigh_vars[3].proc_handler = handler;
2816                 t->neigh_vars[3].extra1 = dev;
2817                 /* ReachableTime */
2818                 t->neigh_vars[4].proc_handler = handler;
2819                 t->neigh_vars[4].extra1 = dev;
2820                 /* RetransTime (in milliseconds)*/
2821                 t->neigh_vars[12].proc_handler = handler;
2822                 t->neigh_vars[12].extra1 = dev;
2823                 /* ReachableTime (in milliseconds) */
2824                 t->neigh_vars[13].proc_handler = handler;
2825                 t->neigh_vars[13].extra1 = dev;
2826         }
2827
2828         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2829         if (!t->dev_name)
2830                 goto free;
2831
2832         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2833         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2834
2835         t->sysctl_header =
2836                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2837         if (!t->sysctl_header)
2838                 goto free_procname;
2839
2840         p->sysctl_table = t;
2841         return 0;
2842
2843 free_procname:
2844         kfree(t->dev_name);
2845 free:
2846         kfree(t);
2847 err:
2848         return -ENOBUFS;
2849 }
2850 EXPORT_SYMBOL(neigh_sysctl_register);
2851
2852 void neigh_sysctl_unregister(struct neigh_parms *p)
2853 {
2854         if (p->sysctl_table) {
2855                 struct neigh_sysctl_table *t = p->sysctl_table;
2856                 p->sysctl_table = NULL;
2857                 unregister_sysctl_table(t->sysctl_header);
2858                 kfree(t->dev_name);
2859                 kfree(t);
2860         }
2861 }
2862 EXPORT_SYMBOL(neigh_sysctl_unregister);
2863
2864 #endif  /* CONFIG_SYSCTL */
2865
/* Register the generic (PF_UNSPEC) rtnetlink handlers for neighbour
 * entries and neighbour-table parameters.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

/* Run at subsys init time, before protocol initcalls that register
 * their neigh tables.
 */
subsys_initcall(neigh_init);
2880