Merge branch 'for-linus' of git://git.kernel.dk/linux-block
[pandora-kernel.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/*
 * Debug verbosity of this file:
 *   0 - no debug output,
 *   1 - NEIGH_PRINTK1() messages are printed,
 *   2 - NEIGH_PRINTK1() and NEIGH_PRINTK2() messages are printed.
 */
#define NEIGH_DEBUG 1

/* Printing and discarding back-ends for the level-specific macros. */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif

/* Mask applied by pneigh_hash() to pick a proxy hash bucket (0..0xF). */
#define PNEIGH_HASHMASK		0xF
57
/* File-scope state. */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/* Forward declarations of helpers that are used before their definition. */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
static void neigh_timer_handler(unsigned long arg);
static void neigh_update_notify(struct neighbour *neigh);
static void __neigh_notify(struct neighbour *n, int type, int flags);
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   fields of the entry:
    - timer
    - resolution queue
89
   Again, nothing clever should be done under neigh->lock; the most
   complicated procedure we allow is dev->hard_header. It is assumed
   that dev->hard_header is simplistic and does not make callbacks
   to neighbour tables.
94
   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
   list of neighbour tables. This list is used only in process context.
 */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Pick a randomized reachable time derived from @base.
 *
 * The result is (base / 2) plus a random value below @base, i.e. it is
 * distributed over the interval (1/2)*base...(3/2)*base. A @base of zero
 * yields zero.
 *
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really a reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* Avoid a modulo by zero; no base means no reachable time. */
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133         struct neigh_hash_table *nht;
134
135         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136
137         write_lock_bh(&tbl->lock);
138         nht = rcu_dereference_protected(tbl->nht,
139                                         lockdep_is_held(&tbl->lock));
140         for (i = 0; i < (1 << nht->hash_shift); i++) {
141                 struct neighbour *n;
142                 struct neighbour __rcu **np;
143
144                 np = &nht->hash_buckets[i];
145                 while ((n = rcu_dereference_protected(*np,
146                                         lockdep_is_held(&tbl->lock))) != NULL) {
147                         /* Neighbour record may be discarded if:
148                          * - nobody refers to it.
149                          * - it is not permanent
150                          */
151                         write_lock(&n->lock);
152                         if (atomic_read(&n->refcnt) == 1 &&
153                             !(n->nud_state & NUD_PERMANENT)) {
154                                 rcu_assign_pointer(*np,
155                                         rcu_dereference_protected(n->next,
156                                                   lockdep_is_held(&tbl->lock)));
157                                 n->dead = 1;
158                                 shrunk  = 1;
159                                 write_unlock(&n->lock);
160                                 neigh_cleanup_and_release(n);
161                                 continue;
162                         }
163                         write_unlock(&n->lock);
164                         np = &n->next;
165                 }
166         }
167
168         tbl->last_flush = jiffies;
169
170         write_unlock_bh(&tbl->lock);
171
172         return shrunk;
173 }
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207         int i;
208         struct neigh_hash_table *nht;
209
210         nht = rcu_dereference_protected(tbl->nht,
211                                         lockdep_is_held(&tbl->lock));
212
213         for (i = 0; i < (1 << nht->hash_shift); i++) {
214                 struct neighbour *n;
215                 struct neighbour __rcu **np = &nht->hash_buckets[i];
216
217                 while ((n = rcu_dereference_protected(*np,
218                                         lockdep_is_held(&tbl->lock))) != NULL) {
219                         if (dev && n->dev != dev) {
220                                 np = &n->next;
221                                 continue;
222                         }
223                         rcu_assign_pointer(*np,
224                                    rcu_dereference_protected(n->next,
225                                                 lockdep_is_held(&tbl->lock)));
226                         write_lock(&n->lock);
227                         neigh_del_timer(n);
228                         n->dead = 1;
229
230                         if (atomic_read(&n->refcnt) != 1) {
231                                 /* The most unpleasant situation.
232                                    We must destroy neighbour entry,
233                                    but someone still uses it.
234
235                                    The destroy will be delayed until
236                                    the last user releases us, but
237                                    we must kill timers etc. and move
238                                    it to safe state.
239                                  */
240                                 skb_queue_purge(&n->arp_queue);
241                                 n->output = neigh_blackhole;
242                                 if (n->nud_state & NUD_VALID)
243                                         n->nud_state = NUD_NOARP;
244                                 else
245                                         n->nud_state = NUD_NONE;
246                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
247                         }
248                         write_unlock(&n->lock);
249                         neigh_cleanup_and_release(n);
250                 }
251         }
252 }
253
254 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
255 {
256         write_lock_bh(&tbl->lock);
257         neigh_flush_dev(tbl, dev);
258         write_unlock_bh(&tbl->lock);
259 }
260 EXPORT_SYMBOL(neigh_changeaddr);
261
262 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
263 {
264         write_lock_bh(&tbl->lock);
265         neigh_flush_dev(tbl, dev);
266         pneigh_ifdown(tbl, dev);
267         write_unlock_bh(&tbl->lock);
268
269         del_timer_sync(&tbl->proxy_timer);
270         pneigh_queue_purge(&tbl->proxy_queue);
271         return 0;
272 }
273 EXPORT_SYMBOL(neigh_ifdown);
274
275 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
276 {
277         struct neighbour *n = NULL;
278         unsigned long now = jiffies;
279         int entries;
280
281         entries = atomic_inc_return(&tbl->entries) - 1;
282         if (entries >= tbl->gc_thresh3 ||
283             (entries >= tbl->gc_thresh2 &&
284              time_after(now, tbl->last_flush + 5 * HZ))) {
285                 if (!neigh_forced_gc(tbl) &&
286                     entries >= tbl->gc_thresh3)
287                         goto out_entries;
288         }
289
290         n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
291         if (!n)
292                 goto out_entries;
293
294         skb_queue_head_init(&n->arp_queue);
295         rwlock_init(&n->lock);
296         seqlock_init(&n->ha_lock);
297         n->updated        = n->used = now;
298         n->nud_state      = NUD_NONE;
299         n->output         = neigh_blackhole;
300         seqlock_init(&n->hh.hh_lock);
301         n->parms          = neigh_parms_clone(&tbl->parms);
302         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
303
304         NEIGH_CACHE_STAT_INC(tbl, allocs);
305         n->tbl            = tbl;
306         atomic_set(&n->refcnt, 1);
307         n->dead           = 1;
308 out:
309         return n;
310
311 out_entries:
312         atomic_dec(&tbl->entries);
313         goto out;
314 }
315
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
317 {
318         size_t size = (1 << shift) * sizeof(struct neighbour *);
319         struct neigh_hash_table *ret;
320         struct neighbour __rcu **buckets;
321
322         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323         if (!ret)
324                 return NULL;
325         if (size <= PAGE_SIZE)
326                 buckets = kzalloc(size, GFP_ATOMIC);
327         else
328                 buckets = (struct neighbour __rcu **)
329                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330                                            get_order(size));
331         if (!buckets) {
332                 kfree(ret);
333                 return NULL;
334         }
335         ret->hash_buckets = buckets;
336         ret->hash_shift = shift;
337         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338         ret->hash_rnd |= 1;
339         return ret;
340 }
341
342 static void neigh_hash_free_rcu(struct rcu_head *head)
343 {
344         struct neigh_hash_table *nht = container_of(head,
345                                                     struct neigh_hash_table,
346                                                     rcu);
347         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
348         struct neighbour __rcu **buckets = nht->hash_buckets;
349
350         if (size <= PAGE_SIZE)
351                 kfree(buckets);
352         else
353                 free_pages((unsigned long)buckets, get_order(size));
354         kfree(nht);
355 }
356
357 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
358                                                 unsigned long new_shift)
359 {
360         unsigned int i, hash;
361         struct neigh_hash_table *new_nht, *old_nht;
362
363         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
364
365         old_nht = rcu_dereference_protected(tbl->nht,
366                                             lockdep_is_held(&tbl->lock));
367         new_nht = neigh_hash_alloc(new_shift);
368         if (!new_nht)
369                 return old_nht;
370
371         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
372                 struct neighbour *n, *next;
373
374                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
375                                                    lockdep_is_held(&tbl->lock));
376                      n != NULL;
377                      n = next) {
378                         hash = tbl->hash(n->primary_key, n->dev,
379                                          new_nht->hash_rnd);
380
381                         hash >>= (32 - new_nht->hash_shift);
382                         next = rcu_dereference_protected(n->next,
383                                                 lockdep_is_held(&tbl->lock));
384
385                         rcu_assign_pointer(n->next,
386                                            rcu_dereference_protected(
387                                                 new_nht->hash_buckets[hash],
388                                                 lockdep_is_held(&tbl->lock)));
389                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
390                 }
391         }
392
393         rcu_assign_pointer(tbl->nht, new_nht);
394         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
395         return new_nht;
396 }
397
398 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
399                                struct net_device *dev)
400 {
401         struct neighbour *n;
402         int key_len = tbl->key_len;
403         u32 hash_val;
404         struct neigh_hash_table *nht;
405
406         NEIGH_CACHE_STAT_INC(tbl, lookups);
407
408         rcu_read_lock_bh();
409         nht = rcu_dereference_bh(tbl->nht);
410         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
411
412         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413              n != NULL;
414              n = rcu_dereference_bh(n->next)) {
415                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
416                         if (!atomic_inc_not_zero(&n->refcnt))
417                                 n = NULL;
418                         NEIGH_CACHE_STAT_INC(tbl, hits);
419                         break;
420                 }
421         }
422
423         rcu_read_unlock_bh();
424         return n;
425 }
426 EXPORT_SYMBOL(neigh_lookup);
427
428 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
429                                      const void *pkey)
430 {
431         struct neighbour *n;
432         int key_len = tbl->key_len;
433         u32 hash_val;
434         struct neigh_hash_table *nht;
435
436         NEIGH_CACHE_STAT_INC(tbl, lookups);
437
438         rcu_read_lock_bh();
439         nht = rcu_dereference_bh(tbl->nht);
440         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
441
442         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443              n != NULL;
444              n = rcu_dereference_bh(n->next)) {
445                 if (!memcmp(n->primary_key, pkey, key_len) &&
446                     net_eq(dev_net(n->dev), net)) {
447                         if (!atomic_inc_not_zero(&n->refcnt))
448                                 n = NULL;
449                         NEIGH_CACHE_STAT_INC(tbl, hits);
450                         break;
451                 }
452         }
453
454         rcu_read_unlock_bh();
455         return n;
456 }
457 EXPORT_SYMBOL(neigh_lookup_nodev);
458
459 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
460                                struct net_device *dev)
461 {
462         u32 hash_val;
463         int key_len = tbl->key_len;
464         int error;
465         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466         struct neigh_hash_table *nht;
467
468         if (!n) {
469                 rc = ERR_PTR(-ENOBUFS);
470                 goto out;
471         }
472
473         memcpy(n->primary_key, pkey, key_len);
474         n->dev = dev;
475         dev_hold(dev);
476
477         /* Protocol specific setup. */
478         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
479                 rc = ERR_PTR(error);
480                 goto out_neigh_release;
481         }
482
483         /* Device specific setup. */
484         if (n->parms->neigh_setup &&
485             (error = n->parms->neigh_setup(n)) < 0) {
486                 rc = ERR_PTR(error);
487                 goto out_neigh_release;
488         }
489
490         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
491
492         write_lock_bh(&tbl->lock);
493         nht = rcu_dereference_protected(tbl->nht,
494                                         lockdep_is_held(&tbl->lock));
495
496         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
497                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
498
499         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
500
501         if (n->parms->dead) {
502                 rc = ERR_PTR(-EINVAL);
503                 goto out_tbl_unlock;
504         }
505
506         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507                                             lockdep_is_held(&tbl->lock));
508              n1 != NULL;
509              n1 = rcu_dereference_protected(n1->next,
510                         lockdep_is_held(&tbl->lock))) {
511                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
512                         neigh_hold(n1);
513                         rc = n1;
514                         goto out_tbl_unlock;
515                 }
516         }
517
518         n->dead = 0;
519         neigh_hold(n);
520         rcu_assign_pointer(n->next,
521                            rcu_dereference_protected(nht->hash_buckets[hash_val],
522                                                      lockdep_is_held(&tbl->lock)));
523         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
524         write_unlock_bh(&tbl->lock);
525         NEIGH_PRINTK2("neigh %p is created.\n", n);
526         rc = n;
527 out:
528         return rc;
529 out_tbl_unlock:
530         write_unlock_bh(&tbl->lock);
531 out_neigh_release:
532         neigh_release(n);
533         goto out;
534 }
535 EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564                 struct net *net, const void *pkey, struct net_device *dev)
565 {
566         int key_len = tbl->key_len;
567         u32 hash_val = pneigh_hash(pkey, key_len);
568
569         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570                                  net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
574 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
575                                     struct net *net, const void *pkey,
576                                     struct net_device *dev, int creat)
577 {
578         struct pneigh_entry *n;
579         int key_len = tbl->key_len;
580         u32 hash_val = pneigh_hash(pkey, key_len);
581
582         read_lock_bh(&tbl->lock);
583         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
584                               net, pkey, key_len, dev);
585         read_unlock_bh(&tbl->lock);
586
587         if (n || !creat)
588                 goto out;
589
590         ASSERT_RTNL();
591
592         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
593         if (!n)
594                 goto out;
595
596         write_pnet(&n->net, hold_net(net));
597         memcpy(n->key, pkey, key_len);
598         n->dev = dev;
599         if (dev)
600                 dev_hold(dev);
601
602         if (tbl->pconstructor && tbl->pconstructor(n)) {
603                 if (dev)
604                         dev_put(dev);
605                 release_net(net);
606                 kfree(n);
607                 n = NULL;
608                 goto out;
609         }
610
611         write_lock_bh(&tbl->lock);
612         n->next = tbl->phash_buckets[hash_val];
613         tbl->phash_buckets[hash_val] = n;
614         write_unlock_bh(&tbl->lock);
615 out:
616         return n;
617 }
618 EXPORT_SYMBOL(pneigh_lookup);
619
620
621 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
622                   struct net_device *dev)
623 {
624         struct pneigh_entry *n, **np;
625         int key_len = tbl->key_len;
626         u32 hash_val = pneigh_hash(pkey, key_len);
627
628         write_lock_bh(&tbl->lock);
629         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
630              np = &n->next) {
631                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
632                     net_eq(pneigh_net(n), net)) {
633                         *np = n->next;
634                         write_unlock_bh(&tbl->lock);
635                         if (tbl->pdestructor)
636                                 tbl->pdestructor(n);
637                         if (n->dev)
638                                 dev_put(n->dev);
639                         release_net(pneigh_net(n));
640                         kfree(n);
641                         return 0;
642                 }
643         }
644         write_unlock_bh(&tbl->lock);
645         return -ENOENT;
646 }
647
648 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
649 {
650         struct pneigh_entry *n, **np;
651         u32 h;
652
653         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
654                 np = &tbl->phash_buckets[h];
655                 while ((n = *np) != NULL) {
656                         if (!dev || n->dev == dev) {
657                                 *np = n->next;
658                                 if (tbl->pdestructor)
659                                         tbl->pdestructor(n);
660                                 if (n->dev)
661                                         dev_put(n->dev);
662                                 release_net(pneigh_net(n));
663                                 kfree(n);
664                                 continue;
665                         }
666                         np = &n->next;
667                 }
668         }
669         return -ENOENT;
670 }
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
674 static inline void neigh_parms_put(struct neigh_parms *parms)
675 {
676         if (atomic_dec_and_test(&parms->refcnt))
677                 neigh_parms_destroy(parms);
678 }
679
680 static void neigh_destroy_rcu(struct rcu_head *head)
681 {
682         struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684         kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685 }
686 /*
687  *      neighbour must already be out of the table;
688  *
689  */
690 void neigh_destroy(struct neighbour *neigh)
691 {
692         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
693
694         if (!neigh->dead) {
695                 printk(KERN_WARNING
696                        "Destroying alive neighbour %p\n", neigh);
697                 dump_stack();
698                 return;
699         }
700
701         if (neigh_del_timer(neigh))
702                 printk(KERN_WARNING "Impossible event.\n");
703
704         skb_queue_purge(&neigh->arp_queue);
705
706         dev_put(neigh->dev);
707         neigh_parms_put(neigh->parms);
708
709         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
710
711         atomic_dec(&neigh->tbl->entries);
712         call_rcu(&neigh->rcu, neigh_destroy_rcu);
713 }
714 EXPORT_SYMBOL(neigh_destroy);
715
716 /* Neighbour state is suspicious;
717    disable fast path.
718
719    Called with write_locked neigh.
720  */
721 static void neigh_suspect(struct neighbour *neigh)
722 {
723         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
724
725         neigh->output = neigh->ops->output;
726 }
727
728 /* Neighbour state is OK;
729    enable fast path.
730
731    Called with write_locked neigh.
732  */
733 static void neigh_connect(struct neighbour *neigh)
734 {
735         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
736
737         neigh->output = neigh->ops->connected_output;
738 }
739
740 static void neigh_periodic_work(struct work_struct *work)
741 {
742         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
743         struct neighbour *n;
744         struct neighbour __rcu **np;
745         unsigned int i;
746         struct neigh_hash_table *nht;
747
748         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
749
750         write_lock_bh(&tbl->lock);
751         nht = rcu_dereference_protected(tbl->nht,
752                                         lockdep_is_held(&tbl->lock));
753
754         /*
755          *      periodically recompute ReachableTime from random function
756          */
757
758         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
759                 struct neigh_parms *p;
760                 tbl->last_rand = jiffies;
761                 for (p = &tbl->parms; p; p = p->next)
762                         p->reachable_time =
763                                 neigh_rand_reach_time(p->base_reachable_time);
764         }
765
766         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
767                 np = &nht->hash_buckets[i];
768
769                 while ((n = rcu_dereference_protected(*np,
770                                 lockdep_is_held(&tbl->lock))) != NULL) {
771                         unsigned int state;
772
773                         write_lock(&n->lock);
774
775                         state = n->nud_state;
776                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
777                                 write_unlock(&n->lock);
778                                 goto next_elt;
779                         }
780
781                         if (time_before(n->used, n->confirmed))
782                                 n->used = n->confirmed;
783
784                         if (atomic_read(&n->refcnt) == 1 &&
785                             (state == NUD_FAILED ||
786                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
787                                 *np = n->next;
788                                 n->dead = 1;
789                                 write_unlock(&n->lock);
790                                 neigh_cleanup_and_release(n);
791                                 continue;
792                         }
793                         write_unlock(&n->lock);
794
795 next_elt:
796                         np = &n->next;
797                 }
798                 /*
799                  * It's fine to release lock here, even if hash table
800                  * grows while we are preempted.
801                  */
802                 write_unlock_bh(&tbl->lock);
803                 cond_resched();
804                 write_lock_bh(&tbl->lock);
805         }
806         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
807          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
808          * base_reachable_time.
809          */
810         schedule_delayed_work(&tbl->gc_work,
811                               tbl->parms.base_reachable_time >> 1);
812         write_unlock_bh(&tbl->lock);
813 }
814
815 static __inline__ int neigh_max_probes(struct neighbour *n)
816 {
817         struct neigh_parms *p = n->parms;
818         return (n->nud_state & NUD_PROBE) ?
819                 p->ucast_probes :
820                 p->ucast_probes + p->app_probes + p->mcast_probes;
821 }
822
823 static void neigh_invalidate(struct neighbour *neigh)
824         __releases(neigh->lock)
825         __acquires(neigh->lock)
826 {
827         struct sk_buff *skb;
828
829         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
830         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
831         neigh->updated = jiffies;
832
833         /* It is very thin place. report_unreachable is very complicated
834            routine. Particularly, it can hit the same neighbour entry!
835
836            So that, we try to be accurate and avoid dead loop. --ANK
837          */
838         while (neigh->nud_state == NUD_FAILED &&
839                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
840                 write_unlock(&neigh->lock);
841                 neigh->ops->error_report(neigh, skb);
842                 write_lock(&neigh->lock);
843         }
844         skb_queue_purge(&neigh->arp_queue);
845 }
846
/* Called when a timer expires for a neighbour entry.
 *
 * @arg is the struct neighbour pointer cast to unsigned long.  The handler
 * advances the entry's NUD state under neigh->lock, re-arms the timer while
 * the state is still one of NUD_IN_TIMER, sends a solicitation for entries
 * in NUD_INCOMPLETE/NUD_PROBE, and finishes by dropping one reference with
 * neigh_release().
 */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        /* The timer fired for an entry that is not in a timed state:
         * nothing to do except unlock and drop the reference.
         */
        if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
                printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
                goto out;
        }

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Confirmed recently enough: stay REACHABLE and
                         * wake up again when reachable_time runs out.
                         */
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Not confirmed, but used recently: move to DELAY. */
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        /* Neither confirmed nor used recently: go STALE. */
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* A confirmation arrived during the delay window. */
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start probing from zero. */
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Probe budget exhausted: mark the entry failed and flush its
         * queue.  neigh_invalidate() temporarily drops neigh->lock.
         */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm closer than HZ/2 from now. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                /* mod_timer() returning 0 means the timer was not pending,
                 * so the newly armed timer gets its own reference.
                 */
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                struct sk_buff *skb = skb_peek(&neigh->arp_queue);
                /* keep skb alive even if arp_queue overflows */
                if (skb)
                        skb = skb_copy(skb, GFP_ATOMIC);
                /* The solicitation is sent with the lock released, using
                 * the private copy taken above (may be NULL).
                 */
                write_unlock(&neigh->lock);
                neigh->ops->solicit(neigh, skb);
                atomic_inc(&neigh->probes);
                kfree_skb(skb);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);
}
941
/* Kick address resolution for @neigh on behalf of an outgoing packet.
 *
 * @skb may be NULL (no packet to queue).
 *
 * Returns 0 when the caller may go on and transmit: the entry is in a
 * CONNECTED, DELAY or PROBE state, or it was STALE and has just been moved
 * to DELAY.  Returns 1 when the caller must not transmit @skb: either the
 * entry is INCOMPLETE and @skb (if any) has been placed on arp_queue, or
 * resolution is impossible, in which case the entry is marked FAILED and
 * @skb has been freed.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        unsigned long now;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        now = jiffies;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                /* Resolution has not been started for this entry. */
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        /* Start the probe counter at ucast_probes and arm
                         * the timer for the next jiffy.
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = jiffies;
                        neigh_add_timer(neigh, now + 1);
                } else {
                        /* No multicast or application probes configured:
                         * fail immediately and consume the packet.
                         */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Queue is full: drop the oldest packet to make
                         * room for the new one.
                         */
                        if (skb_queue_len(&neigh->arp_queue) >=
                            neigh->parms->queue_len) {
                                struct sk_buff *buff;
                                buff = __skb_dequeue(&neigh->arp_queue);
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                }
                rc = 1;
        }
out_unlock_bh:
        write_unlock_bh(&neigh->lock);
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
996
997 static void neigh_update_hhs(struct neighbour *neigh)
998 {
999         struct hh_cache *hh;
1000         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1001                 = NULL;
1002
1003         if (neigh->dev->header_ops)
1004                 update = neigh->dev->header_ops->cache_update;
1005
1006         if (update) {
1007                 hh = &neigh->hh;
1008                 if (hh->hh_len) {
1009                         write_seqlock_bh(&hh->hh_lock);
1010                         update(hh, neigh->dev, neigh->ha);
1011                         write_sequnlock_bh(&hh->hh_lock);
1012                 }
1013         }
1014 }
1015
1016
1017
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
                                lladdr instead of overriding it
                                if it is different.
                                It also allows to retain current state
                                if lladdr is unchanged.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
                                a router.

   Returns 0 on success (including the case where a differing lladdr is
   silently ignored because no override flag was given), -EPERM for a
   non-administrative update of a NUD_NOARP/NUD_PERMANENT entry, and
   -EINVAL when no lladdr is supplied and the entry holds no valid one.

   Caller MUST hold reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative changes may touch NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        /* New state is not a valid one: stop the timer, record the state
         * and, when resolution was in progress and the new state is FAILED,
         * flush the pending packets.
         */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one.  From here on,
         * "lladdr == neigh->ha" (pointer equality) means "address
         * unchanged".
         */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        /* Address differs but overriding is not allowed:
                         * keep the old address, and either downgrade a
                         * connected entry to STALE (weak override) or
                         * ignore the update altogether.
                         */
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        /* State change: restart the timer if the new state is a timed one
         * (REACHABLE waits reachable_time, other timed states fire at once).
         */
        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        /* Address change: copy it in under ha_lock and refresh the cached
         * hardware header.
         */
        if (lladdr != neigh->ha) {
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        /* The entry has just become valid: send out the packets that were
         * queued while it was being resolved.
         */
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        /* Output runs with neigh->lock released; the state
                         * is re-checked by the loop condition afterwards.
                         */
                        write_unlock_bh(&neigh->lock);
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        /* NOTE(review): dst_get_neighbour() is called here
                         * without rcu_read_lock(); confirm whether this
                         * tree's implementation requires RCU protection.
                         */
                        if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
                                n1 = n2;
                        n1->output(n1, skb);
                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1181
1182 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1183                                  u8 *lladdr, void *saddr,
1184                                  struct net_device *dev)
1185 {
1186         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1187                                                  lladdr || !dev->addr_len);
1188         if (neigh)
1189                 neigh_update(neigh, lladdr, NUD_STALE,
1190                              NEIGH_UPDATE_F_OVERRIDE);
1191         return neigh;
1192 }
1193 EXPORT_SYMBOL(neigh_event_ns);
1194
/* Fill in the cached hardware header of @n for the protocol of @dst.
 *
 * Must be called WITHOUT n->lock held: the function takes n->lock for
 * writing itself (the visible caller, neigh_resolve_output(), invokes it
 * with no neighbour lock held).
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
        struct net_device *dev = dst->dev;
        __be16 prot = dst->ops->protocol;
        struct hh_cache *hh = &n->hh;

        write_lock_bh(&n->lock);

        /* Only one thread can come in here and initialize the
         * hh_cache entry.
         */
        /* hh_len is re-checked under the lock, so a racing caller that
         * lost the race finds the header already built and skips this.
         */
        if (!hh->hh_len)
                dev->header_ops->cache(n, hh, prot);

        write_unlock_bh(&n->lock);
}
1212
1213 /* This function can be used in contexts, where only old dev_queue_xmit
1214  * worked, f.e. if you want to override normal output path (eql, shaper),
1215  * but resolution is not made yet.
1216  */
1217
1218 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1219 {
1220         struct net_device *dev = skb->dev;
1221
1222         __skb_pull(skb, skb_network_offset(skb));
1223
1224         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1225                             skb->len) < 0 &&
1226             dev->header_ops->rebuild(skb))
1227                 return 0;
1228
1229         return dev_queue_xmit(skb);
1230 }
1231 EXPORT_SYMBOL(neigh_compat_output);
1232
1233 /* Slow and careful. */
1234
1235 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1236 {
1237         struct dst_entry *dst = skb_dst(skb);
1238         int rc = 0;
1239
1240         if (!dst)
1241                 goto discard;
1242
1243         __skb_pull(skb, skb_network_offset(skb));
1244
1245         if (!neigh_event_send(neigh, skb)) {
1246                 int err;
1247                 struct net_device *dev = neigh->dev;
1248                 unsigned int seq;
1249
1250                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1251                         neigh_hh_init(neigh, dst);
1252
1253                 do {
1254                         seq = read_seqbegin(&neigh->ha_lock);
1255                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1256                                               neigh->ha, NULL, skb->len);
1257                 } while (read_seqretry(&neigh->ha_lock, seq));
1258
1259                 if (err >= 0)
1260                         rc = dev_queue_xmit(skb);
1261                 else
1262                         goto out_kfree_skb;
1263         }
1264 out:
1265         return rc;
1266 discard:
1267         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1268                       dst, neigh);
1269 out_kfree_skb:
1270         rc = -EINVAL;
1271         kfree_skb(skb);
1272         goto out;
1273 }
1274 EXPORT_SYMBOL(neigh_resolve_output);
1275
1276 /* As fast as possible without hh cache */
1277
1278 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1279 {
1280         struct net_device *dev = neigh->dev;
1281         unsigned int seq;
1282         int err;
1283
1284         __skb_pull(skb, skb_network_offset(skb));
1285
1286         do {
1287                 seq = read_seqbegin(&neigh->ha_lock);
1288                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1289                                       neigh->ha, NULL, skb->len);
1290         } while (read_seqretry(&neigh->ha_lock, seq));
1291
1292         if (err >= 0)
1293                 err = dev_queue_xmit(skb);
1294         else {
1295                 err = -EINVAL;
1296                 kfree_skb(skb);
1297         }
1298         return err;
1299 }
1300 EXPORT_SYMBOL(neigh_connected_output);
1301
/* Output routine that hands @skb straight to dev_queue_xmit(); @neigh is
 * not consulted.  Returns the result of dev_queue_xmit().
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1307
1308 static void neigh_proxy_process(unsigned long arg)
1309 {
1310         struct neigh_table *tbl = (struct neigh_table *)arg;
1311         long sched_next = 0;
1312         unsigned long now = jiffies;
1313         struct sk_buff *skb, *n;
1314
1315         spin_lock(&tbl->proxy_queue.lock);
1316
1317         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1318                 long tdif = NEIGH_CB(skb)->sched_next - now;
1319
1320                 if (tdif <= 0) {
1321                         struct net_device *dev = skb->dev;
1322
1323                         __skb_unlink(skb, &tbl->proxy_queue);
1324                         if (tbl->proxy_redo && netif_running(dev)) {
1325                                 rcu_read_lock();
1326                                 tbl->proxy_redo(skb);
1327                                 rcu_read_unlock();
1328                         } else {
1329                                 kfree_skb(skb);
1330                         }
1331
1332                         dev_put(dev);
1333                 } else if (!sched_next || tdif < sched_next)
1334                         sched_next = tdif;
1335         }
1336         del_timer(&tbl->proxy_timer);
1337         if (sched_next)
1338                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1339         spin_unlock(&tbl->proxy_queue.lock);
1340 }
1341
1342 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1343                     struct sk_buff *skb)
1344 {
1345         unsigned long now = jiffies;
1346         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1347
1348         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1349                 kfree_skb(skb);
1350                 return;
1351         }
1352
1353         NEIGH_CB(skb)->sched_next = sched_next;
1354         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1355
1356         spin_lock(&tbl->proxy_queue.lock);
1357         if (del_timer(&tbl->proxy_timer)) {
1358                 if (time_before(tbl->proxy_timer.expires, sched_next))
1359                         sched_next = tbl->proxy_timer.expires;
1360         }
1361         skb_dst_drop(skb);
1362         dev_hold(skb->dev);
1363         __skb_queue_tail(&tbl->proxy_queue, skb);
1364         mod_timer(&tbl->proxy_timer, sched_next);
1365         spin_unlock(&tbl->proxy_queue.lock);
1366 }
1367 EXPORT_SYMBOL(pneigh_enqueue);
1368
1369 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1370                                                       struct net *net, int ifindex)
1371 {
1372         struct neigh_parms *p;
1373
1374         for (p = &tbl->parms; p; p = p->next) {
1375                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1376                     (!p->dev && !ifindex))
1377                         return p;
1378         }
1379
1380         return NULL;
1381 }
1382
1383 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1384                                       struct neigh_table *tbl)
1385 {
1386         struct neigh_parms *p, *ref;
1387         struct net *net = dev_net(dev);
1388         const struct net_device_ops *ops = dev->netdev_ops;
1389
1390         ref = lookup_neigh_parms(tbl, net, 0);
1391         if (!ref)
1392                 return NULL;
1393
1394         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1395         if (p) {
1396                 p->tbl            = tbl;
1397                 atomic_set(&p->refcnt, 1);
1398                 p->reachable_time =
1399                                 neigh_rand_reach_time(p->base_reachable_time);
1400
1401                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1402                         kfree(p);
1403                         return NULL;
1404                 }
1405
1406                 dev_hold(dev);
1407                 p->dev = dev;
1408                 write_pnet(&p->net, hold_net(net));
1409                 p->sysctl_table = NULL;
1410                 write_lock_bh(&tbl->lock);
1411                 p->next         = tbl->parms.next;
1412                 tbl->parms.next = p;
1413                 write_unlock_bh(&tbl->lock);
1414         }
1415         return p;
1416 }
1417 EXPORT_SYMBOL(neigh_parms_alloc);
1418
1419 static void neigh_rcu_free_parms(struct rcu_head *head)
1420 {
1421         struct neigh_parms *parms =
1422                 container_of(head, struct neigh_parms, rcu_head);
1423
1424         neigh_parms_put(parms);
1425 }
1426
1427 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1428 {
1429         struct neigh_parms **p;
1430
1431         if (!parms || parms == &tbl->parms)
1432                 return;
1433         write_lock_bh(&tbl->lock);
1434         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1435                 if (*p == parms) {
1436                         *p = parms->next;
1437                         parms->dead = 1;
1438                         write_unlock_bh(&tbl->lock);
1439                         if (parms->dev)
1440                                 dev_put(parms->dev);
1441                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1442                         return;
1443                 }
1444         }
1445         write_unlock_bh(&tbl->lock);
1446         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1447 }
1448 EXPORT_SYMBOL(neigh_parms_release);
1449
/* Release the namespace reference held by @parms and free it. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1455
/* Lock class used for the proxy_queue lock of every neighbour table. */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Initialise the run-time state of @tbl without linking it into the
 * global neigh_tables list.
 *
 * Sets up the default parms, the entry slab cache (unless the caller
 * already provided one), per-cpu statistics, the proc statistics file,
 * both hash tables, the table lock, the periodic GC work and the proxy
 * timer/queue.  Failure to allocate the statistics, the proc entry or the
 * hash tables ends in panic().
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        /* SLAB_PANIC is passed, so no NULL check follows this call. */
        if (!tbl->kmem_cachep)
                tbl->kmem_cachep =
                        kmem_cache_create(tbl->id, tbl->entry_size, 0,
                                          SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                          NULL);
        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        /* One bucket pointer per possible PNEIGH_HASHMASK value. */
        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        /* The lock is initialised before the GC work is scheduled. */
        rwlock_init(&tbl->lock);
        INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1502
1503 void neigh_table_init(struct neigh_table *tbl)
1504 {
1505         struct neigh_table *tmp;
1506
1507         neigh_table_init_no_netlink(tbl);
1508         write_lock(&neigh_tbl_lock);
1509         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1510                 if (tmp->family == tbl->family)
1511                         break;
1512         }
1513         tbl->next       = neigh_tables;
1514         neigh_tables    = tbl;
1515         write_unlock(&neigh_tbl_lock);
1516
1517         if (unlikely(tmp)) {
1518                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1519                        "family %d\n", tbl->family);
1520                 dump_stack();
1521         }
1522 }
1523 EXPORT_SYMBOL(neigh_table_init);
1524
/* Tear down @tbl: stop its deferred work and proxy timer, flush queued
 * proxy packets and neighbour entries, unlink it from neigh_tables and
 * release its hash tables, proc entry, statistics and slab cache.
 *
 * Always returns 0; entries still accounted in tbl->entries after the
 * flush are only reported with a KERN_CRIT message.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        if (atomic_read(&tbl->entries))
                printk(KERN_CRIT "neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* The hash table is handed to call_rcu() for freeing rather than
         * being freed directly here.
         */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        kmem_cache_destroy(tbl->kmem_cachep);
        tbl->kmem_cachep = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1563
/* Handle a netlink request to delete a neighbour or proxy entry.
 *
 * The request carries a struct ndmsg followed by attributes; NDA_DST is
 * mandatory.  Must be called with the RTNL held.
 *
 * Returns -EINVAL for a truncated message, a missing or too short NDA_DST,
 * or a non-proxy request without a device; -ENODEV for an unknown
 * ifindex; -ENOENT when no matching neighbour exists; -EAFNOSUPPORT when
 * no table handles ndm_family; otherwise the result of pneigh_delete() or
 * neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching table found: the list lock is released here and
                 * every path below leaves the loop through "goto out".
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                /* Deletion is done as an administrative update that forces
                 * the entry into NUD_FAILED.
                 */
                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1627
/* Handle a netlink request to add or change a neighbour or proxy entry.
 *
 * The request carries a struct ndmsg followed by attributes; NDA_DST is
 * mandatory and NDA_LLADDR optional.  Must be called with the RTNL held.
 *
 * Returns a negative nlmsg_parse() error; -EINVAL for a missing or too
 * short NDA_DST, a too short NDA_LLADDR, or a non-proxy request without a
 * device; -ENODEV for an unknown ifindex; -ENOBUFS when the proxy entry
 * cannot be obtained; -ENOENT when the entry does not exist and
 * NLM_F_CREATE is not set; -EEXIST when it exists and NLM_F_EXCL is set;
 * -EAFNOSUPPORT when no table handles ndm_family; otherwise 0 or the
 * result of neigh_update().
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
        if (err < 0)
                goto out;

        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* A supplied link-layer address must cover the device's
                 * address length.
                 */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
                struct neighbour *neigh;
                void *dst, *lladdr;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching table found: the list lock is released here and
                 * every path below leaves the loop through "goto out".
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(tb[NDA_DST]) < tbl->key_len)
                        goto out;
                dst = nla_data(tb[NDA_DST]);
                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

                if (ndm->ndm_flags & NTF_PROXY) {
                        struct pneigh_entry *pn;

                        err = -ENOBUFS;
                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
                        if (pn) {
                                pn->flags = ndm->ndm_flags;
                                err = 0;
                        }
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, dst, dev);
                if (neigh == NULL) {
                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                                err = -ENOENT;
                                goto out;
                        }

                        neigh = __neigh_lookup_errno(tbl, dst, dev);
                        if (IS_ERR(neigh)) {
                                err = PTR_ERR(neigh);
                                goto out;
                        }
                } else {
                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
                                err = -EEXIST;
                                neigh_release(neigh);
                                goto out;
                        }

                        /* Without NLM_F_REPLACE an existing link-layer
                         * address must not be overridden.
                         */
                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
                }

                if (ndm->ndm_flags & NTF_USE) {
                        /* NTF_USE: trigger resolution instead of updating
                         * the entry from the request.
                         */
                        neigh_event_send(neigh, NULL);
                        err = 0;
                } else
                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
                neigh_release(neigh);
                goto out;
        }

        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;
out:
        return err;
}
1725
1726 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1727 {
1728         struct nlattr *nest;
1729
1730         nest = nla_nest_start(skb, NDTA_PARMS);
1731         if (nest == NULL)
1732                 return -ENOBUFS;
1733
1734         if (parms->dev)
1735                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1736
1737         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1738         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1739         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1740         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1741         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1742         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1743         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1744         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1745                       parms->base_reachable_time);
1746         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1747         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1748         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1749         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1750         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1751         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1752
1753         return nla_nest_end(skb, nest);
1754
1755 nla_put_failure:
1756         nla_nest_cancel(skb, nest);
1757         return -EMSGSIZE;
1758 }
1759
1760 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1761                               u32 pid, u32 seq, int type, int flags)
1762 {
1763         struct nlmsghdr *nlh;
1764         struct ndtmsg *ndtmsg;
1765
1766         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1767         if (nlh == NULL)
1768                 return -EMSGSIZE;
1769
1770         ndtmsg = nlmsg_data(nlh);
1771
1772         read_lock_bh(&tbl->lock);
1773         ndtmsg->ndtm_family = tbl->family;
1774         ndtmsg->ndtm_pad1   = 0;
1775         ndtmsg->ndtm_pad2   = 0;
1776
1777         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1778         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1779         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1780         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1781         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1782
1783         {
1784                 unsigned long now = jiffies;
1785                 unsigned int flush_delta = now - tbl->last_flush;
1786                 unsigned int rand_delta = now - tbl->last_rand;
1787                 struct neigh_hash_table *nht;
1788                 struct ndt_config ndc = {
1789                         .ndtc_key_len           = tbl->key_len,
1790                         .ndtc_entry_size        = tbl->entry_size,
1791                         .ndtc_entries           = atomic_read(&tbl->entries),
1792                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1793                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1794                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1795                 };
1796
1797                 rcu_read_lock_bh();
1798                 nht = rcu_dereference_bh(tbl->nht);
1799                 ndc.ndtc_hash_rnd = nht->hash_rnd;
1800                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1801                 rcu_read_unlock_bh();
1802
1803                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1804         }
1805
1806         {
1807                 int cpu;
1808                 struct ndt_stats ndst;
1809
1810                 memset(&ndst, 0, sizeof(ndst));
1811
1812                 for_each_possible_cpu(cpu) {
1813                         struct neigh_statistics *st;
1814
1815                         st = per_cpu_ptr(tbl->stats, cpu);
1816                         ndst.ndts_allocs                += st->allocs;
1817                         ndst.ndts_destroys              += st->destroys;
1818                         ndst.ndts_hash_grows            += st->hash_grows;
1819                         ndst.ndts_res_failed            += st->res_failed;
1820                         ndst.ndts_lookups               += st->lookups;
1821                         ndst.ndts_hits                  += st->hits;
1822                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1823                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1824                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1825                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1826                 }
1827
1828                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1829         }
1830
1831         BUG_ON(tbl->parms.dev);
1832         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1833                 goto nla_put_failure;
1834
1835         read_unlock_bh(&tbl->lock);
1836         return nlmsg_end(skb, nlh);
1837
1838 nla_put_failure:
1839         read_unlock_bh(&tbl->lock);
1840         nlmsg_cancel(skb, nlh);
1841         return -EMSGSIZE;
1842 }
1843
1844 static int neightbl_fill_param_info(struct sk_buff *skb,
1845                                     struct neigh_table *tbl,
1846                                     struct neigh_parms *parms,
1847                                     u32 pid, u32 seq, int type,
1848                                     unsigned int flags)
1849 {
1850         struct ndtmsg *ndtmsg;
1851         struct nlmsghdr *nlh;
1852
1853         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1854         if (nlh == NULL)
1855                 return -EMSGSIZE;
1856
1857         ndtmsg = nlmsg_data(nlh);
1858
1859         read_lock_bh(&tbl->lock);
1860         ndtmsg->ndtm_family = tbl->family;
1861         ndtmsg->ndtm_pad1   = 0;
1862         ndtmsg->ndtm_pad2   = 0;
1863
1864         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1865             neightbl_fill_parms(skb, parms) < 0)
1866                 goto errout;
1867
1868         read_unlock_bh(&tbl->lock);
1869         return nlmsg_end(skb, nlh);
1870 errout:
1871         read_unlock_bh(&tbl->lock);
1872         nlmsg_cancel(skb, nlh);
1873         return -EMSGSIZE;
1874 }
1875
1876 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1877         [NDTA_NAME]             = { .type = NLA_STRING },
1878         [NDTA_THRESH1]          = { .type = NLA_U32 },
1879         [NDTA_THRESH2]          = { .type = NLA_U32 },
1880         [NDTA_THRESH3]          = { .type = NLA_U32 },
1881         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1882         [NDTA_PARMS]            = { .type = NLA_NESTED },
1883 };
1884
1885 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1886         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1887         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1888         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1889         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1890         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1891         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1892         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1893         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1894         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1895         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1896         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1897         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1898         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1899 };
1900
1901 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1902 {
1903         struct net *net = sock_net(skb->sk);
1904         struct neigh_table *tbl;
1905         struct ndtmsg *ndtmsg;
1906         struct nlattr *tb[NDTA_MAX+1];
1907         int err;
1908
1909         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1910                           nl_neightbl_policy);
1911         if (err < 0)
1912                 goto errout;
1913
1914         if (tb[NDTA_NAME] == NULL) {
1915                 err = -EINVAL;
1916                 goto errout;
1917         }
1918
1919         ndtmsg = nlmsg_data(nlh);
1920         read_lock(&neigh_tbl_lock);
1921         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1922                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1923                         continue;
1924
1925                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1926                         break;
1927         }
1928
1929         if (tbl == NULL) {
1930                 err = -ENOENT;
1931                 goto errout_locked;
1932         }
1933
1934         /*
1935          * We acquire tbl->lock to be nice to the periodic timers and
1936          * make sure they always see a consistent set of values.
1937          */
1938         write_lock_bh(&tbl->lock);
1939
1940         if (tb[NDTA_PARMS]) {
1941                 struct nlattr *tbp[NDTPA_MAX+1];
1942                 struct neigh_parms *p;
1943                 int i, ifindex = 0;
1944
1945                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1946                                        nl_ntbl_parm_policy);
1947                 if (err < 0)
1948                         goto errout_tbl_lock;
1949
1950                 if (tbp[NDTPA_IFINDEX])
1951                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1952
1953                 p = lookup_neigh_parms(tbl, net, ifindex);
1954                 if (p == NULL) {
1955                         err = -ENOENT;
1956                         goto errout_tbl_lock;
1957                 }
1958
1959                 for (i = 1; i <= NDTPA_MAX; i++) {
1960                         if (tbp[i] == NULL)
1961                                 continue;
1962
1963                         switch (i) {
1964                         case NDTPA_QUEUE_LEN:
1965                                 p->queue_len = nla_get_u32(tbp[i]);
1966                                 break;
1967                         case NDTPA_PROXY_QLEN:
1968                                 p->proxy_qlen = nla_get_u32(tbp[i]);
1969                                 break;
1970                         case NDTPA_APP_PROBES:
1971                                 p->app_probes = nla_get_u32(tbp[i]);
1972                                 break;
1973                         case NDTPA_UCAST_PROBES:
1974                                 p->ucast_probes = nla_get_u32(tbp[i]);
1975                                 break;
1976                         case NDTPA_MCAST_PROBES:
1977                                 p->mcast_probes = nla_get_u32(tbp[i]);
1978                                 break;
1979                         case NDTPA_BASE_REACHABLE_TIME:
1980                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
1981                                 break;
1982                         case NDTPA_GC_STALETIME:
1983                                 p->gc_staletime = nla_get_msecs(tbp[i]);
1984                                 break;
1985                         case NDTPA_DELAY_PROBE_TIME:
1986                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
1987                                 break;
1988                         case NDTPA_RETRANS_TIME:
1989                                 p->retrans_time = nla_get_msecs(tbp[i]);
1990                                 break;
1991                         case NDTPA_ANYCAST_DELAY:
1992                                 p->anycast_delay = nla_get_msecs(tbp[i]);
1993                                 break;
1994                         case NDTPA_PROXY_DELAY:
1995                                 p->proxy_delay = nla_get_msecs(tbp[i]);
1996                                 break;
1997                         case NDTPA_LOCKTIME:
1998                                 p->locktime = nla_get_msecs(tbp[i]);
1999                                 break;
2000                         }
2001                 }
2002         }
2003
2004         if (tb[NDTA_THRESH1])
2005                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2006
2007         if (tb[NDTA_THRESH2])
2008                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2009
2010         if (tb[NDTA_THRESH3])
2011                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2012
2013         if (tb[NDTA_GC_INTERVAL])
2014                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2015
2016         err = 0;
2017
2018 errout_tbl_lock:
2019         write_unlock_bh(&tbl->lock);
2020 errout_locked:
2021         read_unlock(&neigh_tbl_lock);
2022 errout:
2023         return err;
2024 }
2025
2026 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2027 {
2028         struct net *net = sock_net(skb->sk);
2029         int family, tidx, nidx = 0;
2030         int tbl_skip = cb->args[0];
2031         int neigh_skip = cb->args[1];
2032         struct neigh_table *tbl;
2033
2034         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2035
2036         read_lock(&neigh_tbl_lock);
2037         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2038                 struct neigh_parms *p;
2039
2040                 if (tidx < tbl_skip || (family && tbl->family != family))
2041                         continue;
2042
2043                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2044                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2045                                        NLM_F_MULTI) <= 0)
2046                         break;
2047
2048                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2049                         if (!net_eq(neigh_parms_net(p), net))
2050                                 continue;
2051
2052                         if (nidx < neigh_skip)
2053                                 goto next;
2054
2055                         if (neightbl_fill_param_info(skb, tbl, p,
2056                                                      NETLINK_CB(cb->skb).pid,
2057                                                      cb->nlh->nlmsg_seq,
2058                                                      RTM_NEWNEIGHTBL,
2059                                                      NLM_F_MULTI) <= 0)
2060                                 goto out;
2061                 next:
2062                         nidx++;
2063                 }
2064
2065                 neigh_skip = 0;
2066         }
2067 out:
2068         read_unlock(&neigh_tbl_lock);
2069         cb->args[0] = tidx;
2070         cb->args[1] = nidx;
2071
2072         return skb->len;
2073 }
2074
2075 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2076                            u32 pid, u32 seq, int type, unsigned int flags)
2077 {
2078         unsigned long now = jiffies;
2079         struct nda_cacheinfo ci;
2080         struct nlmsghdr *nlh;
2081         struct ndmsg *ndm;
2082
2083         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2084         if (nlh == NULL)
2085                 return -EMSGSIZE;
2086
2087         ndm = nlmsg_data(nlh);
2088         ndm->ndm_family  = neigh->ops->family;
2089         ndm->ndm_pad1    = 0;
2090         ndm->ndm_pad2    = 0;
2091         ndm->ndm_flags   = neigh->flags;
2092         ndm->ndm_type    = neigh->type;
2093         ndm->ndm_ifindex = neigh->dev->ifindex;
2094
2095         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2096
2097         read_lock_bh(&neigh->lock);
2098         ndm->ndm_state   = neigh->nud_state;
2099         if (neigh->nud_state & NUD_VALID) {
2100                 char haddr[MAX_ADDR_LEN];
2101
2102                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2103                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2104                         read_unlock_bh(&neigh->lock);
2105                         goto nla_put_failure;
2106                 }
2107         }
2108
2109         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2110         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2111         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2112         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2113         read_unlock_bh(&neigh->lock);
2114
2115         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2116         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2117
2118         return nlmsg_end(skb, nlh);
2119
2120 nla_put_failure:
2121         nlmsg_cancel(skb, nlh);
2122         return -EMSGSIZE;
2123 }
2124
2125 static void neigh_update_notify(struct neighbour *neigh)
2126 {
2127         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2128         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2129 }
2130
2131 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2132                             struct netlink_callback *cb)
2133 {
2134         struct net *net = sock_net(skb->sk);
2135         struct neighbour *n;
2136         int rc, h, s_h = cb->args[1];
2137         int idx, s_idx = idx = cb->args[2];
2138         struct neigh_hash_table *nht;
2139
2140         rcu_read_lock_bh();
2141         nht = rcu_dereference_bh(tbl->nht);
2142
2143         for (h = 0; h < (1 << nht->hash_shift); h++) {
2144                 if (h < s_h)
2145                         continue;
2146                 if (h > s_h)
2147                         s_idx = 0;
2148                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2149                      n != NULL;
2150                      n = rcu_dereference_bh(n->next)) {
2151                         if (!net_eq(dev_net(n->dev), net))
2152                                 continue;
2153                         if (idx < s_idx)
2154                                 goto next;
2155                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2156                                             cb->nlh->nlmsg_seq,
2157                                             RTM_NEWNEIGH,
2158                                             NLM_F_MULTI) <= 0) {
2159                                 rc = -1;
2160                                 goto out;
2161                         }
2162 next:
2163                         idx++;
2164                 }
2165         }
2166         rc = skb->len;
2167 out:
2168         rcu_read_unlock_bh();
2169         cb->args[1] = h;
2170         cb->args[2] = idx;
2171         return rc;
2172 }
2173
2174 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2175 {
2176         struct neigh_table *tbl;
2177         int t, family, s_t;
2178
2179         read_lock(&neigh_tbl_lock);
2180         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2181         s_t = cb->args[0];
2182
2183         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2184                 if (t < s_t || (family && tbl->family != family))
2185                         continue;
2186                 if (t > s_t)
2187                         memset(&cb->args[1], 0, sizeof(cb->args) -
2188                                                 sizeof(cb->args[0]));
2189                 if (neigh_dump_table(tbl, skb, cb) < 0)
2190                         break;
2191         }
2192         read_unlock(&neigh_tbl_lock);
2193
2194         cb->args[0] = t;
2195         return skb->len;
2196 }
2197
2198 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2199 {
2200         int chain;
2201         struct neigh_hash_table *nht;
2202
2203         rcu_read_lock_bh();
2204         nht = rcu_dereference_bh(tbl->nht);
2205
2206         read_lock(&tbl->lock); /* avoid resizes */
2207         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2208                 struct neighbour *n;
2209
2210                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2211                      n != NULL;
2212                      n = rcu_dereference_bh(n->next))
2213                         cb(n, cookie);
2214         }
2215         read_unlock(&tbl->lock);
2216         rcu_read_unlock_bh();
2217 }
2218 EXPORT_SYMBOL(neigh_for_each);
2219
2220 /* The tbl->lock must be held as a writer and BH disabled. */
2221 void __neigh_for_each_release(struct neigh_table *tbl,
2222                               int (*cb)(struct neighbour *))
2223 {
2224         int chain;
2225         struct neigh_hash_table *nht;
2226
2227         nht = rcu_dereference_protected(tbl->nht,
2228                                         lockdep_is_held(&tbl->lock));
2229         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2230                 struct neighbour *n;
2231                 struct neighbour __rcu **np;
2232
2233                 np = &nht->hash_buckets[chain];
2234                 while ((n = rcu_dereference_protected(*np,
2235                                         lockdep_is_held(&tbl->lock))) != NULL) {
2236                         int release;
2237
2238                         write_lock(&n->lock);
2239                         release = cb(n);
2240                         if (release) {
2241                                 rcu_assign_pointer(*np,
2242                                         rcu_dereference_protected(n->next,
2243                                                 lockdep_is_held(&tbl->lock)));
2244                                 n->dead = 1;
2245                         } else
2246                                 np = &n->next;
2247                         write_unlock(&n->lock);
2248                         if (release)
2249                                 neigh_cleanup_and_release(n);
2250                 }
2251         }
2252 }
2253 EXPORT_SYMBOL(__neigh_for_each_release);
2254
2255 #ifdef CONFIG_PROC_FS
2256
2257 static struct neighbour *neigh_get_first(struct seq_file *seq)
2258 {
2259         struct neigh_seq_state *state = seq->private;
2260         struct net *net = seq_file_net(seq);
2261         struct neigh_hash_table *nht = state->nht;
2262         struct neighbour *n = NULL;
2263         int bucket = state->bucket;
2264
2265         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2266         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2267                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2268
2269                 while (n) {
2270                         if (!net_eq(dev_net(n->dev), net))
2271                                 goto next;
2272                         if (state->neigh_sub_iter) {
2273                                 loff_t fakep = 0;
2274                                 void *v;
2275
2276                                 v = state->neigh_sub_iter(state, n, &fakep);
2277                                 if (!v)
2278                                         goto next;
2279                         }
2280                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2281                                 break;
2282                         if (n->nud_state & ~NUD_NOARP)
2283                                 break;
2284 next:
2285                         n = rcu_dereference_bh(n->next);
2286                 }
2287
2288                 if (n)
2289                         break;
2290         }
2291         state->bucket = bucket;
2292
2293         return n;
2294 }
2295
2296 static struct neighbour *neigh_get_next(struct seq_file *seq,
2297                                         struct neighbour *n,
2298                                         loff_t *pos)
2299 {
2300         struct neigh_seq_state *state = seq->private;
2301         struct net *net = seq_file_net(seq);
2302         struct neigh_hash_table *nht = state->nht;
2303
2304         if (state->neigh_sub_iter) {
2305                 void *v = state->neigh_sub_iter(state, n, pos);
2306                 if (v)
2307                         return n;
2308         }
2309         n = rcu_dereference_bh(n->next);
2310
2311         while (1) {
2312                 while (n) {
2313                         if (!net_eq(dev_net(n->dev), net))
2314                                 goto next;
2315                         if (state->neigh_sub_iter) {
2316                                 void *v = state->neigh_sub_iter(state, n, pos);
2317                                 if (v)
2318                                         return n;
2319                                 goto next;
2320                         }
2321                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2322                                 break;
2323
2324                         if (n->nud_state & ~NUD_NOARP)
2325                                 break;
2326 next:
2327                         n = rcu_dereference_bh(n->next);
2328                 }
2329
2330                 if (n)
2331                         break;
2332
2333                 if (++state->bucket >= (1 << nht->hash_shift))
2334                         break;
2335
2336                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2337         }
2338
2339         if (n && pos)
2340                 --(*pos);
2341         return n;
2342 }
2343
2344 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2345 {
2346         struct neighbour *n = neigh_get_first(seq);
2347
2348         if (n) {
2349                 --(*pos);
2350                 while (*pos) {
2351                         n = neigh_get_next(seq, n, pos);
2352                         if (!n)
2353                                 break;
2354                 }
2355         }
2356         return *pos ? NULL : n;
2357 }
2358
2359 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2360 {
2361         struct neigh_seq_state *state = seq->private;
2362         struct net *net = seq_file_net(seq);
2363         struct neigh_table *tbl = state->tbl;
2364         struct pneigh_entry *pn = NULL;
2365         int bucket = state->bucket;
2366
2367         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2368         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2369                 pn = tbl->phash_buckets[bucket];
2370                 while (pn && !net_eq(pneigh_net(pn), net))
2371                         pn = pn->next;
2372                 if (pn)
2373                         break;
2374         }
2375         state->bucket = bucket;
2376
2377         return pn;
2378 }
2379
2380 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2381                                             struct pneigh_entry *pn,
2382                                             loff_t *pos)
2383 {
2384         struct neigh_seq_state *state = seq->private;
2385         struct net *net = seq_file_net(seq);
2386         struct neigh_table *tbl = state->tbl;
2387
2388         pn = pn->next;
2389         while (!pn) {
2390                 if (++state->bucket > PNEIGH_HASHMASK)
2391                         break;
2392                 pn = tbl->phash_buckets[state->bucket];
2393                 while (pn && !net_eq(pneigh_net(pn), net))
2394                         pn = pn->next;
2395                 if (pn)
2396                         break;
2397         }
2398
2399         if (pn && pos)
2400                 --(*pos);
2401
2402         return pn;
2403 }
2404
2405 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2406 {
2407         struct pneigh_entry *pn = pneigh_get_first(seq);
2408
2409         if (pn) {
2410                 --(*pos);
2411                 while (*pos) {
2412                         pn = pneigh_get_next(seq, pn, pos);
2413                         if (!pn)
2414                                 break;
2415                 }
2416         }
2417         return *pos ? NULL : pn;
2418 }
2419
2420 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2421 {
2422         struct neigh_seq_state *state = seq->private;
2423         void *rc;
2424         loff_t idxpos = *pos;
2425
2426         rc = neigh_get_idx(seq, &idxpos);
2427         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2428                 rc = pneigh_get_idx(seq, &idxpos);
2429
2430         return rc;
2431 }
2432
2433 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2434         __acquires(rcu_bh)
2435 {
2436         struct neigh_seq_state *state = seq->private;
2437
2438         state->tbl = tbl;
2439         state->bucket = 0;
2440         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2441
2442         rcu_read_lock_bh();
2443         state->nht = rcu_dereference_bh(tbl->nht);
2444
2445         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2446 }
2447 EXPORT_SYMBOL(neigh_seq_start);
2448
2449 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2450 {
2451         struct neigh_seq_state *state;
2452         void *rc;
2453
2454         if (v == SEQ_START_TOKEN) {
2455                 rc = neigh_get_first(seq);
2456                 goto out;
2457         }
2458
2459         state = seq->private;
2460         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2461                 rc = neigh_get_next(seq, v, NULL);
2462                 if (rc)
2463                         goto out;
2464                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2465                         rc = pneigh_get_first(seq);
2466         } else {
2467                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2468                 rc = pneigh_get_next(seq, v, NULL);
2469         }
2470 out:
2471         ++(*pos);
2472         return rc;
2473 }
2474 EXPORT_SYMBOL(neigh_seq_next);
2475
2476 void neigh_seq_stop(struct seq_file *seq, void *v)
2477         __releases(rcu_bh)
2478 {
2479         rcu_read_unlock_bh();
2480 }
2481 EXPORT_SYMBOL(neigh_seq_stop);
2482
2483 /* statistics via seq_file */
2484
2485 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2486 {
2487         struct neigh_table *tbl = seq->private;
2488         int cpu;
2489
2490         if (*pos == 0)
2491                 return SEQ_START_TOKEN;
2492
2493         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2494                 if (!cpu_possible(cpu))
2495                         continue;
2496                 *pos = cpu+1;
2497                 return per_cpu_ptr(tbl->stats, cpu);
2498         }
2499         return NULL;
2500 }
2501
2502 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2503 {
2504         struct neigh_table *tbl = seq->private;
2505         int cpu;
2506
2507         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2508                 if (!cpu_possible(cpu))
2509                         continue;
2510                 *pos = cpu+1;
2511                 return per_cpu_ptr(tbl->stats, cpu);
2512         }
2513         return NULL;
2514 }
2515
/*
 * seq_file ->stop callback for the statistics iterator.
 *
 * Intentionally empty: the start/next callbacks in this file take no lock
 * and allocate nothing, so there is nothing to undo here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2520
2521 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2522 {
2523         struct neigh_table *tbl = seq->private;
2524         struct neigh_statistics *st = v;
2525
2526         if (v == SEQ_START_TOKEN) {
2527                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2528                 return 0;
2529         }
2530
2531         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2532                         "%08lx %08lx  %08lx %08lx %08lx\n",
2533                    atomic_read(&tbl->entries),
2534
2535                    st->allocs,
2536                    st->destroys,
2537                    st->hash_grows,
2538
2539                    st->lookups,
2540                    st->hits,
2541
2542                    st->res_failed,
2543
2544                    st->rcv_probes_mcast,
2545                    st->rcv_probes_ucast,
2546
2547                    st->periodic_gc_runs,
2548                    st->forced_gc_runs,
2549                    st->unres_discards
2550                    );
2551
2552         return 0;
2553 }
2554
2555 static const struct seq_operations neigh_stat_seq_ops = {
2556         .start  = neigh_stat_seq_start,
2557         .next   = neigh_stat_seq_next,
2558         .stop   = neigh_stat_seq_stop,
2559         .show   = neigh_stat_seq_show,
2560 };
2561
2562 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2563 {
2564         int ret = seq_open(file, &neigh_stat_seq_ops);
2565
2566         if (!ret) {
2567                 struct seq_file *sf = file->private_data;
2568                 sf->private = PDE(inode)->data;
2569         }
2570         return ret;
2571 };
2572
2573 static const struct file_operations neigh_stat_seq_fops = {
2574         .owner   = THIS_MODULE,
2575         .open    = neigh_stat_seq_open,
2576         .read    = seq_read,
2577         .llseek  = seq_lseek,
2578         .release = seq_release,
2579 };
2580
2581 #endif /* CONFIG_PROC_FS */
2582
2583 static inline size_t neigh_nlmsg_size(void)
2584 {
2585         return NLMSG_ALIGN(sizeof(struct ndmsg))
2586                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2587                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2588                + nla_total_size(sizeof(struct nda_cacheinfo))
2589                + nla_total_size(4); /* NDA_PROBES */
2590 }
2591
2592 static void __neigh_notify(struct neighbour *n, int type, int flags)
2593 {
2594         struct net *net = dev_net(n->dev);
2595         struct sk_buff *skb;
2596         int err = -ENOBUFS;
2597
2598         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2599         if (skb == NULL)
2600                 goto errout;
2601
2602         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2603         if (err < 0) {
2604                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2605                 WARN_ON(err == -EMSGSIZE);
2606                 kfree_skb(skb);
2607                 goto errout;
2608         }
2609         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2610         return;
2611 errout:
2612         if (err < 0)
2613                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2614 }
2615
2616 #ifdef CONFIG_ARPD
2617 void neigh_app_ns(struct neighbour *n)
2618 {
2619         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2620 }
2621 EXPORT_SYMBOL(neigh_app_ns);
2622 #endif /* CONFIG_ARPD */
2623
2624 #ifdef CONFIG_SYSCTL
2625
2626 #define NEIGH_VARS_MAX 19
2627
2628 static struct neigh_sysctl_table {
2629         struct ctl_table_header *sysctl_header;
2630         struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2631         char *dev_name;
2632 } neigh_sysctl_template __read_mostly = {
2633         .neigh_vars = {
2634                 {
2635                         .procname       = "mcast_solicit",
2636                         .maxlen         = sizeof(int),
2637                         .mode           = 0644,
2638                         .proc_handler   = proc_dointvec,
2639                 },
2640                 {
2641                         .procname       = "ucast_solicit",
2642                         .maxlen         = sizeof(int),
2643                         .mode           = 0644,
2644                         .proc_handler   = proc_dointvec,
2645                 },
2646                 {
2647                         .procname       = "app_solicit",
2648                         .maxlen         = sizeof(int),
2649                         .mode           = 0644,
2650                         .proc_handler   = proc_dointvec,
2651                 },
2652                 {
2653                         .procname       = "retrans_time",
2654                         .maxlen         = sizeof(int),
2655                         .mode           = 0644,
2656                         .proc_handler   = proc_dointvec_userhz_jiffies,
2657                 },
2658                 {
2659                         .procname       = "base_reachable_time",
2660                         .maxlen         = sizeof(int),
2661                         .mode           = 0644,
2662                         .proc_handler   = proc_dointvec_jiffies,
2663                 },
2664                 {
2665                         .procname       = "delay_first_probe_time",
2666                         .maxlen         = sizeof(int),
2667                         .mode           = 0644,
2668                         .proc_handler   = proc_dointvec_jiffies,
2669                 },
2670                 {
2671                         .procname       = "gc_stale_time",
2672                         .maxlen         = sizeof(int),
2673                         .mode           = 0644,
2674                         .proc_handler   = proc_dointvec_jiffies,
2675                 },
2676                 {
2677                         .procname       = "unres_qlen",
2678                         .maxlen         = sizeof(int),
2679                         .mode           = 0644,
2680                         .proc_handler   = proc_dointvec,
2681                 },
2682                 {
2683                         .procname       = "proxy_qlen",
2684                         .maxlen         = sizeof(int),
2685                         .mode           = 0644,
2686                         .proc_handler   = proc_dointvec,
2687                 },
2688                 {
2689                         .procname       = "anycast_delay",
2690                         .maxlen         = sizeof(int),
2691                         .mode           = 0644,
2692                         .proc_handler   = proc_dointvec_userhz_jiffies,
2693                 },
2694                 {
2695                         .procname       = "proxy_delay",
2696                         .maxlen         = sizeof(int),
2697                         .mode           = 0644,
2698                         .proc_handler   = proc_dointvec_userhz_jiffies,
2699                 },
2700                 {
2701                         .procname       = "locktime",
2702                         .maxlen         = sizeof(int),
2703                         .mode           = 0644,
2704                         .proc_handler   = proc_dointvec_userhz_jiffies,
2705                 },
2706                 {
2707                         .procname       = "retrans_time_ms",
2708                         .maxlen         = sizeof(int),
2709                         .mode           = 0644,
2710                         .proc_handler   = proc_dointvec_ms_jiffies,
2711                 },
2712                 {
2713                         .procname       = "base_reachable_time_ms",
2714                         .maxlen         = sizeof(int),
2715                         .mode           = 0644,
2716                         .proc_handler   = proc_dointvec_ms_jiffies,
2717                 },
2718                 {
2719                         .procname       = "gc_interval",
2720                         .maxlen         = sizeof(int),
2721                         .mode           = 0644,
2722                         .proc_handler   = proc_dointvec_jiffies,
2723                 },
2724                 {
2725                         .procname       = "gc_thresh1",
2726                         .maxlen         = sizeof(int),
2727                         .mode           = 0644,
2728                         .proc_handler   = proc_dointvec,
2729                 },
2730                 {
2731                         .procname       = "gc_thresh2",
2732                         .maxlen         = sizeof(int),
2733                         .mode           = 0644,
2734                         .proc_handler   = proc_dointvec,
2735                 },
2736                 {
2737                         .procname       = "gc_thresh3",
2738                         .maxlen         = sizeof(int),
2739                         .mode           = 0644,
2740                         .proc_handler   = proc_dointvec,
2741                 },
2742                 {},
2743         },
2744 };
2745
2746 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2747                           char *p_name, proc_handler *handler)
2748 {
2749         struct neigh_sysctl_table *t;
2750         const char *dev_name_source = NULL;
2751
2752 #define NEIGH_CTL_PATH_ROOT     0
2753 #define NEIGH_CTL_PATH_PROTO    1
2754 #define NEIGH_CTL_PATH_NEIGH    2
2755 #define NEIGH_CTL_PATH_DEV      3
2756
2757         struct ctl_path neigh_path[] = {
2758                 { .procname = "net",     },
2759                 { .procname = "proto",   },
2760                 { .procname = "neigh",   },
2761                 { .procname = "default", },
2762                 { },
2763         };
2764
2765         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2766         if (!t)
2767                 goto err;
2768
2769         t->neigh_vars[0].data  = &p->mcast_probes;
2770         t->neigh_vars[1].data  = &p->ucast_probes;
2771         t->neigh_vars[2].data  = &p->app_probes;
2772         t->neigh_vars[3].data  = &p->retrans_time;
2773         t->neigh_vars[4].data  = &p->base_reachable_time;
2774         t->neigh_vars[5].data  = &p->delay_probe_time;
2775         t->neigh_vars[6].data  = &p->gc_staletime;
2776         t->neigh_vars[7].data  = &p->queue_len;
2777         t->neigh_vars[8].data  = &p->proxy_qlen;
2778         t->neigh_vars[9].data  = &p->anycast_delay;
2779         t->neigh_vars[10].data = &p->proxy_delay;
2780         t->neigh_vars[11].data = &p->locktime;
2781         t->neigh_vars[12].data  = &p->retrans_time;
2782         t->neigh_vars[13].data  = &p->base_reachable_time;
2783
2784         if (dev) {
2785                 dev_name_source = dev->name;
2786                 /* Terminate the table early */
2787                 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2788         } else {
2789                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2790                 t->neigh_vars[14].data = (int *)(p + 1);
2791                 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2792                 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2793                 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2794         }
2795
2796
2797         if (handler) {
2798                 /* RetransTime */
2799                 t->neigh_vars[3].proc_handler = handler;
2800                 t->neigh_vars[3].extra1 = dev;
2801                 /* ReachableTime */
2802                 t->neigh_vars[4].proc_handler = handler;
2803                 t->neigh_vars[4].extra1 = dev;
2804                 /* RetransTime (in milliseconds)*/
2805                 t->neigh_vars[12].proc_handler = handler;
2806                 t->neigh_vars[12].extra1 = dev;
2807                 /* ReachableTime (in milliseconds) */
2808                 t->neigh_vars[13].proc_handler = handler;
2809                 t->neigh_vars[13].extra1 = dev;
2810         }
2811
2812         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2813         if (!t->dev_name)
2814                 goto free;
2815
2816         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2817         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2818
2819         t->sysctl_header =
2820                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2821         if (!t->sysctl_header)
2822                 goto free_procname;
2823
2824         p->sysctl_table = t;
2825         return 0;
2826
2827 free_procname:
2828         kfree(t->dev_name);
2829 free:
2830         kfree(t);
2831 err:
2832         return -ENOBUFS;
2833 }
2834 EXPORT_SYMBOL(neigh_sysctl_register);
2835
2836 void neigh_sysctl_unregister(struct neigh_parms *p)
2837 {
2838         if (p->sysctl_table) {
2839                 struct neigh_sysctl_table *t = p->sysctl_table;
2840                 p->sysctl_table = NULL;
2841                 unregister_sysctl_table(t->sysctl_header);
2842                 kfree(t->dev_name);
2843                 kfree(t);
2844         }
2845 }
2846 EXPORT_SYMBOL(neigh_sysctl_unregister);
2847
2848 #endif  /* CONFIG_SYSCTL */
2849
2850 static int __init neigh_init(void)
2851 {
2852         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
2853         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
2854         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
2855
2856         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
2857                       NULL);
2858         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
2859
2860         return 0;
2861 }
2862
2863 subsys_initcall(neigh_init);
2864