net: fix race on decreasing number of TX queues
[pandora-kernel.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is used to protect another entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
96    list of neighbour tables. This list is used only in process context,
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
/* Final teardown for an entry already unlinked from the hash table:
 * run the protocol's optional per-entry cleanup hook, broadcast an
 * RTM_DELNEIGH netlink notification, then drop the table's reference
 * (which may free the entry via neigh_destroy()).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
115
116 /*
117  * It is random distribution in the interval (1/2)*base...(3/2)*base.
118  * It corresponds to default IPv6 settings and is not overridable,
119  * because it is really reasonable choice.
120  */
121
/* Pick a randomized reachability timeout in [base/2, 3*base/2).
 * This matches the default IPv6 behaviour and is deliberately not
 * configurable.  A base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;
	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/* Synchronous emergency shrink, run from neigh_alloc() when the entry
 * count is over the GC thresholds.  Walks every bucket under tbl->lock
 * and unlinks entries that are both unreferenced (refcnt == 1, i.e.
 * only the table's own hold remains) and not permanent.
 * Returns 1 if at least one entry was reclaimed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink via rcu_assign_pointer() so RCU
				 * readers traversing the chain see a
				 * consistent next pointer.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				/* Mark dead while still holding n->lock so
				 * other users see it is going away.
				 */
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
174
/* Arm the entry's state-machine timer to fire at @when, taking a
 * reference that the timer handler (or a successful neigh_del_timer())
 * releases.  A nonzero mod_timer() return means a timer was already
 * pending - callers are expected to prevent that, since it would
 * unbalance the hold/release accounting, so it is reported as a bug
 * rather than handled.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/* Unlink every entry belonging to @dev (or every entry, when @dev is
 * NULL) from the hash table.  Caller must hold tbl->lock.  Entries
 * still referenced elsewhere cannot be freed here; they are neutered
 * instead (queue purged, output redirected to neigh_blackhole(),
 * state forced to NOARP/NONE) so the eventual last neigh_release()
 * can destroy them safely.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
253
/* Flush all cached entries for @dev after its link-layer address
 * changed, so stale hardware addresses are not used again.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
261
/* Tear down all neighbour and proxy state for a device going down:
 * flush its cache entries and proxy entries under tbl->lock, then stop
 * the proxy timer and drop any packets still queued for proxy
 * processing.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
274
/* Allocate a new, not-yet-linked entry for @tbl.  Returns NULL when
 * allocation fails or the table is over its GC thresholds and a forced
 * GC pass could not make room.  The returned entry has refcnt 1 and
 * dead = 1: it only becomes live when neigh_create() links it in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Count ourselves first.  Over gc_thresh3 (hard limit), or over
	 * gc_thresh2 with no flush in the last 5 seconds, try a
	 * synchronous GC pass before giving up.
	 */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic increment from above. */
	atomic_dec(&tbl->entries);
	goto out;
}
315
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
317 {
318         size_t size = (1 << shift) * sizeof(struct neighbour *);
319         struct neigh_hash_table *ret;
320         struct neighbour __rcu **buckets;
321
322         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323         if (!ret)
324                 return NULL;
325         if (size <= PAGE_SIZE)
326                 buckets = kzalloc(size, GFP_ATOMIC);
327         else
328                 buckets = (struct neighbour __rcu **)
329                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330                                            get_order(size));
331         if (!buckets) {
332                 kfree(ret);
333                 return NULL;
334         }
335         ret->hash_buckets = buckets;
336         ret->hash_shift = shift;
337         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338         ret->hash_rnd |= 1;
339         return ret;
340 }
341
342 static void neigh_hash_free_rcu(struct rcu_head *head)
343 {
344         struct neigh_hash_table *nht = container_of(head,
345                                                     struct neigh_hash_table,
346                                                     rcu);
347         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
348         struct neighbour __rcu **buckets = nht->hash_buckets;
349
350         if (size <= PAGE_SIZE)
351                 kfree(buckets);
352         else
353                 free_pages((unsigned long)buckets, get_order(size));
354         kfree(nht);
355 }
356
/* Rehash the table into 2^new_shift buckets.  Caller holds tbl->lock.
 * Readers traverse under RCU, so each pointer move uses
 * rcu_assign_pointer(), the new table is published only when fully
 * built, and the old one is freed via call_rcu().  On allocation
 * failure the current table is returned unchanged.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Bucket index is the top hash_shift bits of the
			 * hash, computed with the new table's seed.
			 */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
397
/* Find the cache entry for (@pkey, @dev) under RCU.  Returns the entry
 * with its refcount raised, or NULL.  atomic_inc_not_zero() refuses to
 * resurrect an entry whose last reference is being dropped
 * concurrently; such a hit also returns NULL.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
427
/* Like neigh_lookup() but matches on (@pkey, @net) only, ignoring the
 * device (the key is hashed with dev == NULL).  Returns the entry with
 * its refcount raised, or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
458
/* Create the entry for (@pkey, @dev) in @tbl, or return the existing
 * one if another CPU inserted the same key first.  Returns the entry
 * with an extra reference held, or an ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Set confirmation time well in the past so the new entry does
	 * not start out looking recently confirmed.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* The parms block died while we were constructing; bail out. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-scan under the lock: another CPU may have raced us in. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;	/* the entry becomes live only now */
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564                 struct net *net, const void *pkey, struct net_device *dev)
565 {
566         int key_len = tbl->key_len;
567         u32 hash_val = pneigh_hash(pkey, key_len);
568
569         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570                                  net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
/* Find the proxy entry for (@pkey, @dev) in @net; when @creat is
 * nonzero and nothing matches, allocate and insert a new one.  The
 * creation path requires the RTNL and may sleep (GFP_KERNEL
 * allocation, protocol pconstructor hook).  Returns the entry or NULL.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	/* Protocol hook vetoed the entry: undo all the refs and frees. */
	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
619
620
/* Remove and free the proxy entry exactly matching (@pkey, @dev, @net).
 * The entry is unlinked under tbl->lock; the destructor hook and the
 * frees run after the lock is dropped.  Returns 0 on success, -ENOENT
 * when no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
647
/* Remove every proxy entry bound to @dev (every entry when @dev is
 * NULL).  Called with tbl->lock held by neigh_ifdown().
 * NOTE(review): the unconditional -ENOENT return looks historical;
 * the only caller in this file ignores it.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
/* Drop one reference to a parms block; the last put destroys it. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
679
/* RCU callback: return the entry's memory to the table's slab cache
 * once no pre-existing RCU reader can still hold a pointer to it.
 */
static void neigh_destroy_rcu(struct rcu_head *head)
{
	struct neighbour *neigh = container_of(head, struct neighbour, rcu);

	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
686 /*
687  *      neighbour must already be out of the table;
688  *
689  */
void neigh_destroy(struct neighbour *neigh)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* Destroying an entry still marked live is a refcounting bug;
	 * complain loudly and leak rather than corrupt the table.
	 */
	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A pending timer holds its own reference, so reaching zero
	 * refs with a timer still armed should be impossible.
	 */
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	/* Actual memory release is deferred past any RCU readers. */
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
717
718 /* Neighbour state is suspicious;
719    disable fast path.
720
721    Called with write_locked neigh.
722  */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* Route transmits through the full (resolving) output path. */
	neigh->output = neigh->ops->output;
}
729
730 /* Neighbour state is OK;
731    enable fast path.
732
733    Called with write_locked neigh.
734  */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* Switch transmits to the protocol's fast connected path. */
	neigh->output = neigh->ops->connected_output;
}
741
742 static void neigh_periodic_work(struct work_struct *work)
743 {
744         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
745         struct neighbour *n;
746         struct neighbour __rcu **np;
747         unsigned int i;
748         struct neigh_hash_table *nht;
749
750         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
751
752         write_lock_bh(&tbl->lock);
753         nht = rcu_dereference_protected(tbl->nht,
754                                         lockdep_is_held(&tbl->lock));
755
756         /*
757          *      periodically recompute ReachableTime from random function
758          */
759
760         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
761                 struct neigh_parms *p;
762                 tbl->last_rand = jiffies;
763                 for (p = &tbl->parms; p; p = p->next)
764                         p->reachable_time =
765                                 neigh_rand_reach_time(p->base_reachable_time);
766         }
767
768         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
769                 np = &nht->hash_buckets[i];
770
771                 while ((n = rcu_dereference_protected(*np,
772                                 lockdep_is_held(&tbl->lock))) != NULL) {
773                         unsigned int state;
774
775                         write_lock(&n->lock);
776
777                         state = n->nud_state;
778                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
779                                 write_unlock(&n->lock);
780                                 goto next_elt;
781                         }
782
783                         if (time_before(n->used, n->confirmed))
784                                 n->used = n->confirmed;
785
786                         if (atomic_read(&n->refcnt) == 1 &&
787                             (state == NUD_FAILED ||
788                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
789                                 *np = n->next;
790                                 n->dead = 1;
791                                 write_unlock(&n->lock);
792                                 neigh_cleanup_and_release(n);
793                                 continue;
794                         }
795                         write_unlock(&n->lock);
796
797 next_elt:
798                         np = &n->next;
799                 }
800                 /*
801                  * It's fine to release lock here, even if hash table
802                  * grows while we are preempted.
803                  */
804                 write_unlock_bh(&tbl->lock);
805                 cond_resched();
806                 write_lock_bh(&tbl->lock);
807                 nht = rcu_dereference_protected(tbl->nht,
808                                                 lockdep_is_held(&tbl->lock));
809         }
810         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
811          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
812          * base_reachable_time.
813          */
814         schedule_delayed_work(&tbl->gc_work,
815                               tbl->parms.base_reachable_time >> 1);
816         write_unlock_bh(&tbl->lock);
817 }
818
819 static __inline__ int neigh_max_probes(struct neighbour *n)
820 {
821         struct neigh_parms *p = n->parms;
822         return (n->nud_state & NUD_PROBE) ?
823                 p->ucast_probes :
824                 p->ucast_probes + p->app_probes + p->mcast_probes;
825 }
826
/* Handle an entry entering NUD_FAILED: flush its queued packets
 * through the protocol's error_report hook and purge whatever remains.
 * Called with neigh->lock write-held; the lock is dropped around each
 * error_report() call because that path may re-enter the neighbour
 * code, and the NUD_FAILED re-check each iteration copes with state
 * changing while the lock was released.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
}
850
/* Emit one solicitation for the entry and bump its probe counter.
 * Called with neigh->lock write-held; the lock is dropped before the
 * protocol's solicit hook runs, since that may transmit.  The
 * head-of-queue skb is copied first so solicit() gets a stable skb
 * even if arp_queue overflows and drops the original meanwhile.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
863
/* Called when a timer expires for a neighbour entry. */

/* Drives the NUD state machine: REACHABLE -> DELAY -> PROBE -> FAILED
 * transitions, re-arming the timer as needed.  The timer owns one
 * reference on the entry, released at the bottom.
 *
 * Locking is asymmetric: the write lock taken at the top is released
 * either by neigh_probe() (which unlocks internally) or by the explicit
 * unlock under the "out:" label.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* default: re-check in one second */

	/* Entry left a timed state before we ran; nothing to do. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed recently enough; sleep until
			 * the reachability window expires.
			 */
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Recently used but unconfirmed: delay before probing. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			/* Idle and unconfirmed: demote to STALE. */
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* Confirmation arrived while delaying: promote back. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* Delay expired without confirmation: start probing. */
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted: declare the entry failed. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp the next expiry to at least HZ/2 from now and
		 * take an extra reference if the timer was not pending.
		 */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);	/* drop the timer's reference */
}
947
/* Kick the resolution state machine for an entry that is not currently
 * usable.  If @skb is non-NULL it is queued on arp_queue (bounded by
 * parms->queue_len, oldest dropped first) to be sent once resolution
 * completes, or freed on immediate failure.
 *
 * Returns 0 if the caller may transmit right away, 1 if the skb was
 * queued or consumed.  Takes and releases neigh->lock with BH disabled;
 * note the split unlock at out_unlock_bh below.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or already being verified: caller can send. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the unicast probe
			 * budget and fire the first probe immediately.
			 */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail outright. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry touched: enter DELAY before re-probing. */
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue full: drop the oldest pending skb. */
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = __skb_dequeue(&neigh->arp_queue);
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases the write lock itself; either way the
	 * _bh pairing for write_lock_bh() is completed by the explicit
	 * local_bh_enable() below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1017
1018 static void neigh_update_hhs(struct neighbour *neigh)
1019 {
1020         struct hh_cache *hh;
1021         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1022                 = NULL;
1023
1024         if (neigh->dev->header_ops)
1025                 update = neigh->dev->header_ops->cache_update;
1026
1027         if (update) {
1028                 hh = &neigh->hh;
1029                 if (hh->hh_len) {
1030                         write_seqlock_bh(&hh->hh_lock);
1031                         update(hh, neigh->dev, neigh->ha);
1032                         write_sequnlock_bh(&hh->hh_lock);
1033                 }
1034         }
1035 }
1036
1037
1038
1039 /* Generic update routine.
1040    -- lladdr is new lladdr or NULL, if it is not supplied.
1041    -- new    is new state.
1042    -- flags
1043         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1044                                 if it is different.
1045         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1046                                 lladdr instead of overriding it
1047                                 if it is different.
1048                                 It also allows to retain current state
1049                                 if lladdr is unchanged.
1050         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1051
1052         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1053                                 NTF_ROUTER flag.
1054         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1055                                 a router.
1056
1057    Caller MUST hold reference count on the entry.
1058  */
1059
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/* Transition to a non-valid state: stop the timer, demote the
	 * output path if we were connected, and report queued skbs as
	 * unreachable on an INCOMPLETE/PROBE -> FAILED transition.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		/* Address differs but OVERRIDE was not given: either
		 * weakly demote a connected entry to STALE keeping the
		 * old address, or reject the update entirely.
		 */
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			/* Same address, proposed state STALE: keep the
			 * current (possibly stronger) state instead.
			 */
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	/* State change: restart the timer for timed states. */
	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	/* Install the new link-layer address under ha_lock so readers
	 * (output paths) see a consistent value, and refresh the cached
	 * hardware header.
	 */
	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate confirmation so a non-connected entry expires
		 * promptly unless reconfirmed.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	/* Entry just became valid: flush skbs queued while unresolved.
	 * The lock is dropped around each transmission.
	 */
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1208
1209 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1210                                  u8 *lladdr, void *saddr,
1211                                  struct net_device *dev)
1212 {
1213         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1214                                                  lladdr || !dev->addr_len);
1215         if (neigh)
1216                 neigh_update(neigh, lladdr, NUD_STALE,
1217                              NEIGH_UPDATE_F_OVERRIDE);
1218         return neigh;
1219 }
1220 EXPORT_SYMBOL(neigh_event_ns);
1221
/* Initializes n->hh under write_lock_bh(&n->lock); safe against
 * concurrent initializers.  (An older comment claimed the caller held
 * read_lock_bh on n->lock, which would deadlock with the write lock
 * taken below.)
 */
/* Populate the neighbour's cached hardware header via the device's
 * header_ops->cache() hook.  hh_len != 0 marks the entry as already
 * initialized; the check-and-fill is serialized by n->lock so only one
 * thread performs the initialization.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1239
1240 /* This function can be used in contexts, where only old dev_queue_xmit
1241  * worked, f.e. if you want to override normal output path (eql, shaper),
1242  * but resolution is not made yet.
1243  */
1244
1245 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1246 {
1247         struct net_device *dev = skb->dev;
1248
1249         __skb_pull(skb, skb_network_offset(skb));
1250
1251         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1252                             skb->len) < 0 &&
1253             dev->header_ops->rebuild(skb))
1254                 return 0;
1255
1256         return dev_queue_xmit(skb);
1257 }
1258 EXPORT_SYMBOL(neigh_compat_output);
1259
1260 /* Slow and careful. */
1261
/* Output path for entries that may still need resolution.
 *
 * neigh_event_send() drives the state machine; a zero return means the
 * entry is usable now, so the hardware header is built (snapshotting
 * neigh->ha consistently via the ha_lock seqlock retry loop) and the
 * skb transmitted.  A non-zero return means the skb was queued or
 * consumed by the state machine, and 0 is returned.  Errors free the
 * skb and return -EINVAL.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Lazily initialize the cached hardware header. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Retry if the link-layer address changes mid-copy. */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1301
1302 /* As fast as possible without hh cache */
1303
1304 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1305 {
1306         struct net_device *dev = neigh->dev;
1307         unsigned int seq;
1308         int err;
1309
1310         do {
1311                 __skb_pull(skb, skb_network_offset(skb));
1312                 seq = read_seqbegin(&neigh->ha_lock);
1313                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1314                                       neigh->ha, NULL, skb->len);
1315         } while (read_seqretry(&neigh->ha_lock, seq));
1316
1317         if (err >= 0)
1318                 err = dev_queue_xmit(skb);
1319         else {
1320                 err = -EINVAL;
1321                 kfree_skb(skb);
1322         }
1323         return err;
1324 }
1325 EXPORT_SYMBOL(neigh_connected_output);
1326
/* Output path for devices that need no link-layer resolution at all:
 * hand the skb straight to the queueing layer.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1332
/* Proxy timer handler: walk the table's proxy queue, process (via
 * tbl->proxy_redo) every skb whose scheduled time has arrived, and
 * re-arm the timer for the earliest remaining entry.  Drops the
 * device reference taken by pneigh_enqueue().  Runs under the proxy
 * queue spinlock.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* 0 = nothing left to schedule */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due: replay it, unless the device went down. */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			/* Track the soonest still-pending entry. */
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1366
1367 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1368                     struct sk_buff *skb)
1369 {
1370         unsigned long now = jiffies;
1371         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1372
1373         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1374                 kfree_skb(skb);
1375                 return;
1376         }
1377
1378         NEIGH_CB(skb)->sched_next = sched_next;
1379         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1380
1381         spin_lock(&tbl->proxy_queue.lock);
1382         if (del_timer(&tbl->proxy_timer)) {
1383                 if (time_before(tbl->proxy_timer.expires, sched_next))
1384                         sched_next = tbl->proxy_timer.expires;
1385         }
1386         skb_dst_drop(skb);
1387         dev_hold(skb->dev);
1388         __skb_queue_tail(&tbl->proxy_queue, skb);
1389         mod_timer(&tbl->proxy_timer, sched_next);
1390         spin_unlock(&tbl->proxy_queue.lock);
1391 }
1392 EXPORT_SYMBOL(pneigh_enqueue);
1393
1394 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1395                                                       struct net *net, int ifindex)
1396 {
1397         struct neigh_parms *p;
1398
1399         for (p = &tbl->parms; p; p = p->next) {
1400                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1401                     (!p->dev && !ifindex))
1402                         return p;
1403         }
1404
1405         return NULL;
1406 }
1407
/* Allocate per-device neighbour parameters for @dev by cloning the
 * table's default parms, giving the driver a chance to adjust them via
 * ndo_neigh_setup(), and linking them into the table's parms list.
 *
 * Takes references on @dev and its netns; both are dropped when the
 * parms are released.  Returns the new parms, or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Clone from the table's device-less default parms. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Let the driver veto or tune the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		/* Publish on the table's list under tbl->lock. */
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1443
/* RCU callback: drop the final list reference on a neigh_parms after
 * a grace period, once no reader can still reach it via tbl->parms.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1451
/* Unlink @parms from @tbl's list and schedule its reference drop via
 * RCU.  The table's built-in default parms (&tbl->parms) are never
 * released.  The device reference taken in neigh_parms_alloc() is
 * dropped here; the struct itself is freed by the RCU callback chain.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1474
/* Final destructor for a neigh_parms: release the netns reference taken
 * at allocation and free the structure.  Reached via neigh_parms_put()
 * when the refcount hits zero.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1480
1481 static struct lock_class_key neigh_table_proxy_queue_class;
1482
/* Initialize a neighbour table's internals: default parms, entry slab
 * cache, per-cpu statistics, /proc entry, hash tables, locks, the
 * periodic GC work and the proxy timer.  Does NOT link the table into
 * the global table list — callers use neigh_table_init() for that.
 * Panics on allocation failure (called at protocol init time).
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (!tbl->kmem_cachep)
		tbl->kmem_cachep =
			kmem_cache_create(tbl->id, tbl->entry_size, 0,
					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					  NULL);
	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash: 2^3 = 8 buckets, grown on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	/* Proxy-neighbour hash is a fixed-size bucket array. */
	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1527
/* Fully initialize @tbl and register it on the global neigh_tables
 * list under neigh_tbl_lock.  Registering a second table for the same
 * address family is not supported: it is still linked in, but a loud
 * warning with a stack dump is emitted.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Scan for an existing table of the same family (tmp != NULL
	 * afterwards means a duplicate registration).
	 */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1549
/* Tear down @tbl on protocol unload: stop the GC work and proxy timer,
 * purge queued proxy skbs, drop all entries, unlink the table from the
 * global list, then free the hash tables (neighbour hash via RCU),
 * proc entry, statistics and slab cache.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	/* All entries should be gone by now; a nonzero count means
	 * something still holds references.
	 */
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the neighbour hash after a grace period; lockless
	 * readers may still be traversing it.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	kmem_cache_destroy(tbl->kmem_cachep);
	tbl->kmem_cachep = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1588
/* Netlink RTM_DELNEIGH handler: delete a neighbour (or proxy) entry.
 *
 * Validates the message, resolves the optional device, finds the table
 * matching ndm_family, and either removes the proxy entry (NTF_PROXY)
 * or forces the neighbour to NUD_FAILED via an administrative
 * neigh_update().  Runs under RTNL.  Note neigh_tbl_lock is dropped as
 * soon as the matching table is found; all "goto out" paths after that
 * point run unlocked.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		/* The destination must be at least key_len bytes. */
		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		/* Non-proxy deletion requires a device. */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		/* "Delete" by administratively failing the entry. */
		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1652
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * described by a netlink request.  Runs under RTNL.
 * Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
        if (err < 0)
                goto out;

        /* A destination address attribute is mandatory. */
        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* A supplied link-layer address must be at least as long
                 * as the device's hardware address.
                 */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        /* Find the table for the request's address family.  The list
         * lock is dropped as soon as a match is found and the loop is
         * exited via "goto out" on every path after that.
         */
        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
                struct neighbour *neigh;
                void *dst, *lladdr;

                if (tbl->family != ndm->ndm_family)
                        continue;
                read_unlock(&neigh_tbl_lock);

                if (nla_len(tb[NDA_DST]) < tbl->key_len)
                        goto out;
                dst = nla_data(tb[NDA_DST]);
                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

                /* Proxy entries live in the separate pneigh hash; they
                 * are created/ re-flagged here without touching the
                 * neighbour state machine.
                 */
                if (ndm->ndm_flags & NTF_PROXY) {
                        struct pneigh_entry *pn;

                        err = -ENOBUFS;
                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
                        if (pn) {
                                pn->flags = ndm->ndm_flags;
                                err = 0;
                        }
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, dst, dev);
                if (neigh == NULL) {
                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                                err = -ENOENT;
                                goto out;
                        }

                        neigh = __neigh_lookup_errno(tbl, dst, dev);
                        if (IS_ERR(neigh)) {
                                err = PTR_ERR(neigh);
                                goto out;
                        }
                } else {
                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
                                err = -EEXIST;
                                neigh_release(neigh);
                                goto out;
                        }

                        /* Without NLM_F_REPLACE an existing entry's
                         * lladdr is left alone.
                         */
                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
                }

                if (ndm->ndm_flags & NTF_USE) {
                        /* NTF_USE: just kick resolution as if traffic
                         * had been sent to this neighbour.
                         */
                        neigh_event_send(neigh, NULL);
                        err = 0;
                } else
                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
                neigh_release(neigh);
                goto out;
        }

        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;
out:
        return err;
}
1750
1751 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1752 {
1753         struct nlattr *nest;
1754
1755         nest = nla_nest_start(skb, NDTA_PARMS);
1756         if (nest == NULL)
1757                 return -ENOBUFS;
1758
1759         if (parms->dev)
1760                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1761
1762         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1763         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1764         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1765         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1766         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1767         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1768         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1769         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1770                       parms->base_reachable_time);
1771         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1772         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1773         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1774         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1775         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1776         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1777
1778         return nla_nest_end(skb, nest);
1779
1780 nla_put_failure:
1781         nla_nest_cancel(skb, nest);
1782         return -EMSGSIZE;
1783 }
1784
/* Fill an RTM_NEWNEIGHTBL message describing @tbl: family, GC
 * interval/thresholds, hash configuration, aggregated per-cpu
 * statistics and the table's default parameter block.
 * Returns the message length on success or -EMSGSIZE.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
        struct nlmsghdr *nlh;
        struct ndtmsg *ndtmsg;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndtmsg = nlmsg_data(nlh);

        /* tbl->lock keeps the copied values consistent.  The NLA_PUT*
         * macros below jump to nla_put_failure with this lock still
         * held; the error path drops it before cancelling the message.
         */
        read_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;

        NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
        NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
        NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
        NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
        NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

        {
                unsigned long now = jiffies;
                unsigned int flush_delta = now - tbl->last_flush;
                unsigned int rand_delta = now - tbl->last_rand;
                struct neigh_hash_table *nht;
                struct ndt_config ndc = {
                        .ndtc_key_len           = tbl->key_len,
                        .ndtc_entry_size        = tbl->entry_size,
                        .ndtc_entries           = atomic_read(&tbl->entries),
                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };

                /* The hash table itself is RCU-managed and needs its
                 * own read side in addition to tbl->lock.
                 */
                rcu_read_lock_bh();
                nht = rcu_dereference_bh(tbl->nht);
                ndc.ndtc_hash_rnd = nht->hash_rnd;
                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
                rcu_read_unlock_bh();

                NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
        }

        {
                int cpu;
                struct ndt_stats ndst;

                memset(&ndst, 0, sizeof(ndst));

                /* Sum the per-cpu counters into one ndt_stats blob. */
                for_each_possible_cpu(cpu) {
                        struct neigh_statistics *st;

                        st = per_cpu_ptr(tbl->stats, cpu);
                        ndst.ndts_allocs                += st->allocs;
                        ndst.ndts_destroys              += st->destroys;
                        ndst.ndts_hash_grows            += st->hash_grows;
                        ndst.ndts_res_failed            += st->res_failed;
                        ndst.ndts_lookups               += st->lookups;
                        ndst.ndts_hits                  += st->hits;
                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
                }

                NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
        }

        /* Only the table's default parms are expected here; per-device
         * blocks are dumped separately by neightbl_fill_param_info().
         */
        BUG_ON(tbl->parms.dev);
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;

        read_unlock_bh(&tbl->lock);
        return nlmsg_end(skb, nlh);

nla_put_failure:
        read_unlock_bh(&tbl->lock);
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1868
1869 static int neightbl_fill_param_info(struct sk_buff *skb,
1870                                     struct neigh_table *tbl,
1871                                     struct neigh_parms *parms,
1872                                     u32 pid, u32 seq, int type,
1873                                     unsigned int flags)
1874 {
1875         struct ndtmsg *ndtmsg;
1876         struct nlmsghdr *nlh;
1877
1878         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1879         if (nlh == NULL)
1880                 return -EMSGSIZE;
1881
1882         ndtmsg = nlmsg_data(nlh);
1883
1884         read_lock_bh(&tbl->lock);
1885         ndtmsg->ndtm_family = tbl->family;
1886         ndtmsg->ndtm_pad1   = 0;
1887         ndtmsg->ndtm_pad2   = 0;
1888
1889         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1890             neightbl_fill_parms(skb, parms) < 0)
1891                 goto errout;
1892
1893         read_unlock_bh(&tbl->lock);
1894         return nlmsg_end(skb, nlh);
1895 errout:
1896         read_unlock_bh(&tbl->lock);
1897         nlmsg_cancel(skb, nlh);
1898         return -EMSGSIZE;
1899 }
1900
/* Validation policy for the top-level NDTA_* attributes of
 * RTM_SETNEIGHTBL requests (see neightbl_set()).
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
        [NDTA_NAME]             = { .type = NLA_STRING },
        [NDTA_THRESH1]          = { .type = NLA_U32 },
        [NDTA_THRESH2]          = { .type = NLA_U32 },
        [NDTA_THRESH3]          = { .type = NLA_U32 },
        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
        [NDTA_PARMS]            = { .type = NLA_NESTED },
};
1909
/* Validation policy for the NDTPA_* attributes nested inside
 * NDTA_PARMS.  The u64 time attributes are read with nla_get_msecs()
 * in neightbl_set(), i.e. they are interpreted as milliseconds.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
};
1925
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC settings
 * and/or one of its parameter blocks.  The table is selected by
 * family (0 = any) plus the mandatory NDTA_NAME attribute.
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct neigh_table *tbl;
        struct ndtmsg *ndtmsg;
        struct nlattr *tb[NDTA_MAX+1];
        int err;

        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
                          nl_neightbl_policy);
        if (err < 0)
                goto errout;

        if (tb[NDTA_NAME] == NULL) {
                err = -EINVAL;
                goto errout;
        }

        /* Locate the table; neigh_tbl_lock stays held for the whole
         * update so the table cannot disappear from the list.
         */
        ndtmsg = nlmsg_data(nlh);
        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
                        continue;

                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
                        break;
        }

        if (tbl == NULL) {
                err = -ENOENT;
                goto errout_locked;
        }

        /*
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
        write_lock_bh(&tbl->lock);

        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
                struct neigh_parms *p;
                int i, ifindex = 0;

                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
                                       nl_ntbl_parm_policy);
                if (err < 0)
                        goto errout_tbl_lock;

                /* ifindex 0 selects the table's default parameters. */
                if (tbp[NDTPA_IFINDEX])
                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

                p = lookup_neigh_parms(tbl, net, ifindex);
                if (p == NULL) {
                        err = -ENOENT;
                        goto errout_tbl_lock;
                }

                /* Apply every supplied attribute to the parms block. */
                for (i = 1; i <= NDTPA_MAX; i++) {
                        if (tbp[i] == NULL)
                                continue;

                        switch (i) {
                        case NDTPA_QUEUE_LEN:
                                p->queue_len = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_PROXY_QLEN:
                                p->proxy_qlen = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_APP_PROBES:
                                p->app_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_UCAST_PROBES:
                                p->ucast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_MCAST_PROBES:
                                p->mcast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_BASE_REACHABLE_TIME:
                                p->base_reachable_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_GC_STALETIME:
                                p->gc_staletime = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_DELAY_PROBE_TIME:
                                p->delay_probe_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_RETRANS_TIME:
                                p->retrans_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_ANYCAST_DELAY:
                                p->anycast_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_PROXY_DELAY:
                                p->proxy_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_LOCKTIME:
                                p->locktime = nla_get_msecs(tbp[i]);
                                break;
                        }
                }
        }

        /* Table-wide GC knobs. */
        if (tb[NDTA_THRESH1])
                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

        if (tb[NDTA_THRESH2])
                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

        if (tb[NDTA_THRESH3])
                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

        if (tb[NDTA_GC_INTERVAL])
                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

        err = 0;

errout_tbl_lock:
        write_unlock_bh(&tbl->lock);
errout_locked:
        read_unlock(&neigh_tbl_lock);
errout:
        return err;
}
2050
/* RTM_GETNEIGHTBL dump handler: for each table (optionally filtered by
 * family) emit the table itself followed by its per-device parameter
 * blocks.  cb->args[0]/args[1] record the table and parms indices so
 * an interrupted dump can resume.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int family, tidx, nidx = 0;
        int tbl_skip = cb->args[0];
        int neigh_skip = cb->args[1];
        struct neigh_table *tbl;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
                struct neigh_parms *p;

                if (tidx < tbl_skip || (family && tbl->family != family))
                        continue;

                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
                                       NLM_F_MULTI) <= 0)
                        break;

                /* tbl->parms itself was emitted by neightbl_fill_info()
                 * above; walk only the per-device clones chained off it.
                 */
                for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
                        if (!net_eq(neigh_parms_net(p), net))
                                continue;

                        if (nidx < neigh_skip)
                                goto next;

                        if (neightbl_fill_param_info(skb, tbl, p,
                                                     NETLINK_CB(cb->skb).pid,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGHTBL,
                                                     NLM_F_MULTI) <= 0)
                                goto out;
                next:
                        nidx++;
                }

                /* Only the first (resumed) table gets a parms skip. */
                neigh_skip = 0;
        }
out:
        read_unlock(&neigh_tbl_lock);
        cb->args[0] = tidx;
        cb->args[1] = nidx;

        return skb->len;
}
2099
/* Fill one RTM_NEWNEIGH message for @neigh.  Volatile fields (state,
 * hardware address, timestamps, refcnt) are snapshotted under
 * neigh->lock; the lock is dropped before the trailing attributes are
 * emitted.  Returns the message length or -EMSGSIZE.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                /* Snapshot the hw address under the lock, then emit it
                 * with plain nla_put() so a failure can release the
                 * lock before jumping to the error path (the NLA_PUT
                 * macro would goto with the lock still held).
                 */
                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        /* Ages are reported relative to "now", in clock_t units. */
        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
        NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2149
/* Propagate a neighbour change: in-kernel netevent notifier chain
 * first, then an RTM_NEWNEIGH netlink notification to userspace.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2155
/* Dump all entries of @tbl belonging to the caller's net namespace
 * into @skb.  cb->args[1]/args[2] carry the hash bucket and index
 * within the bucket so an interrupted dump can resume.  Returns
 * skb->len when the table is exhausted, -1 if the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = 0; h < (1 << nht->hash_shift); h++) {
                if (h < s_h)
                        continue;
                if (h > s_h)
                        /* Only the resumed bucket is partially dumped. */
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (!net_eq(dev_net(n->dev), net))
                                continue;
                        if (idx < s_idx)
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI) <= 0) {
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        /* Save the resume position for the next invocation. */
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2198
/* RTM_GETNEIGH dump handler: iterate over all neighbour tables
 * (optionally filtered by family) and dump each via
 * neigh_dump_table().  cb->args[0] remembers the table index.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;

        read_lock(&neigh_tbl_lock);
        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
        s_t = cb->args[0];

        for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
                if (t < s_t || (family && tbl->family != family))
                        continue;
                if (t > s_t)
                        /* Entering a new table: clear its resume state
                         * (args[1..]) left over from the previous one.
                         */
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (neigh_dump_table(tbl, skb, cb) < 0)
                        break;
        }
        read_unlock(&neigh_tbl_lock);

        cb->args[0] = t;
        return skb->len;
}
2222
2223 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2224 {
2225         int chain;
2226         struct neigh_hash_table *nht;
2227
2228         rcu_read_lock_bh();
2229         nht = rcu_dereference_bh(tbl->nht);
2230
2231         read_lock(&tbl->lock); /* avoid resizes */
2232         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2233                 struct neighbour *n;
2234
2235                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2236                      n != NULL;
2237                      n = rcu_dereference_bh(n->next))
2238                         cb(n, cookie);
2239         }
2240         read_unlock(&tbl->lock);
2241         rcu_read_unlock_bh();
2242 }
2243 EXPORT_SYMBOL(neigh_for_each);
2244
/* The tbl->lock must be held as a writer and BH disabled. */
/* Ask @cb about every neighbour in the hash; entries for which @cb
 * returns nonzero are spliced out of their chain, marked dead and
 * handed to neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* Unlink @n; @np stays on the same link,
                                 * which now holds the old n->next.
                                 */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2279
2280 #ifdef CONFIG_PROC_FS
2281
/* Return the first hash entry visible to this seq iterator: same net
 * namespace, accepted by the protocol sub-iterator (if any), and not
 * NOARP-only when NEIGH_SEQ_SKIP_NOARP is set.  Records the bucket
 * reached in state->bucket and clears the IS_PNEIGH phase flag.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        int bucket = state->bucket; /* initializer unused: overwritten below */

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                loff_t fakep = 0;
                                void *v;

                                /* Protocol hook may reject the entry. */
                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
2320
/* Advance past @n to the next acceptable neighbour, moving to later
 * buckets as needed.  When @pos is non-NULL it is decremented for each
 * successful advance (used by neigh_get_idx() to seek).  Returns NULL
 * when the hash is exhausted.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        if (state->neigh_sub_iter) {
                void *v = state->neigh_sub_iter(state, n, pos);
                /* Sub-iterator still has positions within @n itself. */
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2368
2369 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2370 {
2371         struct neighbour *n = neigh_get_first(seq);
2372
2373         if (n) {
2374                 --(*pos);
2375                 while (*pos) {
2376                         n = neigh_get_next(seq, n, pos);
2377                         if (!n)
2378                                 break;
2379                 }
2380         }
2381         return *pos ? NULL : n;
2382 }
2383
2384 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2385 {
2386         struct neigh_seq_state *state = seq->private;
2387         struct net *net = seq_file_net(seq);
2388         struct neigh_table *tbl = state->tbl;
2389         struct pneigh_entry *pn = NULL;
2390         int bucket = state->bucket;
2391
2392         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2393         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2394                 pn = tbl->phash_buckets[bucket];
2395                 while (pn && !net_eq(pneigh_net(pn), net))
2396                         pn = pn->next;
2397                 if (pn)
2398                         break;
2399         }
2400         state->bucket = bucket;
2401
2402         return pn;
2403 }
2404
2405 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2406                                             struct pneigh_entry *pn,
2407                                             loff_t *pos)
2408 {
2409         struct neigh_seq_state *state = seq->private;
2410         struct net *net = seq_file_net(seq);
2411         struct neigh_table *tbl = state->tbl;
2412
2413         do {
2414                 pn = pn->next;
2415         } while (pn && !net_eq(pneigh_net(pn), net));
2416
2417         while (!pn) {
2418                 if (++state->bucket > PNEIGH_HASHMASK)
2419                         break;
2420                 pn = tbl->phash_buckets[state->bucket];
2421                 while (pn && !net_eq(pneigh_net(pn), net))
2422                         pn = pn->next;
2423                 if (pn)
2424                         break;
2425         }
2426
2427         if (pn && pos)
2428                 --(*pos);
2429
2430         return pn;
2431 }
2432
2433 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2434 {
2435         struct pneigh_entry *pn = pneigh_get_first(seq);
2436
2437         if (pn) {
2438                 --(*pos);
2439                 while (*pos) {
2440                         pn = pneigh_get_next(seq, pn, pos);
2441                         if (!pn)
2442                                 break;
2443                 }
2444         }
2445         return *pos ? NULL : pn;
2446 }
2447
2448 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2449 {
2450         struct neigh_seq_state *state = seq->private;
2451         void *rc;
2452         loff_t idxpos = *pos;
2453
2454         rc = neigh_get_idx(seq, &idxpos);
2455         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2456                 rc = pneigh_get_idx(seq, &idxpos);
2457
2458         return rc;
2459 }
2460
/* Common seq_file start callback for neighbour-table /proc files.
 * Takes rcu_read_lock_bh() — held until neigh_seq_stop() — and
 * captures the current hash table.  Position 0 yields the header
 * token; other positions are resolved via neigh_get_idx_any().
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2476
/* Common seq_file next callback: after the header token comes the
 * neighbour hash; once exhausted, iteration falls through to the
 * proxy list (unless NEIGH_SEQ_NEIGH_ONLY).  NEIGH_SEQ_IS_PNEIGH in
 * state->flags records which phase the iterator is in.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                /* Neighbour hash exhausted: switch to proxy entries. */
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2503
/* Common seq_file ->stop: drop the RCU-bh lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2510
2511 /* statistics via seq_file */
2512
2513 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2514 {
2515         struct neigh_table *tbl = seq->private;
2516         int cpu;
2517
2518         if (*pos == 0)
2519                 return SEQ_START_TOKEN;
2520
2521         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2522                 if (!cpu_possible(cpu))
2523                         continue;
2524                 *pos = cpu+1;
2525                 return per_cpu_ptr(tbl->stats, cpu);
2526         }
2527         return NULL;
2528 }
2529
2530 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2531 {
2532         struct neigh_table *tbl = seq->private;
2533         int cpu;
2534
2535         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2536                 if (!cpu_possible(cpu))
2537                         continue;
2538                 *pos = cpu+1;
2539                 return per_cpu_ptr(tbl->stats, cpu);
2540         }
2541         return NULL;
2542 }
2543
/* seq_file ->stop: nothing to release — the start path takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2548
/*
 * seq_file ->show: emit the header row for SEQ_START_TOKEN, otherwise one
 * row of per-cpu counters from the neigh_statistics block in v.  The
 * "entries" column is the table-wide total and so repeats on every row.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2582
/* seq_file iterator for /proc/net/stat/<table> statistics. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2589
2590 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2591 {
2592         int ret = seq_open(file, &neigh_stat_seq_ops);
2593
2594         if (!ret) {
2595                 struct seq_file *sf = file->private_data;
2596                 sf->private = PDE(inode)->data;
2597         }
2598         return ret;
2599 };
2600
/* proc file_operations backing /proc/net/stat/<table>. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2608
2609 #endif /* CONFIG_PROC_FS */
2610
2611 static inline size_t neigh_nlmsg_size(void)
2612 {
2613         return NLMSG_ALIGN(sizeof(struct ndmsg))
2614                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2615                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2616                + nla_total_size(sizeof(struct nda_cacheinfo))
2617                + nla_total_size(4); /* NDA_PROBES */
2618 }
2619
2620 static void __neigh_notify(struct neighbour *n, int type, int flags)
2621 {
2622         struct net *net = dev_net(n->dev);
2623         struct sk_buff *skb;
2624         int err = -ENOBUFS;
2625
2626         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2627         if (skb == NULL)
2628                 goto errout;
2629
2630         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2631         if (err < 0) {
2632                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2633                 WARN_ON(err == -EMSGSIZE);
2634                 kfree_skb(skb);
2635                 goto errout;
2636         }
2637         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2638         return;
2639 errout:
2640         if (err < 0)
2641                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2642 }
2643
2644 #ifdef CONFIG_ARPD
/* ARPD: ask the user-space ARP daemon to resolve this neighbour by
 * multicasting an RTM_GETNEIGH request to RTNLGRP_NEIGH listeners. */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2650 #endif /* CONFIG_ARPD */
2651
2652 #ifdef CONFIG_SYSCTL
2653
/* 18 tunables + terminating sentinel. */
#define NEIGH_VARS_MAX 19

/*
 * Template sysctl table cloned (kmemdup) by neigh_sysctl_register() for
 * each device / protocol.  The .data pointers are filled in there by
 * POSITIONAL index, so the entry order below is part of the contract —
 * the [N] markers must stay in sync with neigh_sysctl_register().
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
	char *dev_name;		/* strdup'd directory name, freed on unregister */
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		{ /* [0] */
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [1] */
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [2] */
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [3] */
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{ /* [4] */
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{ /* [5] */
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{ /* [6] */
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{ /* [7] */
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [8] */
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [9] */
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{ /* [10] */
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{ /* [11] */
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{ /* [12] */
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{ /* [13] */
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{ /* [14] — default-only from here down */
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{ /* [15] */
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [16] */
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* [17] */
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2773
/*
 * Register the per-device (or per-protocol default) neighbour sysctl
 * tree under net/<p_name>/neigh/<dev|default>/.  A copy of
 * neigh_sysctl_template is made and its entries are wired to the fields
 * of @p by positional index — keep in sync with the template above.
 * @handler, when given, overrides the time-valued entries so protocols
 * can post-process updates.  Returns 0 or -ENOBUFS on any failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Indices [0..13] map to neigh_parms fields; [12..13] alias the same
	 * fields as [3..4] but in milliseconds via their proc handlers. */
	t->neigh_vars[0].data  = &p->mcast_probes;
	t->neigh_vars[1].data  = &p->ucast_probes;
	t->neigh_vars[2].data  = &p->app_probes;
	t->neigh_vars[3].data  = &p->retrans_time;
	t->neigh_vars[4].data  = &p->base_reachable_time;
	t->neigh_vars[5].data  = &p->delay_probe_time;
	t->neigh_vars[6].data  = &p->gc_staletime;
	t->neigh_vars[7].data  = &p->queue_len;
	t->neigh_vars[8].data  = &p->proxy_qlen;
	t->neigh_vars[9].data  = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	t->neigh_vars[12].data  = &p->retrans_time;
	t->neigh_vars[13].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: the gc_* knobs [14..17] exist
		 * only in the "default" directory, not per device. */
		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* NOTE(review): [14..17] point at the four ints laid out
		 * immediately after the neigh_parms struct — presumably the
		 * table's gc_interval/gc_thresh1..3; confirm against the
		 * neigh_table layout before touching this. */
		t->neigh_vars[14].data = (int *)(p + 1);
		t->neigh_vars[15].data = (int *)(p + 1) + 1;
		t->neigh_vars[16].data = (int *)(p + 1) + 2;
		t->neigh_vars[17].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[3].proc_handler = handler;
		t->neigh_vars[3].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[4].proc_handler = handler;
		t->neigh_vars[4].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[12].proc_handler = handler;
		t->neigh_vars[12].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[13].proc_handler = handler;
		t->neigh_vars[13].extra1 = dev;
	}

	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

	/* goto-based unwind: free in reverse order of acquisition. */
free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2863
2864 void neigh_sysctl_unregister(struct neigh_parms *p)
2865 {
2866         if (p->sysctl_table) {
2867                 struct neigh_sysctl_table *t = p->sysctl_table;
2868                 p->sysctl_table = NULL;
2869                 unregister_sysctl_table(t->sysctl_header);
2870                 kfree(t->dev_name);
2871                 kfree(t);
2872         }
2873 }
2874 EXPORT_SYMBOL(neigh_sysctl_unregister);
2875
2876 #endif  /* CONFIG_SYSCTL */
2877
/* Hook the neighbour-cache rtnetlink message handlers (PF_UNSPEC so they
 * serve every address family) at subsystem-init time. */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
2892