xfrm: Reinject transport-mode packets through tasklet
[pandora-kernel.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 #include "fib_lookup.h"
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75         },
76 };
77
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79         .data = {
80                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85         },
86 };
87
/*
 * Read attribute @attr from the default devconf block that namespace
 * @net points to (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that any pointer-valued expression (for
 * example "base + 1") can be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92         [IFA_LOCAL]             = { .type = NLA_U32 },
93         [IFA_ADDRESS]           = { .type = NLA_U32 },
94         [IFA_BROADCAST]         = { .type = NLA_U32 },
95         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97
98 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
99  * value.  So if you change this define, make appropriate changes to
100  * inet_addr_hash as well.
101  */
102 #define IN4_ADDR_HSIZE  256
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109
110         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111                 (IN4_ADDR_HSIZE - 1));
112 }
113
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118         spin_lock(&inet_addr_hash_lock);
119         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120         spin_unlock(&inet_addr_hash_lock);
121 }
122
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125         spin_lock(&inet_addr_hash_lock);
126         hlist_del_init_rcu(&ifa->hash);
127         spin_unlock(&inet_addr_hash_lock);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         unsigned int hash = inet_addr_hash(net, addr);
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143         struct hlist_node *node;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147                 struct net_device *dev = ifa->ifa_dev->dev;
148
149                 if (!net_eq(dev_net(dev), net))
150                         continue;
151                 if (ifa->ifa_local == addr) {
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static void devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static inline void devinet_sysctl_register(struct in_device *idev)
187 {
188 }
189 static inline void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193
194 /* Locks all the inet devices. */
195
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204         if (ifa->ifa_dev)
205                 in_dev_put(ifa->ifa_dev);
206         kfree(ifa);
207 }
208
209 static inline void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216         struct net_device *dev = idev->dev;
217
218         WARN_ON(idev->ifa_list);
219         WARN_ON(idev->mc_list);
220 #ifdef NET_REFCNT_DEBUG
221         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222                idev, dev ? dev->name : "NIL");
223 #endif
224         dev_put(dev);
225         if (!idev->dead)
226                 pr_err("Freeing alive in_device %p\n", idev);
227         else
228                 kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234         struct in_device *in_dev;
235
236         ASSERT_RTNL();
237
238         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239         if (!in_dev)
240                 goto out;
241         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242                         sizeof(in_dev->cnf));
243         in_dev->cnf.sysctl = NULL;
244         in_dev->dev = dev;
245         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246         if (!in_dev->arp_parms)
247                 goto out_kfree;
248         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249                 dev_disable_lro(dev);
250         /* Reference in_dev->dev */
251         dev_hold(dev);
252         /* Account for reference dev->ip_ptr (below) */
253         in_dev_hold(in_dev);
254
255         devinet_sysctl_register(in_dev);
256         ip_mc_init_dev(in_dev);
257         if (dev->flags & IFF_UP)
258                 ip_mc_up(in_dev);
259
260         /* we can receive as soon as ip_ptr is set -- do this last */
261         rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263         return in_dev;
264 out_kfree:
265         kfree(in_dev);
266         in_dev = NULL;
267         goto out;
268 }
269
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272         struct in_device *idev = container_of(head, struct in_device, rcu_head);
273         in_dev_put(idev);
274 }
275
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278         struct in_ifaddr *ifa;
279         struct net_device *dev;
280
281         ASSERT_RTNL();
282
283         dev = in_dev->dev;
284
285         in_dev->dead = 1;
286
287         ip_mc_destroy_dev(in_dev);
288
289         while ((ifa = in_dev->ifa_list) != NULL) {
290                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291                 inet_free_ifa(ifa);
292         }
293
294         RCU_INIT_POINTER(dev->ip_ptr, NULL);
295
296         devinet_sysctl_unregister(in_dev);
297         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298         arp_ifdown(dev);
299
300         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305         rcu_read_lock();
306         for_primary_ifa(in_dev) {
307                 if (inet_ifa_match(a, ifa)) {
308                         if (!b || inet_ifa_match(b, ifa)) {
309                                 rcu_read_unlock();
310                                 return 1;
311                         }
312                 }
313         } endfor_ifa(in_dev);
314         rcu_read_unlock();
315         return 0;
316 }
317
318 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319                          int destroy, struct nlmsghdr *nlh, u32 pid)
320 {
321         struct in_ifaddr *promote = NULL;
322         struct in_ifaddr *ifa, *ifa1 = *ifap;
323         struct in_ifaddr *last_prim = in_dev->ifa_list;
324         struct in_ifaddr *prev_prom = NULL;
325         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326
327         ASSERT_RTNL();
328
329         if (in_dev->dead)
330                 goto no_promotions;
331
332         /* 1. Deleting primary ifaddr forces deletion all secondaries
333          * unless alias promotion is set
334          **/
335
336         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338
339                 while ((ifa = *ifap1) != NULL) {
340                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341                             ifa1->ifa_scope <= ifa->ifa_scope)
342                                 last_prim = ifa;
343
344                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345                             ifa1->ifa_mask != ifa->ifa_mask ||
346                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
347                                 ifap1 = &ifa->ifa_next;
348                                 prev_prom = ifa;
349                                 continue;
350                         }
351
352                         if (!do_promote) {
353                                 inet_hash_remove(ifa);
354                                 *ifap1 = ifa->ifa_next;
355
356                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
357                                 blocking_notifier_call_chain(&inetaddr_chain,
358                                                 NETDEV_DOWN, ifa);
359                                 inet_free_ifa(ifa);
360                         } else {
361                                 promote = ifa;
362                                 break;
363                         }
364                 }
365         }
366
367         /* On promotion all secondaries from subnet are changing
368          * the primary IP, we must remove all their routes silently
369          * and later to add them back with new prefsrc. Do this
370          * while all addresses are on the device list.
371          */
372         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373                 if (ifa1->ifa_mask == ifa->ifa_mask &&
374                     inet_ifa_match(ifa1->ifa_address, ifa))
375                         fib_del_ifaddr(ifa, ifa1);
376         }
377
378 no_promotions:
379         /* 2. Unlink it */
380
381         *ifap = ifa1->ifa_next;
382         inet_hash_remove(ifa1);
383
384         /* 3. Announce address deletion */
385
386         /* Send message first, then call notifier.
387            At first sight, FIB update triggered by notifier
388            will refer to already deleted ifaddr, that could confuse
389            netlink listeners. It is not true: look, gated sees
390            that route deleted and if it still thinks that ifaddr
391            is valid, it will try to restore deleted routes... Grr.
392            So that, this order is correct.
393          */
394         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
395         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
396
397         if (promote) {
398                 struct in_ifaddr *next_sec = promote->ifa_next;
399
400                 if (prev_prom) {
401                         prev_prom->ifa_next = promote->ifa_next;
402                         promote->ifa_next = last_prim->ifa_next;
403                         last_prim->ifa_next = promote;
404                 }
405
406                 promote->ifa_flags &= ~IFA_F_SECONDARY;
407                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
408                 blocking_notifier_call_chain(&inetaddr_chain,
409                                 NETDEV_UP, promote);
410                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
411                         if (ifa1->ifa_mask != ifa->ifa_mask ||
412                             !inet_ifa_match(ifa1->ifa_address, ifa))
413                                         continue;
414                         fib_add_ifaddr(ifa);
415                 }
416
417         }
418         if (destroy)
419                 inet_free_ifa(ifa1);
420 }
421
422 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
423                          int destroy)
424 {
425         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
426 }
427
428 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429                              u32 pid)
430 {
431         struct in_device *in_dev = ifa->ifa_dev;
432         struct in_ifaddr *ifa1, **ifap, **last_primary;
433
434         ASSERT_RTNL();
435
436         if (!ifa->ifa_local) {
437                 inet_free_ifa(ifa);
438                 return 0;
439         }
440
441         ifa->ifa_flags &= ~IFA_F_SECONDARY;
442         last_primary = &in_dev->ifa_list;
443
444         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
445              ifap = &ifa1->ifa_next) {
446                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
447                     ifa->ifa_scope <= ifa1->ifa_scope)
448                         last_primary = &ifa1->ifa_next;
449                 if (ifa1->ifa_mask == ifa->ifa_mask &&
450                     inet_ifa_match(ifa1->ifa_address, ifa)) {
451                         if (ifa1->ifa_local == ifa->ifa_local) {
452                                 inet_free_ifa(ifa);
453                                 return -EEXIST;
454                         }
455                         if (ifa1->ifa_scope != ifa->ifa_scope) {
456                                 inet_free_ifa(ifa);
457                                 return -EINVAL;
458                         }
459                         ifa->ifa_flags |= IFA_F_SECONDARY;
460                 }
461         }
462
463         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
464                 net_srandom(ifa->ifa_local);
465                 ifap = last_primary;
466         }
467
468         ifa->ifa_next = *ifap;
469         *ifap = ifa;
470
471         inet_hash_insert(dev_net(in_dev->dev), ifa);
472
473         /* Send message first, then call notifier.
474            Notifier will trigger FIB update, so that
475            listeners of netlink will know about new ifaddr */
476         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
477         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479         return 0;
480 }
481
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484         return __inet_insert_ifa(ifa, NULL, 0);
485 }
486
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489         struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491         ASSERT_RTNL();
492
493         if (!in_dev) {
494                 inet_free_ifa(ifa);
495                 return -ENOBUFS;
496         }
497         ipv4_devconf_setall(in_dev);
498         if (ifa->ifa_dev != in_dev) {
499                 WARN_ON(ifa->ifa_dev);
500                 in_dev_hold(in_dev);
501                 ifa->ifa_dev = in_dev;
502         }
503         if (ipv4_is_loopback(ifa->ifa_local))
504                 ifa->ifa_scope = RT_SCOPE_HOST;
505         return inet_insert_ifa(ifa);
506 }
507
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513         struct net_device *dev;
514         struct in_device *in_dev = NULL;
515
516         rcu_read_lock();
517         dev = dev_get_by_index_rcu(net, ifindex);
518         if (dev)
519                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520         rcu_read_unlock();
521         return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524
525 /* Called only from RTNL semaphored context. No locks. */
526
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528                                     __be32 mask)
529 {
530         ASSERT_RTNL();
531
532         for_primary_ifa(in_dev) {
533                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534                         return ifa;
535         } endfor_ifa(in_dev);
536         return NULL;
537 }
538
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
540 {
541         struct net *net = sock_net(skb->sk);
542         struct nlattr *tb[IFA_MAX+1];
543         struct in_device *in_dev;
544         struct ifaddrmsg *ifm;
545         struct in_ifaddr *ifa, **ifap;
546         int err = -EINVAL;
547
548         ASSERT_RTNL();
549
550         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551         if (err < 0)
552                 goto errout;
553
554         ifm = nlmsg_data(nlh);
555         in_dev = inetdev_by_index(net, ifm->ifa_index);
556         if (in_dev == NULL) {
557                 err = -ENODEV;
558                 goto errout;
559         }
560
561         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562              ifap = &ifa->ifa_next) {
563                 if (tb[IFA_LOCAL] &&
564                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565                         continue;
566
567                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568                         continue;
569
570                 if (tb[IFA_ADDRESS] &&
571                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573                         continue;
574
575                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
576                 return 0;
577         }
578
579         err = -EADDRNOTAVAIL;
580 errout:
581         return err;
582 }
583
584 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
585 {
586         struct nlattr *tb[IFA_MAX+1];
587         struct in_ifaddr *ifa;
588         struct ifaddrmsg *ifm;
589         struct net_device *dev;
590         struct in_device *in_dev;
591         int err;
592
593         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
594         if (err < 0)
595                 goto errout;
596
597         ifm = nlmsg_data(nlh);
598         err = -EINVAL;
599         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
600                 goto errout;
601
602         dev = __dev_get_by_index(net, ifm->ifa_index);
603         err = -ENODEV;
604         if (dev == NULL)
605                 goto errout;
606
607         in_dev = __in_dev_get_rtnl(dev);
608         err = -ENOBUFS;
609         if (in_dev == NULL)
610                 goto errout;
611
612         ifa = inet_alloc_ifa();
613         if (ifa == NULL)
614                 /*
615                  * A potential indev allocation can be left alive, it stays
616                  * assigned to its device and is destroy with it.
617                  */
618                 goto errout;
619
620         ipv4_devconf_setall(in_dev);
621         in_dev_hold(in_dev);
622
623         if (tb[IFA_ADDRESS] == NULL)
624                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
625
626         INIT_HLIST_NODE(&ifa->hash);
627         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
628         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
629         ifa->ifa_flags = ifm->ifa_flags;
630         ifa->ifa_scope = ifm->ifa_scope;
631         ifa->ifa_dev = in_dev;
632
633         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
634         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
635
636         if (tb[IFA_BROADCAST])
637                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
638
639         if (tb[IFA_LABEL])
640                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
641         else
642                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
643
644         return ifa;
645
646 errout:
647         return ERR_PTR(err);
648 }
649
650 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
651 {
652         struct net *net = sock_net(skb->sk);
653         struct in_ifaddr *ifa;
654
655         ASSERT_RTNL();
656
657         ifa = rtm_to_ifaddr(net, nlh);
658         if (IS_ERR(ifa))
659                 return PTR_ERR(ifa);
660
661         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
662 }
663
664 /*
665  *      Determine a default network mask, based on the IP address.
666  */
667
668 static inline int inet_abc_len(__be32 addr)
669 {
670         int rc = -1;    /* Something else, probably a multicast. */
671
672         if (ipv4_is_zeronet(addr))
673                 rc = 0;
674         else {
675                 __u32 haddr = ntohl(addr);
676
677                 if (IN_CLASSA(haddr))
678                         rc = 8;
679                 else if (IN_CLASSB(haddr))
680                         rc = 16;
681                 else if (IN_CLASSC(haddr))
682                         rc = 24;
683         }
684
685         return rc;
686 }
687
688
689 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
690 {
691         struct ifreq ifr;
692         struct sockaddr_in sin_orig;
693         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
694         struct in_device *in_dev;
695         struct in_ifaddr **ifap = NULL;
696         struct in_ifaddr *ifa = NULL;
697         struct net_device *dev;
698         char *colon;
699         int ret = -EFAULT;
700         int tryaddrmatch = 0;
701
702         /*
703          *      Fetch the caller's info block into kernel space
704          */
705
706         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
707                 goto out;
708         ifr.ifr_name[IFNAMSIZ - 1] = 0;
709
710         /* save original address for comparison */
711         memcpy(&sin_orig, sin, sizeof(*sin));
712
713         colon = strchr(ifr.ifr_name, ':');
714         if (colon)
715                 *colon = 0;
716
717         dev_load(net, ifr.ifr_name);
718
719         switch (cmd) {
720         case SIOCGIFADDR:       /* Get interface address */
721         case SIOCGIFBRDADDR:    /* Get the broadcast address */
722         case SIOCGIFDSTADDR:    /* Get the destination address */
723         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
724                 /* Note that these ioctls will not sleep,
725                    so that we do not impose a lock.
726                    One day we will be forced to put shlock here (I mean SMP)
727                  */
728                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
729                 memset(sin, 0, sizeof(*sin));
730                 sin->sin_family = AF_INET;
731                 break;
732
733         case SIOCSIFFLAGS:
734                 ret = -EACCES;
735                 if (!capable(CAP_NET_ADMIN))
736                         goto out;
737                 break;
738         case SIOCSIFADDR:       /* Set interface address (and family) */
739         case SIOCSIFBRDADDR:    /* Set the broadcast address */
740         case SIOCSIFDSTADDR:    /* Set the destination address */
741         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
742                 ret = -EACCES;
743                 if (!capable(CAP_NET_ADMIN))
744                         goto out;
745                 ret = -EINVAL;
746                 if (sin->sin_family != AF_INET)
747                         goto out;
748                 break;
749         default:
750                 ret = -EINVAL;
751                 goto out;
752         }
753
754         rtnl_lock();
755
756         ret = -ENODEV;
757         dev = __dev_get_by_name(net, ifr.ifr_name);
758         if (!dev)
759                 goto done;
760
761         if (colon)
762                 *colon = ':';
763
764         in_dev = __in_dev_get_rtnl(dev);
765         if (in_dev) {
766                 if (tryaddrmatch) {
767                         /* Matthias Andree */
768                         /* compare label and address (4.4BSD style) */
769                         /* note: we only do this for a limited set of ioctls
770                            and only if the original address family was AF_INET.
771                            This is checked above. */
772                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
773                              ifap = &ifa->ifa_next) {
774                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
775                                     sin_orig.sin_addr.s_addr ==
776                                                         ifa->ifa_local) {
777                                         break; /* found */
778                                 }
779                         }
780                 }
781                 /* we didn't get a match, maybe the application is
782                    4.3BSD-style and passed in junk so we fall back to
783                    comparing just the label */
784                 if (!ifa) {
785                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
786                              ifap = &ifa->ifa_next)
787                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
788                                         break;
789                 }
790         }
791
792         ret = -EADDRNOTAVAIL;
793         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
794                 goto done;
795
796         switch (cmd) {
797         case SIOCGIFADDR:       /* Get interface address */
798                 sin->sin_addr.s_addr = ifa->ifa_local;
799                 goto rarok;
800
801         case SIOCGIFBRDADDR:    /* Get the broadcast address */
802                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
803                 goto rarok;
804
805         case SIOCGIFDSTADDR:    /* Get the destination address */
806                 sin->sin_addr.s_addr = ifa->ifa_address;
807                 goto rarok;
808
809         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
810                 sin->sin_addr.s_addr = ifa->ifa_mask;
811                 goto rarok;
812
813         case SIOCSIFFLAGS:
814                 if (colon) {
815                         ret = -EADDRNOTAVAIL;
816                         if (!ifa)
817                                 break;
818                         ret = 0;
819                         if (!(ifr.ifr_flags & IFF_UP))
820                                 inet_del_ifa(in_dev, ifap, 1);
821                         break;
822                 }
823                 ret = dev_change_flags(dev, ifr.ifr_flags);
824                 break;
825
826         case SIOCSIFADDR:       /* Set interface address (and family) */
827                 ret = -EINVAL;
828                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
829                         break;
830
831                 if (!ifa) {
832                         ret = -ENOBUFS;
833                         ifa = inet_alloc_ifa();
834                         INIT_HLIST_NODE(&ifa->hash);
835                         if (!ifa)
836                                 break;
837                         if (colon)
838                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
839                         else
840                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
841                 } else {
842                         ret = 0;
843                         if (ifa->ifa_local == sin->sin_addr.s_addr)
844                                 break;
845                         inet_del_ifa(in_dev, ifap, 0);
846                         ifa->ifa_broadcast = 0;
847                         ifa->ifa_scope = 0;
848                 }
849
850                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
851
852                 if (!(dev->flags & IFF_POINTOPOINT)) {
853                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
854                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
855                         if ((dev->flags & IFF_BROADCAST) &&
856                             ifa->ifa_prefixlen < 31)
857                                 ifa->ifa_broadcast = ifa->ifa_address |
858                                                      ~ifa->ifa_mask;
859                 } else {
860                         ifa->ifa_prefixlen = 32;
861                         ifa->ifa_mask = inet_make_mask(32);
862                 }
863                 ret = inet_set_ifa(dev, ifa);
864                 break;
865
866         case SIOCSIFBRDADDR:    /* Set the broadcast address */
867                 ret = 0;
868                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
869                         inet_del_ifa(in_dev, ifap, 0);
870                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
871                         inet_insert_ifa(ifa);
872                 }
873                 break;
874
875         case SIOCSIFDSTADDR:    /* Set the destination address */
876                 ret = 0;
877                 if (ifa->ifa_address == sin->sin_addr.s_addr)
878                         break;
879                 ret = -EINVAL;
880                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
881                         break;
882                 ret = 0;
883                 inet_del_ifa(in_dev, ifap, 0);
884                 ifa->ifa_address = sin->sin_addr.s_addr;
885                 inet_insert_ifa(ifa);
886                 break;
887
888         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
889
890                 /*
891                  *      The mask we set must be legal.
892                  */
893                 ret = -EINVAL;
894                 if (bad_mask(sin->sin_addr.s_addr, 0))
895                         break;
896                 ret = 0;
897                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
898                         __be32 old_mask = ifa->ifa_mask;
899                         inet_del_ifa(in_dev, ifap, 0);
900                         ifa->ifa_mask = sin->sin_addr.s_addr;
901                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
902
903                         /* See if current broadcast address matches
904                          * with current netmask, then recalculate
905                          * the broadcast address. Otherwise it's a
906                          * funny address, so don't touch it since
907                          * the user seems to know what (s)he's doing...
908                          */
909                         if ((dev->flags & IFF_BROADCAST) &&
910                             (ifa->ifa_prefixlen < 31) &&
911                             (ifa->ifa_broadcast ==
912                              (ifa->ifa_local|~old_mask))) {
913                                 ifa->ifa_broadcast = (ifa->ifa_local |
914                                                       ~sin->sin_addr.s_addr);
915                         }
916                         inet_insert_ifa(ifa);
917                 }
918                 break;
919         }
920 done:
921         rtnl_unlock();
922 out:
923         return ret;
924 rarok:
925         rtnl_unlock();
926         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
927         goto out;
928 }
929
930 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
931 {
932         struct in_device *in_dev = __in_dev_get_rtnl(dev);
933         struct in_ifaddr *ifa;
934         struct ifreq ifr;
935         int done = 0;
936
937         if (!in_dev)
938                 goto out;
939
940         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
941                 if (!buf) {
942                         done += sizeof(ifr);
943                         continue;
944                 }
945                 if (len < (int) sizeof(ifr))
946                         break;
947                 memset(&ifr, 0, sizeof(struct ifreq));
948                 if (ifa->ifa_label)
949                         strcpy(ifr.ifr_name, ifa->ifa_label);
950                 else
951                         strcpy(ifr.ifr_name, dev->name);
952
953                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
954                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
955                                                                 ifa->ifa_local;
956
957                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
958                         done = -EFAULT;
959                         break;
960                 }
961                 buf  += sizeof(struct ifreq);
962                 len  -= sizeof(struct ifreq);
963                 done += sizeof(struct ifreq);
964         }
965 out:
966         return done;
967 }
968
969 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
970 {
971         __be32 addr = 0;
972         struct in_device *in_dev;
973         struct net *net = dev_net(dev);
974
975         rcu_read_lock();
976         in_dev = __in_dev_get_rcu(dev);
977         if (!in_dev)
978                 goto no_in_dev;
979
980         for_primary_ifa(in_dev) {
981                 if (ifa->ifa_scope > scope)
982                         continue;
983                 if (!dst || inet_ifa_match(dst, ifa)) {
984                         addr = ifa->ifa_local;
985                         break;
986                 }
987                 if (!addr)
988                         addr = ifa->ifa_local;
989         } endfor_ifa(in_dev);
990
991         if (addr)
992                 goto out_unlock;
993 no_in_dev:
994
995         /* Not loopback addresses on loopback should be preferred
996            in this case. It is importnat that lo is the first interface
997            in dev_base list.
998          */
999         for_each_netdev_rcu(net, dev) {
1000                 in_dev = __in_dev_get_rcu(dev);
1001                 if (!in_dev)
1002                         continue;
1003
1004                 for_primary_ifa(in_dev) {
1005                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1006                             ifa->ifa_scope <= scope) {
1007                                 addr = ifa->ifa_local;
1008                                 goto out_unlock;
1009                         }
1010                 } endfor_ifa(in_dev);
1011         }
1012 out_unlock:
1013         rcu_read_unlock();
1014         return addr;
1015 }
1016 EXPORT_SYMBOL(inet_select_addr);
1017
1018 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1019                               __be32 local, int scope)
1020 {
1021         int same = 0;
1022         __be32 addr = 0;
1023
1024         for_ifa(in_dev) {
1025                 if (!addr &&
1026                     (local == ifa->ifa_local || !local) &&
1027                     ifa->ifa_scope <= scope) {
1028                         addr = ifa->ifa_local;
1029                         if (same)
1030                                 break;
1031                 }
1032                 if (!same) {
1033                         same = (!local || inet_ifa_match(local, ifa)) &&
1034                                 (!dst || inet_ifa_match(dst, ifa));
1035                         if (same && addr) {
1036                                 if (local || !dst)
1037                                         break;
1038                                 /* Is the selected addr into dst subnet? */
1039                                 if (inet_ifa_match(addr, ifa))
1040                                         break;
1041                                 /* No, then can we use new local src? */
1042                                 if (ifa->ifa_scope <= scope) {
1043                                         addr = ifa->ifa_local;
1044                                         break;
1045                                 }
1046                                 /* search for large dst subnet for addr */
1047                                 same = 0;
1048                         }
1049                 }
1050         } endfor_ifa(in_dev);
1051
1052         return same ? addr : 0;
1053 }
1054
1055 /*
1056  * Confirm that local IP address exists using wildcards:
1057  * - in_dev: only on this interface, 0=any interface
1058  * - dst: only in the same subnet as dst, 0=any dst
1059  * - local: address, 0=autoselect the local address
1060  * - scope: maximum allowed scope value for the local address
1061  */
1062 __be32 inet_confirm_addr(struct in_device *in_dev,
1063                          __be32 dst, __be32 local, int scope)
1064 {
1065         __be32 addr = 0;
1066         struct net_device *dev;
1067         struct net *net;
1068
1069         if (scope != RT_SCOPE_LINK)
1070                 return confirm_addr_indev(in_dev, dst, local, scope);
1071
1072         net = dev_net(in_dev->dev);
1073         rcu_read_lock();
1074         for_each_netdev_rcu(net, dev) {
1075                 in_dev = __in_dev_get_rcu(dev);
1076                 if (in_dev) {
1077                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1078                         if (addr)
1079                                 break;
1080                 }
1081         }
1082         rcu_read_unlock();
1083
1084         return addr;
1085 }
1086
1087 /*
1088  *      Device notifier
1089  */
1090
1091 int register_inetaddr_notifier(struct notifier_block *nb)
1092 {
1093         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1094 }
1095 EXPORT_SYMBOL(register_inetaddr_notifier);
1096
1097 int unregister_inetaddr_notifier(struct notifier_block *nb)
1098 {
1099         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1100 }
1101 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1102
1103 /* Rename ifa_labels for a device name change. Make some effort to preserve
1104  * existing alias numbering and to create unique labels if possible.
1105 */
1106 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1107 {
1108         struct in_ifaddr *ifa;
1109         int named = 0;
1110
1111         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1112                 char old[IFNAMSIZ], *dot;
1113
1114                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1115                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1116                 if (named++ == 0)
1117                         goto skip;
1118                 dot = strchr(old, ':');
1119                 if (dot == NULL) {
1120                         sprintf(old, ":%d", named);
1121                         dot = old;
1122                 }
1123                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1124                         strcat(ifa->ifa_label, dot);
1125                 else
1126                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1127 skip:
1128                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1129         }
1130 }
1131
/* An MTU is usable for IPv4 here only if it is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        if (mtu < 68)
                return false;
        return true;
}
1136
1137 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1138                                         struct in_device *in_dev)
1139
1140 {
1141         struct in_ifaddr *ifa;
1142
1143         for (ifa = in_dev->ifa_list; ifa;
1144              ifa = ifa->ifa_next) {
1145                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1146                          ifa->ifa_local, dev,
1147                          ifa->ifa_local, NULL,
1148                          dev->dev_addr, NULL);
1149         }
1150 }
1151
1152 /* Called only under RTNL semaphore */
1153
1154 static int inetdev_event(struct notifier_block *this, unsigned long event,
1155                          void *ptr)
1156 {
1157         struct net_device *dev = ptr;
1158         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1159
1160         ASSERT_RTNL();
1161
1162         if (!in_dev) {
1163                 if (event == NETDEV_REGISTER) {
1164                         in_dev = inetdev_init(dev);
1165                         if (!in_dev)
1166                                 return notifier_from_errno(-ENOMEM);
1167                         if (dev->flags & IFF_LOOPBACK) {
1168                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1169                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1170                         }
1171                 } else if (event == NETDEV_CHANGEMTU) {
1172                         /* Re-enabling IP */
1173                         if (inetdev_valid_mtu(dev->mtu))
1174                                 in_dev = inetdev_init(dev);
1175                 }
1176                 goto out;
1177         }
1178
1179         switch (event) {
1180         case NETDEV_REGISTER:
1181                 printk(KERN_DEBUG "inetdev_event: bug\n");
1182                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1183                 break;
1184         case NETDEV_UP:
1185                 if (!inetdev_valid_mtu(dev->mtu))
1186                         break;
1187                 if (dev->flags & IFF_LOOPBACK) {
1188                         struct in_ifaddr *ifa = inet_alloc_ifa();
1189
1190                         if (ifa) {
1191                                 INIT_HLIST_NODE(&ifa->hash);
1192                                 ifa->ifa_local =
1193                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1194                                 ifa->ifa_prefixlen = 8;
1195                                 ifa->ifa_mask = inet_make_mask(8);
1196                                 in_dev_hold(in_dev);
1197                                 ifa->ifa_dev = in_dev;
1198                                 ifa->ifa_scope = RT_SCOPE_HOST;
1199                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1200                                 inet_insert_ifa(ifa);
1201                         }
1202                 }
1203                 ip_mc_up(in_dev);
1204                 /* fall through */
1205         case NETDEV_CHANGEADDR:
1206                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1207                         break;
1208                 /* fall through */
1209         case NETDEV_NOTIFY_PEERS:
1210                 /* Send gratuitous ARP to notify of link change */
1211                 inetdev_send_gratuitous_arp(dev, in_dev);
1212                 break;
1213         case NETDEV_DOWN:
1214                 ip_mc_down(in_dev);
1215                 break;
1216         case NETDEV_PRE_TYPE_CHANGE:
1217                 ip_mc_unmap(in_dev);
1218                 break;
1219         case NETDEV_POST_TYPE_CHANGE:
1220                 ip_mc_remap(in_dev);
1221                 break;
1222         case NETDEV_CHANGEMTU:
1223                 if (inetdev_valid_mtu(dev->mtu))
1224                         break;
1225                 /* disable IP when MTU is not enough */
1226         case NETDEV_UNREGISTER:
1227                 inetdev_destroy(in_dev);
1228                 break;
1229         case NETDEV_CHANGENAME:
1230                 /* Do not notify about label change, this event is
1231                  * not interesting to applications using netlink.
1232                  */
1233                 inetdev_changename(dev, in_dev);
1234
1235                 devinet_sysctl_unregister(in_dev);
1236                 devinet_sysctl_register(in_dev);
1237                 break;
1238         }
1239 out:
1240         return NOTIFY_DONE;
1241 }
1242
1243 static struct notifier_block ip_netdev_notifier = {
1244         .notifier_call = inetdev_event,
1245 };
1246
1247 static inline size_t inet_nlmsg_size(void)
1248 {
1249         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1250                + nla_total_size(4) /* IFA_ADDRESS */
1251                + nla_total_size(4) /* IFA_LOCAL */
1252                + nla_total_size(4) /* IFA_BROADCAST */
1253                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1254 }
1255
1256 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1257                             u32 pid, u32 seq, int event, unsigned int flags)
1258 {
1259         struct ifaddrmsg *ifm;
1260         struct nlmsghdr  *nlh;
1261
1262         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1263         if (nlh == NULL)
1264                 return -EMSGSIZE;
1265
1266         ifm = nlmsg_data(nlh);
1267         ifm->ifa_family = AF_INET;
1268         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1269         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1270         ifm->ifa_scope = ifa->ifa_scope;
1271         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1272
1273         if (ifa->ifa_address)
1274                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1275
1276         if (ifa->ifa_local)
1277                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1278
1279         if (ifa->ifa_broadcast)
1280                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1281
1282         if (ifa->ifa_label[0])
1283                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1284
1285         return nlmsg_end(skb, nlh);
1286
1287 nla_put_failure:
1288         nlmsg_cancel(skb, nlh);
1289         return -EMSGSIZE;
1290 }
1291
1292 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1293 {
1294         struct net *net = sock_net(skb->sk);
1295         int h, s_h;
1296         int idx, s_idx;
1297         int ip_idx, s_ip_idx;
1298         struct net_device *dev;
1299         struct in_device *in_dev;
1300         struct in_ifaddr *ifa;
1301         struct hlist_head *head;
1302         struct hlist_node *node;
1303
1304         s_h = cb->args[0];
1305         s_idx = idx = cb->args[1];
1306         s_ip_idx = ip_idx = cb->args[2];
1307
1308         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1309                 idx = 0;
1310                 head = &net->dev_index_head[h];
1311                 rcu_read_lock();
1312                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1313                         if (idx < s_idx)
1314                                 goto cont;
1315                         if (h > s_h || idx > s_idx)
1316                                 s_ip_idx = 0;
1317                         in_dev = __in_dev_get_rcu(dev);
1318                         if (!in_dev)
1319                                 goto cont;
1320
1321                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1322                              ifa = ifa->ifa_next, ip_idx++) {
1323                                 if (ip_idx < s_ip_idx)
1324                                         continue;
1325                                 if (inet_fill_ifaddr(skb, ifa,
1326                                              NETLINK_CB(cb->skb).pid,
1327                                              cb->nlh->nlmsg_seq,
1328                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1329                                         rcu_read_unlock();
1330                                         goto done;
1331                                 }
1332                         }
1333 cont:
1334                         idx++;
1335                 }
1336                 rcu_read_unlock();
1337         }
1338
1339 done:
1340         cb->args[0] = h;
1341         cb->args[1] = idx;
1342         cb->args[2] = ip_idx;
1343
1344         return skb->len;
1345 }
1346
1347 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1348                       u32 pid)
1349 {
1350         struct sk_buff *skb;
1351         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1352         int err = -ENOBUFS;
1353         struct net *net;
1354
1355         net = dev_net(ifa->ifa_dev->dev);
1356         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1357         if (skb == NULL)
1358                 goto errout;
1359
1360         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1361         if (err < 0) {
1362                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1363                 WARN_ON(err == -EMSGSIZE);
1364                 kfree_skb(skb);
1365                 goto errout;
1366         }
1367         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1368         return;
1369 errout:
1370         if (err < 0)
1371                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1372 }
1373
1374 static size_t inet_get_link_af_size(const struct net_device *dev)
1375 {
1376         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377
1378         if (!in_dev)
1379                 return 0;
1380
1381         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1382 }
1383
1384 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1385 {
1386         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1387         struct nlattr *nla;
1388         int i;
1389
1390         if (!in_dev)
1391                 return -ENODATA;
1392
1393         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1394         if (nla == NULL)
1395                 return -EMSGSIZE;
1396
1397         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1398                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1399
1400         return 0;
1401 }
1402
1403 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1404         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1405 };
1406
1407 static int inet_validate_link_af(const struct net_device *dev,
1408                                  const struct nlattr *nla)
1409 {
1410         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1411         int err, rem;
1412
1413         if (dev && !__in_dev_get_rtnl(dev))
1414                 return -EAFNOSUPPORT;
1415
1416         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1417         if (err < 0)
1418                 return err;
1419
1420         if (tb[IFLA_INET_CONF]) {
1421                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1422                         int cfgid = nla_type(a);
1423
1424                         if (nla_len(a) < 4)
1425                                 return -EINVAL;
1426
1427                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1428                                 return -EINVAL;
1429                 }
1430         }
1431
1432         return 0;
1433 }
1434
1435 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1436 {
1437         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1438         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1439         int rem;
1440
1441         if (!in_dev)
1442                 return -EAFNOSUPPORT;
1443
1444         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1445                 BUG();
1446
1447         if (tb[IFLA_INET_CONF]) {
1448                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1449                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1450         }
1451
1452         return 0;
1453 }
1454
1455 #ifdef CONFIG_SYSCTL
1456
1457 static void devinet_copy_dflt_conf(struct net *net, int i)
1458 {
1459         struct net_device *dev;
1460
1461         rcu_read_lock();
1462         for_each_netdev_rcu(net, dev) {
1463                 struct in_device *in_dev;
1464
1465                 in_dev = __in_dev_get_rcu(dev);
1466                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1467                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1468         }
1469         rcu_read_unlock();
1470 }
1471
1472 /* called with RTNL locked */
1473 static void inet_forward_change(struct net *net)
1474 {
1475         struct net_device *dev;
1476         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1477
1478         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1479         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1480
1481         for_each_netdev(net, dev) {
1482                 struct in_device *in_dev;
1483                 if (on)
1484                         dev_disable_lro(dev);
1485                 rcu_read_lock();
1486                 in_dev = __in_dev_get_rcu(dev);
1487                 if (in_dev)
1488                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1489                 rcu_read_unlock();
1490         }
1491 }
1492
1493 static int devinet_conf_proc(ctl_table *ctl, int write,
1494                              void __user *buffer,
1495                              size_t *lenp, loff_t *ppos)
1496 {
1497         int old_value = *(int *)ctl->data;
1498         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1499         int new_value = *(int *)ctl->data;
1500
1501         if (write) {
1502                 struct ipv4_devconf *cnf = ctl->extra1;
1503                 struct net *net = ctl->extra2;
1504                 int i = (int *)ctl->data - cnf->data;
1505
1506                 set_bit(i, cnf->state);
1507
1508                 if (cnf == net->ipv4.devconf_dflt)
1509                         devinet_copy_dflt_conf(net, i);
1510                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1511                         if ((new_value == 0) && (old_value != 0))
1512                                 rt_cache_flush(net, 0);
1513         }
1514
1515         return ret;
1516 }
1517
1518 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1519                                   void __user *buffer,
1520                                   size_t *lenp, loff_t *ppos)
1521 {
1522         int *valp = ctl->data;
1523         int val = *valp;
1524         loff_t pos = *ppos;
1525         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1526
1527         if (write && *valp != val) {
1528                 struct net *net = ctl->extra2;
1529
1530                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1531                         if (!rtnl_trylock()) {
1532                                 /* Restore the original values before restarting */
1533                                 *valp = val;
1534                                 *ppos = pos;
1535                                 return restart_syscall();
1536                         }
1537                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1538                                 inet_forward_change(net);
1539                         } else if (*valp) {
1540                                 struct ipv4_devconf *cnf = ctl->extra1;
1541                                 struct in_device *idev =
1542                                         container_of(cnf, struct in_device, cnf);
1543                                 dev_disable_lro(idev->dev);
1544                         }
1545                         rtnl_unlock();
1546                         rt_cache_flush(net, 0);
1547                 }
1548         }
1549
1550         return ret;
1551 }
1552
1553 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1554                                 void __user *buffer,
1555                                 size_t *lenp, loff_t *ppos)
1556 {
1557         int *valp = ctl->data;
1558         int val = *valp;
1559         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1560         struct net *net = ctl->extra2;
1561
1562         if (write && *valp != val)
1563                 rt_cache_flush(net, 0);
1564
1565         return ret;
1566 }
1567
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 * The data pointer refers to slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf.data[] array, and extra1 points at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .mode           = mval, \
                .maxlen         = sizeof(int), \
                .data           = &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write entry (mode 0644) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry (mode 0644) with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1590
1591 static struct devinet_sysctl_table {
1592         struct ctl_table_header *sysctl_header;
1593         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1594         char *dev_name;
1595 } devinet_sysctl = {
1596         .devinet_vars = {
1597                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1598                                              devinet_sysctl_forward),
1599                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1600
1601                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1602                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1603                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1604                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1605                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1606                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1607                                         "accept_source_route"),
1608                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1609                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1610                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1611                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1612                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1613                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1614                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1615                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1616                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1617                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1618                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1619                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1620                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1621
1622                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1623                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1624                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1625                                               "force_igmp_version"),
1626                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1627                                               "promote_secondaries"),
1628         },
1629 };
1630
1631 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1632                                         struct ipv4_devconf *p)
1633 {
1634         int i;
1635         struct devinet_sysctl_table *t;
1636
1637 #define DEVINET_CTL_PATH_DEV    3
1638
1639         struct ctl_path devinet_ctl_path[] = {
1640                 { .procname = "net",  },
1641                 { .procname = "ipv4", },
1642                 { .procname = "conf", },
1643                 { /* to be set */ },
1644                 { },
1645         };
1646
1647         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1648         if (!t)
1649                 goto out;
1650
1651         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1652                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1653                 t->devinet_vars[i].extra1 = p;
1654                 t->devinet_vars[i].extra2 = net;
1655         }
1656
1657         /*
1658          * Make a copy of dev_name, because '.procname' is regarded as const
1659          * by sysctl and we wouldn't want anyone to change it under our feet
1660          * (see SIOCSIFNAME).
1661          */
1662         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1663         if (!t->dev_name)
1664                 goto free;
1665
1666         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1667
1668         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1669                         t->devinet_vars);
1670         if (!t->sysctl_header)
1671                 goto free_procname;
1672
1673         p->sysctl = t;
1674         return 0;
1675
1676 free_procname:
1677         kfree(t->dev_name);
1678 free:
1679         kfree(t);
1680 out:
1681         return -ENOBUFS;
1682 }
1683
1684 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1685 {
1686         struct devinet_sysctl_table *t = cnf->sysctl;
1687
1688         if (t == NULL)
1689                 return;
1690
1691         cnf->sysctl = NULL;
1692         unregister_net_sysctl_table(t->sysctl_header);
1693         kfree(t->dev_name);
1694         kfree(t);
1695 }
1696
1697 static void devinet_sysctl_register(struct in_device *idev)
1698 {
1699         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1700         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1701                                         &idev->cnf);
1702 }
1703
1704 static void devinet_sysctl_unregister(struct in_device *idev)
1705 {
1706         __devinet_sysctl_unregister(&idev->cnf);
1707         neigh_sysctl_unregister(idev->arp_parms);
1708 }
1709
1710 static struct ctl_table ctl_forward_entry[] = {
1711         {
1712                 .procname       = "ip_forward",
1713                 .data           = &ipv4_devconf.data[
1714                                         IPV4_DEVCONF_FORWARDING - 1],
1715                 .maxlen         = sizeof(int),
1716                 .mode           = 0644,
1717                 .proc_handler   = devinet_sysctl_forward,
1718                 .extra1         = &ipv4_devconf,
1719                 .extra2         = &init_net,
1720         },
1721         { },
1722 };
1723
1724 static __net_initdata struct ctl_path net_ipv4_path[] = {
1725         { .procname = "net", },
1726         { .procname = "ipv4", },
1727         { },
1728 };
1729 #endif
1730
1731 static __net_init int devinet_init_net(struct net *net)
1732 {
1733         int err;
1734         struct ipv4_devconf *all, *dflt;
1735 #ifdef CONFIG_SYSCTL
1736         struct ctl_table *tbl = ctl_forward_entry;
1737         struct ctl_table_header *forw_hdr;
1738 #endif
1739
1740         err = -ENOMEM;
1741         all = &ipv4_devconf;
1742         dflt = &ipv4_devconf_dflt;
1743
1744         if (!net_eq(net, &init_net)) {
1745                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1746                 if (all == NULL)
1747                         goto err_alloc_all;
1748
1749                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1750                 if (dflt == NULL)
1751                         goto err_alloc_dflt;
1752
1753 #ifdef CONFIG_SYSCTL
1754                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1755                 if (tbl == NULL)
1756                         goto err_alloc_ctl;
1757
1758                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1759                 tbl[0].extra1 = all;
1760                 tbl[0].extra2 = net;
1761 #endif
1762         }
1763
1764 #ifdef CONFIG_SYSCTL
1765         err = __devinet_sysctl_register(net, "all", all);
1766         if (err < 0)
1767                 goto err_reg_all;
1768
1769         err = __devinet_sysctl_register(net, "default", dflt);
1770         if (err < 0)
1771                 goto err_reg_dflt;
1772
1773         err = -ENOMEM;
1774         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1775         if (forw_hdr == NULL)
1776                 goto err_reg_ctl;
1777         net->ipv4.forw_hdr = forw_hdr;
1778 #endif
1779
1780         net->ipv4.devconf_all = all;
1781         net->ipv4.devconf_dflt = dflt;
1782         return 0;
1783
1784 #ifdef CONFIG_SYSCTL
1785 err_reg_ctl:
1786         __devinet_sysctl_unregister(dflt);
1787 err_reg_dflt:
1788         __devinet_sysctl_unregister(all);
1789 err_reg_all:
1790         if (tbl != ctl_forward_entry)
1791                 kfree(tbl);
1792 err_alloc_ctl:
1793 #endif
1794         if (dflt != &ipv4_devconf_dflt)
1795                 kfree(dflt);
1796 err_alloc_dflt:
1797         if (all != &ipv4_devconf)
1798                 kfree(all);
1799 err_alloc_all:
1800         return err;
1801 }
1802
1803 static __net_exit void devinet_exit_net(struct net *net)
1804 {
1805 #ifdef CONFIG_SYSCTL
1806         struct ctl_table *tbl;
1807
1808         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1809         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1810         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1811         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1812         kfree(tbl);
1813 #endif
1814         kfree(net->ipv4.devconf_dflt);
1815         kfree(net->ipv4.devconf_all);
1816 }
1817
1818 static __net_initdata struct pernet_operations devinet_ops = {
1819         .init = devinet_init_net,
1820         .exit = devinet_exit_net,
1821 };
1822
1823 static struct rtnl_af_ops inet_af_ops = {
1824         .family           = AF_INET,
1825         .fill_link_af     = inet_fill_link_af,
1826         .get_link_af_size = inet_get_link_af_size,
1827         .validate_link_af = inet_validate_link_af,
1828         .set_link_af      = inet_set_link_af,
1829 };
1830
1831 void __init devinet_init(void)
1832 {
1833         int i;
1834
1835         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1836                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1837
1838         register_pernet_subsys(&devinet_ops);
1839
1840         register_gifconf(PF_INET, inet_gifconf);
1841         register_netdevice_notifier(&ip_netdev_notifier);
1842
1843         rtnl_af_register(&inet_af_ops);
1844
1845         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1846         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1847         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1848 }
1849