net: ipv4 and ipv6: Convert printk(KERN_DEBUG to pr_debug
[pandora-kernel.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 #include "fib_lookup.h"
67
68 static struct ipv4_devconf ipv4_devconf = {
69         .data = {
70                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74         },
75 };
76
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78         .data = {
79                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84         },
85 };
86
/*
 * Expands to IPV4_DEVCONF() applied to the devconf_dflt block that
 * @net's ipv4 state points to.  @net is parenthesised so that any
 * pointer-valued expression, not just a plain identifier, can be
 * passed without changing how the expansion parses.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91         [IFA_LOCAL]             = { .type = NLA_U32 },
92         [IFA_ADDRESS]           = { .type = NLA_U32 },
93         [IFA_BROADCAST]         = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
98  * value.  So if you change this define, make appropriate changes to
99  * inet_addr_hash as well.
100  */
101 #define IN4_ADDR_HSIZE  256
102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
103 static DEFINE_SPINLOCK(inet_addr_hash_lock);
104
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
106 {
107         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
108
109         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110                 (IN4_ADDR_HSIZE - 1));
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         unsigned int hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142         struct hlist_node *node;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146                 struct net_device *dev = ifa->ifa_dev->dev;
147
148                 if (!net_eq(dev_net(dev), net))
149                         continue;
150                 if (ifa->ifa_local == addr) {
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180                          int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static inline void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static inline void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203         if (ifa->ifa_dev)
204                 in_dev_put(ifa->ifa_dev);
205         kfree(ifa);
206 }
207
208 static inline void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215         struct net_device *dev = idev->dev;
216
217         WARN_ON(idev->ifa_list);
218         WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317                          int destroy, struct nlmsghdr *nlh, u32 pid)
318 {
319         struct in_ifaddr *promote = NULL;
320         struct in_ifaddr *ifa, *ifa1 = *ifap;
321         struct in_ifaddr *last_prim = in_dev->ifa_list;
322         struct in_ifaddr *prev_prom = NULL;
323         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324
325         ASSERT_RTNL();
326
327         /* 1. Deleting primary ifaddr forces deletion all secondaries
328          * unless alias promotion is set
329          **/
330
331         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333
334                 while ((ifa = *ifap1) != NULL) {
335                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336                             ifa1->ifa_scope <= ifa->ifa_scope)
337                                 last_prim = ifa;
338
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340                             ifa1->ifa_mask != ifa->ifa_mask ||
341                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
342                                 ifap1 = &ifa->ifa_next;
343                                 prev_prom = ifa;
344                                 continue;
345                         }
346
347                         if (!do_promote) {
348                                 inet_hash_remove(ifa);
349                                 *ifap1 = ifa->ifa_next;
350
351                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
352                                 blocking_notifier_call_chain(&inetaddr_chain,
353                                                 NETDEV_DOWN, ifa);
354                                 inet_free_ifa(ifa);
355                         } else {
356                                 promote = ifa;
357                                 break;
358                         }
359                 }
360         }
361
362         /* On promotion all secondaries from subnet are changing
363          * the primary IP, we must remove all their routes silently
364          * and later to add them back with new prefsrc. Do this
365          * while all addresses are on the device list.
366          */
367         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368                 if (ifa1->ifa_mask == ifa->ifa_mask &&
369                     inet_ifa_match(ifa1->ifa_address, ifa))
370                         fib_del_ifaddr(ifa, ifa1);
371         }
372
373         /* 2. Unlink it */
374
375         *ifap = ifa1->ifa_next;
376         inet_hash_remove(ifa1);
377
378         /* 3. Announce address deletion */
379
380         /* Send message first, then call notifier.
381            At first sight, FIB update triggered by notifier
382            will refer to already deleted ifaddr, that could confuse
383            netlink listeners. It is not true: look, gated sees
384            that route deleted and if it still thinks that ifaddr
385            is valid, it will try to restore deleted routes... Grr.
386            So that, this order is correct.
387          */
388         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
389         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390
391         if (promote) {
392                 struct in_ifaddr *next_sec = promote->ifa_next;
393
394                 if (prev_prom) {
395                         prev_prom->ifa_next = promote->ifa_next;
396                         promote->ifa_next = last_prim->ifa_next;
397                         last_prim->ifa_next = promote;
398                 }
399
400                 promote->ifa_flags &= ~IFA_F_SECONDARY;
401                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
402                 blocking_notifier_call_chain(&inetaddr_chain,
403                                 NETDEV_UP, promote);
404                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405                         if (ifa1->ifa_mask != ifa->ifa_mask ||
406                             !inet_ifa_match(ifa1->ifa_address, ifa))
407                                         continue;
408                         fib_add_ifaddr(ifa);
409                 }
410
411         }
412         if (destroy)
413                 inet_free_ifa(ifa1);
414 }
415
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417                          int destroy)
418 {
419         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421
422 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
423                              u32 pid)
424 {
425         struct in_device *in_dev = ifa->ifa_dev;
426         struct in_ifaddr *ifa1, **ifap, **last_primary;
427
428         ASSERT_RTNL();
429
430         if (!ifa->ifa_local) {
431                 inet_free_ifa(ifa);
432                 return 0;
433         }
434
435         ifa->ifa_flags &= ~IFA_F_SECONDARY;
436         last_primary = &in_dev->ifa_list;
437
438         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
439              ifap = &ifa1->ifa_next) {
440                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
441                     ifa->ifa_scope <= ifa1->ifa_scope)
442                         last_primary = &ifa1->ifa_next;
443                 if (ifa1->ifa_mask == ifa->ifa_mask &&
444                     inet_ifa_match(ifa1->ifa_address, ifa)) {
445                         if (ifa1->ifa_local == ifa->ifa_local) {
446                                 inet_free_ifa(ifa);
447                                 return -EEXIST;
448                         }
449                         if (ifa1->ifa_scope != ifa->ifa_scope) {
450                                 inet_free_ifa(ifa);
451                                 return -EINVAL;
452                         }
453                         ifa->ifa_flags |= IFA_F_SECONDARY;
454                 }
455         }
456
457         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
458                 net_srandom(ifa->ifa_local);
459                 ifap = last_primary;
460         }
461
462         ifa->ifa_next = *ifap;
463         *ifap = ifa;
464
465         inet_hash_insert(dev_net(in_dev->dev), ifa);
466
467         /* Send message first, then call notifier.
468            Notifier will trigger FIB update, so that
469            listeners of netlink will know about new ifaddr */
470         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
471         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
472
473         return 0;
474 }
475
476 static int inet_insert_ifa(struct in_ifaddr *ifa)
477 {
478         return __inet_insert_ifa(ifa, NULL, 0);
479 }
480
481 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
482 {
483         struct in_device *in_dev = __in_dev_get_rtnl(dev);
484
485         ASSERT_RTNL();
486
487         if (!in_dev) {
488                 inet_free_ifa(ifa);
489                 return -ENOBUFS;
490         }
491         ipv4_devconf_setall(in_dev);
492         if (ifa->ifa_dev != in_dev) {
493                 WARN_ON(ifa->ifa_dev);
494                 in_dev_hold(in_dev);
495                 ifa->ifa_dev = in_dev;
496         }
497         if (ipv4_is_loopback(ifa->ifa_local))
498                 ifa->ifa_scope = RT_SCOPE_HOST;
499         return inet_insert_ifa(ifa);
500 }
501
502 /* Caller must hold RCU or RTNL :
503  * We dont take a reference on found in_device
504  */
505 struct in_device *inetdev_by_index(struct net *net, int ifindex)
506 {
507         struct net_device *dev;
508         struct in_device *in_dev = NULL;
509
510         rcu_read_lock();
511         dev = dev_get_by_index_rcu(net, ifindex);
512         if (dev)
513                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
514         rcu_read_unlock();
515         return in_dev;
516 }
517 EXPORT_SYMBOL(inetdev_by_index);
518
519 /* Called only from RTNL semaphored context. No locks. */
520
521 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
522                                     __be32 mask)
523 {
524         ASSERT_RTNL();
525
526         for_primary_ifa(in_dev) {
527                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
528                         return ifa;
529         } endfor_ifa(in_dev);
530         return NULL;
531 }
532
533 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
534 {
535         struct net *net = sock_net(skb->sk);
536         struct nlattr *tb[IFA_MAX+1];
537         struct in_device *in_dev;
538         struct ifaddrmsg *ifm;
539         struct in_ifaddr *ifa, **ifap;
540         int err = -EINVAL;
541
542         ASSERT_RTNL();
543
544         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
545         if (err < 0)
546                 goto errout;
547
548         ifm = nlmsg_data(nlh);
549         in_dev = inetdev_by_index(net, ifm->ifa_index);
550         if (in_dev == NULL) {
551                 err = -ENODEV;
552                 goto errout;
553         }
554
555         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
556              ifap = &ifa->ifa_next) {
557                 if (tb[IFA_LOCAL] &&
558                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
559                         continue;
560
561                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
562                         continue;
563
564                 if (tb[IFA_ADDRESS] &&
565                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
566                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
567                         continue;
568
569                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
570                 return 0;
571         }
572
573         err = -EADDRNOTAVAIL;
574 errout:
575         return err;
576 }
577
578 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
579 {
580         struct nlattr *tb[IFA_MAX+1];
581         struct in_ifaddr *ifa;
582         struct ifaddrmsg *ifm;
583         struct net_device *dev;
584         struct in_device *in_dev;
585         int err;
586
587         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
588         if (err < 0)
589                 goto errout;
590
591         ifm = nlmsg_data(nlh);
592         err = -EINVAL;
593         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
594                 goto errout;
595
596         dev = __dev_get_by_index(net, ifm->ifa_index);
597         err = -ENODEV;
598         if (dev == NULL)
599                 goto errout;
600
601         in_dev = __in_dev_get_rtnl(dev);
602         err = -ENOBUFS;
603         if (in_dev == NULL)
604                 goto errout;
605
606         ifa = inet_alloc_ifa();
607         if (ifa == NULL)
608                 /*
609                  * A potential indev allocation can be left alive, it stays
610                  * assigned to its device and is destroy with it.
611                  */
612                 goto errout;
613
614         ipv4_devconf_setall(in_dev);
615         in_dev_hold(in_dev);
616
617         if (tb[IFA_ADDRESS] == NULL)
618                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
619
620         INIT_HLIST_NODE(&ifa->hash);
621         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
622         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
623         ifa->ifa_flags = ifm->ifa_flags;
624         ifa->ifa_scope = ifm->ifa_scope;
625         ifa->ifa_dev = in_dev;
626
627         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
628         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
629
630         if (tb[IFA_BROADCAST])
631                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
632
633         if (tb[IFA_LABEL])
634                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
635         else
636                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
637
638         return ifa;
639
640 errout:
641         return ERR_PTR(err);
642 }
643
644 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
645 {
646         struct net *net = sock_net(skb->sk);
647         struct in_ifaddr *ifa;
648
649         ASSERT_RTNL();
650
651         ifa = rtm_to_ifaddr(net, nlh);
652         if (IS_ERR(ifa))
653                 return PTR_ERR(ifa);
654
655         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
656 }
657
658 /*
659  *      Determine a default network mask, based on the IP address.
660  */
661
662 static inline int inet_abc_len(__be32 addr)
663 {
664         int rc = -1;    /* Something else, probably a multicast. */
665
666         if (ipv4_is_zeronet(addr))
667                 rc = 0;
668         else {
669                 __u32 haddr = ntohl(addr);
670
671                 if (IN_CLASSA(haddr))
672                         rc = 8;
673                 else if (IN_CLASSB(haddr))
674                         rc = 16;
675                 else if (IN_CLASSC(haddr))
676                         rc = 24;
677         }
678
679         return rc;
680 }
681
682
683 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
684 {
685         struct ifreq ifr;
686         struct sockaddr_in sin_orig;
687         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
688         struct in_device *in_dev;
689         struct in_ifaddr **ifap = NULL;
690         struct in_ifaddr *ifa = NULL;
691         struct net_device *dev;
692         char *colon;
693         int ret = -EFAULT;
694         int tryaddrmatch = 0;
695
696         /*
697          *      Fetch the caller's info block into kernel space
698          */
699
700         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
701                 goto out;
702         ifr.ifr_name[IFNAMSIZ - 1] = 0;
703
704         /* save original address for comparison */
705         memcpy(&sin_orig, sin, sizeof(*sin));
706
707         colon = strchr(ifr.ifr_name, ':');
708         if (colon)
709                 *colon = 0;
710
711         dev_load(net, ifr.ifr_name);
712
713         switch (cmd) {
714         case SIOCGIFADDR:       /* Get interface address */
715         case SIOCGIFBRDADDR:    /* Get the broadcast address */
716         case SIOCGIFDSTADDR:    /* Get the destination address */
717         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
718                 /* Note that these ioctls will not sleep,
719                    so that we do not impose a lock.
720                    One day we will be forced to put shlock here (I mean SMP)
721                  */
722                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
723                 memset(sin, 0, sizeof(*sin));
724                 sin->sin_family = AF_INET;
725                 break;
726
727         case SIOCSIFFLAGS:
728                 ret = -EACCES;
729                 if (!capable(CAP_NET_ADMIN))
730                         goto out;
731                 break;
732         case SIOCSIFADDR:       /* Set interface address (and family) */
733         case SIOCSIFBRDADDR:    /* Set the broadcast address */
734         case SIOCSIFDSTADDR:    /* Set the destination address */
735         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
736                 ret = -EACCES;
737                 if (!capable(CAP_NET_ADMIN))
738                         goto out;
739                 ret = -EINVAL;
740                 if (sin->sin_family != AF_INET)
741                         goto out;
742                 break;
743         default:
744                 ret = -EINVAL;
745                 goto out;
746         }
747
748         rtnl_lock();
749
750         ret = -ENODEV;
751         dev = __dev_get_by_name(net, ifr.ifr_name);
752         if (!dev)
753                 goto done;
754
755         if (colon)
756                 *colon = ':';
757
758         in_dev = __in_dev_get_rtnl(dev);
759         if (in_dev) {
760                 if (tryaddrmatch) {
761                         /* Matthias Andree */
762                         /* compare label and address (4.4BSD style) */
763                         /* note: we only do this for a limited set of ioctls
764                            and only if the original address family was AF_INET.
765                            This is checked above. */
766                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
767                              ifap = &ifa->ifa_next) {
768                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
769                                     sin_orig.sin_addr.s_addr ==
770                                                         ifa->ifa_local) {
771                                         break; /* found */
772                                 }
773                         }
774                 }
775                 /* we didn't get a match, maybe the application is
776                    4.3BSD-style and passed in junk so we fall back to
777                    comparing just the label */
778                 if (!ifa) {
779                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
780                              ifap = &ifa->ifa_next)
781                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
782                                         break;
783                 }
784         }
785
786         ret = -EADDRNOTAVAIL;
787         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
788                 goto done;
789
790         switch (cmd) {
791         case SIOCGIFADDR:       /* Get interface address */
792                 sin->sin_addr.s_addr = ifa->ifa_local;
793                 goto rarok;
794
795         case SIOCGIFBRDADDR:    /* Get the broadcast address */
796                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
797                 goto rarok;
798
799         case SIOCGIFDSTADDR:    /* Get the destination address */
800                 sin->sin_addr.s_addr = ifa->ifa_address;
801                 goto rarok;
802
803         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
804                 sin->sin_addr.s_addr = ifa->ifa_mask;
805                 goto rarok;
806
807         case SIOCSIFFLAGS:
808                 if (colon) {
809                         ret = -EADDRNOTAVAIL;
810                         if (!ifa)
811                                 break;
812                         ret = 0;
813                         if (!(ifr.ifr_flags & IFF_UP))
814                                 inet_del_ifa(in_dev, ifap, 1);
815                         break;
816                 }
817                 ret = dev_change_flags(dev, ifr.ifr_flags);
818                 break;
819
820         case SIOCSIFADDR:       /* Set interface address (and family) */
821                 ret = -EINVAL;
822                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
823                         break;
824
825                 if (!ifa) {
826                         ret = -ENOBUFS;
827                         ifa = inet_alloc_ifa();
828                         INIT_HLIST_NODE(&ifa->hash);
829                         if (!ifa)
830                                 break;
831                         if (colon)
832                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
833                         else
834                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
835                 } else {
836                         ret = 0;
837                         if (ifa->ifa_local == sin->sin_addr.s_addr)
838                                 break;
839                         inet_del_ifa(in_dev, ifap, 0);
840                         ifa->ifa_broadcast = 0;
841                         ifa->ifa_scope = 0;
842                 }
843
844                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
845
846                 if (!(dev->flags & IFF_POINTOPOINT)) {
847                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
848                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
849                         if ((dev->flags & IFF_BROADCAST) &&
850                             ifa->ifa_prefixlen < 31)
851                                 ifa->ifa_broadcast = ifa->ifa_address |
852                                                      ~ifa->ifa_mask;
853                 } else {
854                         ifa->ifa_prefixlen = 32;
855                         ifa->ifa_mask = inet_make_mask(32);
856                 }
857                 ret = inet_set_ifa(dev, ifa);
858                 break;
859
860         case SIOCSIFBRDADDR:    /* Set the broadcast address */
861                 ret = 0;
862                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
863                         inet_del_ifa(in_dev, ifap, 0);
864                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
865                         inet_insert_ifa(ifa);
866                 }
867                 break;
868
869         case SIOCSIFDSTADDR:    /* Set the destination address */
870                 ret = 0;
871                 if (ifa->ifa_address == sin->sin_addr.s_addr)
872                         break;
873                 ret = -EINVAL;
874                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
875                         break;
876                 ret = 0;
877                 inet_del_ifa(in_dev, ifap, 0);
878                 ifa->ifa_address = sin->sin_addr.s_addr;
879                 inet_insert_ifa(ifa);
880                 break;
881
882         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
883
884                 /*
885                  *      The mask we set must be legal.
886                  */
887                 ret = -EINVAL;
888                 if (bad_mask(sin->sin_addr.s_addr, 0))
889                         break;
890                 ret = 0;
891                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
892                         __be32 old_mask = ifa->ifa_mask;
893                         inet_del_ifa(in_dev, ifap, 0);
894                         ifa->ifa_mask = sin->sin_addr.s_addr;
895                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
896
897                         /* See if current broadcast address matches
898                          * with current netmask, then recalculate
899                          * the broadcast address. Otherwise it's a
900                          * funny address, so don't touch it since
901                          * the user seems to know what (s)he's doing...
902                          */
903                         if ((dev->flags & IFF_BROADCAST) &&
904                             (ifa->ifa_prefixlen < 31) &&
905                             (ifa->ifa_broadcast ==
906                              (ifa->ifa_local|~old_mask))) {
907                                 ifa->ifa_broadcast = (ifa->ifa_local |
908                                                       ~sin->sin_addr.s_addr);
909                         }
910                         inet_insert_ifa(ifa);
911                 }
912                 break;
913         }
914 done:
915         rtnl_unlock();
916 out:
917         return ret;
918 rarok:
919         rtnl_unlock();
920         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
921         goto out;
922 }
923
924 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
925 {
926         struct in_device *in_dev = __in_dev_get_rtnl(dev);
927         struct in_ifaddr *ifa;
928         struct ifreq ifr;
929         int done = 0;
930
931         if (!in_dev)
932                 goto out;
933
934         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
935                 if (!buf) {
936                         done += sizeof(ifr);
937                         continue;
938                 }
939                 if (len < (int) sizeof(ifr))
940                         break;
941                 memset(&ifr, 0, sizeof(struct ifreq));
942                 if (ifa->ifa_label)
943                         strcpy(ifr.ifr_name, ifa->ifa_label);
944                 else
945                         strcpy(ifr.ifr_name, dev->name);
946
947                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
948                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
949                                                                 ifa->ifa_local;
950
951                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
952                         done = -EFAULT;
953                         break;
954                 }
955                 buf  += sizeof(struct ifreq);
956                 len  -= sizeof(struct ifreq);
957                 done += sizeof(struct ifreq);
958         }
959 out:
960         return done;
961 }
962
963 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
964 {
965         __be32 addr = 0;
966         struct in_device *in_dev;
967         struct net *net = dev_net(dev);
968
969         rcu_read_lock();
970         in_dev = __in_dev_get_rcu(dev);
971         if (!in_dev)
972                 goto no_in_dev;
973
974         for_primary_ifa(in_dev) {
975                 if (ifa->ifa_scope > scope)
976                         continue;
977                 if (!dst || inet_ifa_match(dst, ifa)) {
978                         addr = ifa->ifa_local;
979                         break;
980                 }
981                 if (!addr)
982                         addr = ifa->ifa_local;
983         } endfor_ifa(in_dev);
984
985         if (addr)
986                 goto out_unlock;
987 no_in_dev:
988
989         /* Not loopback addresses on loopback should be preferred
990            in this case. It is importnat that lo is the first interface
991            in dev_base list.
992          */
993         for_each_netdev_rcu(net, dev) {
994                 in_dev = __in_dev_get_rcu(dev);
995                 if (!in_dev)
996                         continue;
997
998                 for_primary_ifa(in_dev) {
999                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1000                             ifa->ifa_scope <= scope) {
1001                                 addr = ifa->ifa_local;
1002                                 goto out_unlock;
1003                         }
1004                 } endfor_ifa(in_dev);
1005         }
1006 out_unlock:
1007         rcu_read_unlock();
1008         return addr;
1009 }
1010 EXPORT_SYMBOL(inet_select_addr);
1011
1012 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1013                               __be32 local, int scope)
1014 {
1015         int same = 0;
1016         __be32 addr = 0;
1017
1018         for_ifa(in_dev) {
1019                 if (!addr &&
1020                     (local == ifa->ifa_local || !local) &&
1021                     ifa->ifa_scope <= scope) {
1022                         addr = ifa->ifa_local;
1023                         if (same)
1024                                 break;
1025                 }
1026                 if (!same) {
1027                         same = (!local || inet_ifa_match(local, ifa)) &&
1028                                 (!dst || inet_ifa_match(dst, ifa));
1029                         if (same && addr) {
1030                                 if (local || !dst)
1031                                         break;
1032                                 /* Is the selected addr into dst subnet? */
1033                                 if (inet_ifa_match(addr, ifa))
1034                                         break;
1035                                 /* No, then can we use new local src? */
1036                                 if (ifa->ifa_scope <= scope) {
1037                                         addr = ifa->ifa_local;
1038                                         break;
1039                                 }
1040                                 /* search for large dst subnet for addr */
1041                                 same = 0;
1042                         }
1043                 }
1044         } endfor_ifa(in_dev);
1045
1046         return same ? addr : 0;
1047 }
1048
1049 /*
1050  * Confirm that local IP address exists using wildcards:
1051  * - in_dev: only on this interface, 0=any interface
1052  * - dst: only in the same subnet as dst, 0=any dst
1053  * - local: address, 0=autoselect the local address
1054  * - scope: maximum allowed scope value for the local address
1055  */
1056 __be32 inet_confirm_addr(struct in_device *in_dev,
1057                          __be32 dst, __be32 local, int scope)
1058 {
1059         __be32 addr = 0;
1060         struct net_device *dev;
1061         struct net *net;
1062
1063         if (scope != RT_SCOPE_LINK)
1064                 return confirm_addr_indev(in_dev, dst, local, scope);
1065
1066         net = dev_net(in_dev->dev);
1067         rcu_read_lock();
1068         for_each_netdev_rcu(net, dev) {
1069                 in_dev = __in_dev_get_rcu(dev);
1070                 if (in_dev) {
1071                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1072                         if (addr)
1073                                 break;
1074                 }
1075         }
1076         rcu_read_unlock();
1077
1078         return addr;
1079 }
1080 EXPORT_SYMBOL(inet_confirm_addr);
1081
1082 /*
1083  *      Device notifier
1084  */
1085
1086 int register_inetaddr_notifier(struct notifier_block *nb)
1087 {
1088         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1089 }
1090 EXPORT_SYMBOL(register_inetaddr_notifier);
1091
1092 int unregister_inetaddr_notifier(struct notifier_block *nb)
1093 {
1094         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1095 }
1096 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1097
1098 /* Rename ifa_labels for a device name change. Make some effort to preserve
1099  * existing alias numbering and to create unique labels if possible.
1100 */
1101 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1102 {
1103         struct in_ifaddr *ifa;
1104         int named = 0;
1105
1106         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1107                 char old[IFNAMSIZ], *dot;
1108
1109                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1110                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1111                 if (named++ == 0)
1112                         goto skip;
1113                 dot = strchr(old, ':');
1114                 if (dot == NULL) {
1115                         sprintf(old, ":%d", named);
1116                         dot = old;
1117                 }
1118                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1119                         strcat(ifa->ifa_label, dot);
1120                 else
1121                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1122 skip:
1123                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1124         }
1125 }
1126
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum - confirm).
 */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1131
1132 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1133                                         struct in_device *in_dev)
1134
1135 {
1136         struct in_ifaddr *ifa;
1137
1138         for (ifa = in_dev->ifa_list; ifa;
1139              ifa = ifa->ifa_next) {
1140                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1141                          ifa->ifa_local, dev,
1142                          ifa->ifa_local, NULL,
1143                          dev->dev_addr, NULL);
1144         }
1145 }
1146
1147 /* Called only under RTNL semaphore */
1148
1149 static int inetdev_event(struct notifier_block *this, unsigned long event,
1150                          void *ptr)
1151 {
1152         struct net_device *dev = ptr;
1153         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1154
1155         ASSERT_RTNL();
1156
1157         if (!in_dev) {
1158                 if (event == NETDEV_REGISTER) {
1159                         in_dev = inetdev_init(dev);
1160                         if (!in_dev)
1161                                 return notifier_from_errno(-ENOMEM);
1162                         if (dev->flags & IFF_LOOPBACK) {
1163                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1164                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1165                         }
1166                 } else if (event == NETDEV_CHANGEMTU) {
1167                         /* Re-enabling IP */
1168                         if (inetdev_valid_mtu(dev->mtu))
1169                                 in_dev = inetdev_init(dev);
1170                 }
1171                 goto out;
1172         }
1173
1174         switch (event) {
1175         case NETDEV_REGISTER:
1176                 pr_debug("%s: bug\n", __func__);
1177                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1178                 break;
1179         case NETDEV_UP:
1180                 if (!inetdev_valid_mtu(dev->mtu))
1181                         break;
1182                 if (dev->flags & IFF_LOOPBACK) {
1183                         struct in_ifaddr *ifa = inet_alloc_ifa();
1184
1185                         if (ifa) {
1186                                 INIT_HLIST_NODE(&ifa->hash);
1187                                 ifa->ifa_local =
1188                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1189                                 ifa->ifa_prefixlen = 8;
1190                                 ifa->ifa_mask = inet_make_mask(8);
1191                                 in_dev_hold(in_dev);
1192                                 ifa->ifa_dev = in_dev;
1193                                 ifa->ifa_scope = RT_SCOPE_HOST;
1194                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1195                                 inet_insert_ifa(ifa);
1196                         }
1197                 }
1198                 ip_mc_up(in_dev);
1199                 /* fall through */
1200         case NETDEV_CHANGEADDR:
1201                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1202                         break;
1203                 /* fall through */
1204         case NETDEV_NOTIFY_PEERS:
1205                 /* Send gratuitous ARP to notify of link change */
1206                 inetdev_send_gratuitous_arp(dev, in_dev);
1207                 break;
1208         case NETDEV_DOWN:
1209                 ip_mc_down(in_dev);
1210                 break;
1211         case NETDEV_PRE_TYPE_CHANGE:
1212                 ip_mc_unmap(in_dev);
1213                 break;
1214         case NETDEV_POST_TYPE_CHANGE:
1215                 ip_mc_remap(in_dev);
1216                 break;
1217         case NETDEV_CHANGEMTU:
1218                 if (inetdev_valid_mtu(dev->mtu))
1219                         break;
1220                 /* disable IP when MTU is not enough */
1221         case NETDEV_UNREGISTER:
1222                 inetdev_destroy(in_dev);
1223                 break;
1224         case NETDEV_CHANGENAME:
1225                 /* Do not notify about label change, this event is
1226                  * not interesting to applications using netlink.
1227                  */
1228                 inetdev_changename(dev, in_dev);
1229
1230                 devinet_sysctl_unregister(in_dev);
1231                 devinet_sysctl_register(in_dev);
1232                 break;
1233         }
1234 out:
1235         return NOTIFY_DONE;
1236 }
1237
1238 static struct notifier_block ip_netdev_notifier = {
1239         .notifier_call = inetdev_event,
1240 };
1241
1242 static inline size_t inet_nlmsg_size(void)
1243 {
1244         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1245                + nla_total_size(4) /* IFA_ADDRESS */
1246                + nla_total_size(4) /* IFA_LOCAL */
1247                + nla_total_size(4) /* IFA_BROADCAST */
1248                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1249 }
1250
1251 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1252                             u32 pid, u32 seq, int event, unsigned int flags)
1253 {
1254         struct ifaddrmsg *ifm;
1255         struct nlmsghdr  *nlh;
1256
1257         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1258         if (nlh == NULL)
1259                 return -EMSGSIZE;
1260
1261         ifm = nlmsg_data(nlh);
1262         ifm->ifa_family = AF_INET;
1263         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1264         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1265         ifm->ifa_scope = ifa->ifa_scope;
1266         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1267
1268         if ((ifa->ifa_address &&
1269              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1270             (ifa->ifa_local &&
1271              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1272             (ifa->ifa_broadcast &&
1273              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1274             (ifa->ifa_label[0] &&
1275              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1276                 goto nla_put_failure;
1277
1278         return nlmsg_end(skb, nlh);
1279
1280 nla_put_failure:
1281         nlmsg_cancel(skb, nlh);
1282         return -EMSGSIZE;
1283 }
1284
1285 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1286 {
1287         struct net *net = sock_net(skb->sk);
1288         int h, s_h;
1289         int idx, s_idx;
1290         int ip_idx, s_ip_idx;
1291         struct net_device *dev;
1292         struct in_device *in_dev;
1293         struct in_ifaddr *ifa;
1294         struct hlist_head *head;
1295         struct hlist_node *node;
1296
1297         s_h = cb->args[0];
1298         s_idx = idx = cb->args[1];
1299         s_ip_idx = ip_idx = cb->args[2];
1300
1301         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1302                 idx = 0;
1303                 head = &net->dev_index_head[h];
1304                 rcu_read_lock();
1305                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1306                         if (idx < s_idx)
1307                                 goto cont;
1308                         if (h > s_h || idx > s_idx)
1309                                 s_ip_idx = 0;
1310                         in_dev = __in_dev_get_rcu(dev);
1311                         if (!in_dev)
1312                                 goto cont;
1313
1314                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1315                              ifa = ifa->ifa_next, ip_idx++) {
1316                                 if (ip_idx < s_ip_idx)
1317                                         continue;
1318                                 if (inet_fill_ifaddr(skb, ifa,
1319                                              NETLINK_CB(cb->skb).pid,
1320                                              cb->nlh->nlmsg_seq,
1321                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1322                                         rcu_read_unlock();
1323                                         goto done;
1324                                 }
1325                         }
1326 cont:
1327                         idx++;
1328                 }
1329                 rcu_read_unlock();
1330         }
1331
1332 done:
1333         cb->args[0] = h;
1334         cb->args[1] = idx;
1335         cb->args[2] = ip_idx;
1336
1337         return skb->len;
1338 }
1339
1340 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1341                       u32 pid)
1342 {
1343         struct sk_buff *skb;
1344         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1345         int err = -ENOBUFS;
1346         struct net *net;
1347
1348         net = dev_net(ifa->ifa_dev->dev);
1349         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1350         if (skb == NULL)
1351                 goto errout;
1352
1353         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1354         if (err < 0) {
1355                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1356                 WARN_ON(err == -EMSGSIZE);
1357                 kfree_skb(skb);
1358                 goto errout;
1359         }
1360         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1361         return;
1362 errout:
1363         if (err < 0)
1364                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1365 }
1366
1367 static size_t inet_get_link_af_size(const struct net_device *dev)
1368 {
1369         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1370
1371         if (!in_dev)
1372                 return 0;
1373
1374         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1375 }
1376
1377 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1378 {
1379         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1380         struct nlattr *nla;
1381         int i;
1382
1383         if (!in_dev)
1384                 return -ENODATA;
1385
1386         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1387         if (nla == NULL)
1388                 return -EMSGSIZE;
1389
1390         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1391                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1392
1393         return 0;
1394 }
1395
1396 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1397         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1398 };
1399
1400 static int inet_validate_link_af(const struct net_device *dev,
1401                                  const struct nlattr *nla)
1402 {
1403         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1404         int err, rem;
1405
1406         if (dev && !__in_dev_get_rtnl(dev))
1407                 return -EAFNOSUPPORT;
1408
1409         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1410         if (err < 0)
1411                 return err;
1412
1413         if (tb[IFLA_INET_CONF]) {
1414                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1415                         int cfgid = nla_type(a);
1416
1417                         if (nla_len(a) < 4)
1418                                 return -EINVAL;
1419
1420                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1421                                 return -EINVAL;
1422                 }
1423         }
1424
1425         return 0;
1426 }
1427
1428 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1429 {
1430         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1431         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1432         int rem;
1433
1434         if (!in_dev)
1435                 return -EAFNOSUPPORT;
1436
1437         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1438                 BUG();
1439
1440         if (tb[IFLA_INET_CONF]) {
1441                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1442                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1443         }
1444
1445         return 0;
1446 }
1447
1448 #ifdef CONFIG_SYSCTL
1449
1450 static void devinet_copy_dflt_conf(struct net *net, int i)
1451 {
1452         struct net_device *dev;
1453
1454         rcu_read_lock();
1455         for_each_netdev_rcu(net, dev) {
1456                 struct in_device *in_dev;
1457
1458                 in_dev = __in_dev_get_rcu(dev);
1459                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1460                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1461         }
1462         rcu_read_unlock();
1463 }
1464
1465 /* called with RTNL locked */
1466 static void inet_forward_change(struct net *net)
1467 {
1468         struct net_device *dev;
1469         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1470
1471         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1472         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1473
1474         for_each_netdev(net, dev) {
1475                 struct in_device *in_dev;
1476                 if (on)
1477                         dev_disable_lro(dev);
1478                 rcu_read_lock();
1479                 in_dev = __in_dev_get_rcu(dev);
1480                 if (in_dev)
1481                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1482                 rcu_read_unlock();
1483         }
1484 }
1485
1486 static int devinet_conf_proc(ctl_table *ctl, int write,
1487                              void __user *buffer,
1488                              size_t *lenp, loff_t *ppos)
1489 {
1490         int old_value = *(int *)ctl->data;
1491         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1492         int new_value = *(int *)ctl->data;
1493
1494         if (write) {
1495                 struct ipv4_devconf *cnf = ctl->extra1;
1496                 struct net *net = ctl->extra2;
1497                 int i = (int *)ctl->data - cnf->data;
1498
1499                 set_bit(i, cnf->state);
1500
1501                 if (cnf == net->ipv4.devconf_dflt)
1502                         devinet_copy_dflt_conf(net, i);
1503                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1504                         if ((new_value == 0) && (old_value != 0))
1505                                 rt_cache_flush(net, 0);
1506         }
1507
1508         return ret;
1509 }
1510
1511 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1512                                   void __user *buffer,
1513                                   size_t *lenp, loff_t *ppos)
1514 {
1515         int *valp = ctl->data;
1516         int val = *valp;
1517         loff_t pos = *ppos;
1518         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1519
1520         if (write && *valp != val) {
1521                 struct net *net = ctl->extra2;
1522
1523                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1524                         if (!rtnl_trylock()) {
1525                                 /* Restore the original values before restarting */
1526                                 *valp = val;
1527                                 *ppos = pos;
1528                                 return restart_syscall();
1529                         }
1530                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1531                                 inet_forward_change(net);
1532                         } else if (*valp) {
1533                                 struct ipv4_devconf *cnf = ctl->extra1;
1534                                 struct in_device *idev =
1535                                         container_of(cnf, struct in_device, cnf);
1536                                 dev_disable_lro(idev->dev);
1537                         }
1538                         rtnl_unlock();
1539                         rt_cache_flush(net, 0);
1540                 }
1541         }
1542
1543         return ret;
1544 }
1545
1546 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1547                                 void __user *buffer,
1548                                 size_t *lenp, loff_t *ppos)
1549 {
1550         int *valp = ctl->data;
1551         int val = *valp;
1552         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1553         struct net *net = ctl->extra2;
1554
1555         if (write && *valp != val)
1556                 rt_cache_flush(net, 0);
1557
1558         return ret;
1559 }
1560
/*
 * Build one ctl_table initialiser for devconf attribute IPV4_DEVCONF_<attr>.
 * The data pointer refers to the matching slot of the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, fname, perm, handler) \
	{ \
		.procname	= fname, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Entry with mode 0644 handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, fname) \
	DEVINET_SYSCTL_ENTRY(attr, fname, 0644, devinet_conf_proc)

/* Entry with mode 0444 handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, fname) \
	DEVINET_SYSCTL_ENTRY(attr, fname, 0444, devinet_conf_proc)

/* Entry with mode 0644 and a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, fname, handler) \
	DEVINET_SYSCTL_ENTRY(attr, fname, 0644, handler)

/* Entry with mode 0644 handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, fname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, fname, ipv4_doint_and_flush)
1583
1584 static struct devinet_sysctl_table {
1585         struct ctl_table_header *sysctl_header;
1586         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1587 } devinet_sysctl = {
1588         .devinet_vars = {
1589                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1590                                              devinet_sysctl_forward),
1591                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1592
1593                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1594                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1595                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1596                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1597                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1598                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1599                                         "accept_source_route"),
1600                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1601                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1602                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1603                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1604                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1605                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1606                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1607                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1608                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1609                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1610                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1611                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1612                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1613
1614                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1615                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1616                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1617                                               "force_igmp_version"),
1618                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1619                                               "promote_secondaries"),
1620         },
1621 };
1622
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624                                         struct ipv4_devconf *p)
1625 {
1626         int i;
1627         struct devinet_sysctl_table *t;
1628         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629
1630         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631         if (!t)
1632                 goto out;
1633
1634         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636                 t->devinet_vars[i].extra1 = p;
1637                 t->devinet_vars[i].extra2 = net;
1638         }
1639
1640         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641
1642         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643         if (!t->sysctl_header)
1644                 goto free;
1645
1646         p->sysctl = t;
1647         return 0;
1648
1649 free:
1650         kfree(t);
1651 out:
1652         return -ENOBUFS;
1653 }
1654
1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656 {
1657         struct devinet_sysctl_table *t = cnf->sysctl;
1658
1659         if (t == NULL)
1660                 return;
1661
1662         cnf->sysctl = NULL;
1663         unregister_net_sysctl_table(t->sysctl_header);
1664         kfree(t);
1665 }
1666
1667 static void devinet_sysctl_register(struct in_device *idev)
1668 {
1669         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671                                         &idev->cnf);
1672 }
1673
1674 static void devinet_sysctl_unregister(struct in_device *idev)
1675 {
1676         __devinet_sysctl_unregister(&idev->cnf);
1677         neigh_sysctl_unregister(idev->arp_parms);
1678 }
1679
1680 static struct ctl_table ctl_forward_entry[] = {
1681         {
1682                 .procname       = "ip_forward",
1683                 .data           = &ipv4_devconf.data[
1684                                         IPV4_DEVCONF_FORWARDING - 1],
1685                 .maxlen         = sizeof(int),
1686                 .mode           = 0644,
1687                 .proc_handler   = devinet_sysctl_forward,
1688                 .extra1         = &ipv4_devconf,
1689                 .extra2         = &init_net,
1690         },
1691         { },
1692 };
1693 #endif
1694
1695 static __net_init int devinet_init_net(struct net *net)
1696 {
1697         int err;
1698         struct ipv4_devconf *all, *dflt;
1699 #ifdef CONFIG_SYSCTL
1700         struct ctl_table *tbl = ctl_forward_entry;
1701         struct ctl_table_header *forw_hdr;
1702 #endif
1703
1704         err = -ENOMEM;
1705         all = &ipv4_devconf;
1706         dflt = &ipv4_devconf_dflt;
1707
1708         if (!net_eq(net, &init_net)) {
1709                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710                 if (all == NULL)
1711                         goto err_alloc_all;
1712
1713                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714                 if (dflt == NULL)
1715                         goto err_alloc_dflt;
1716
1717 #ifdef CONFIG_SYSCTL
1718                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719                 if (tbl == NULL)
1720                         goto err_alloc_ctl;
1721
1722                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723                 tbl[0].extra1 = all;
1724                 tbl[0].extra2 = net;
1725 #endif
1726         }
1727
1728 #ifdef CONFIG_SYSCTL
1729         err = __devinet_sysctl_register(net, "all", all);
1730         if (err < 0)
1731                 goto err_reg_all;
1732
1733         err = __devinet_sysctl_register(net, "default", dflt);
1734         if (err < 0)
1735                 goto err_reg_dflt;
1736
1737         err = -ENOMEM;
1738         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739         if (forw_hdr == NULL)
1740                 goto err_reg_ctl;
1741         net->ipv4.forw_hdr = forw_hdr;
1742 #endif
1743
1744         net->ipv4.devconf_all = all;
1745         net->ipv4.devconf_dflt = dflt;
1746         return 0;
1747
1748 #ifdef CONFIG_SYSCTL
1749 err_reg_ctl:
1750         __devinet_sysctl_unregister(dflt);
1751 err_reg_dflt:
1752         __devinet_sysctl_unregister(all);
1753 err_reg_all:
1754         if (tbl != ctl_forward_entry)
1755                 kfree(tbl);
1756 err_alloc_ctl:
1757 #endif
1758         if (dflt != &ipv4_devconf_dflt)
1759                 kfree(dflt);
1760 err_alloc_dflt:
1761         if (all != &ipv4_devconf)
1762                 kfree(all);
1763 err_alloc_all:
1764         return err;
1765 }
1766
1767 static __net_exit void devinet_exit_net(struct net *net)
1768 {
1769 #ifdef CONFIG_SYSCTL
1770         struct ctl_table *tbl;
1771
1772         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1776         kfree(tbl);
1777 #endif
1778         kfree(net->ipv4.devconf_dflt);
1779         kfree(net->ipv4.devconf_all);
1780 }
1781
1782 static __net_initdata struct pernet_operations devinet_ops = {
1783         .init = devinet_init_net,
1784         .exit = devinet_exit_net,
1785 };
1786
1787 static struct rtnl_af_ops inet_af_ops = {
1788         .family           = AF_INET,
1789         .fill_link_af     = inet_fill_link_af,
1790         .get_link_af_size = inet_get_link_af_size,
1791         .validate_link_af = inet_validate_link_af,
1792         .set_link_af      = inet_set_link_af,
1793 };
1794
1795 void __init devinet_init(void)
1796 {
1797         int i;
1798
1799         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801
1802         register_pernet_subsys(&devinet_ops);
1803
1804         register_gifconf(PF_INET, inet_gifconf);
1805         register_netdevice_notifier(&ip_netdev_notifier);
1806
1807         rtnl_af_register(&inet_af_ops);
1808
1809         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812 }
1813