Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[pandora-kernel.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
/*
 * File-scope IPv4 device configuration block.
 *
 * Each slot of .data is addressed as (NET_IPV4_CONF_* - 1).  Only the four
 * entries listed here start out as 1 (accept/send/secure redirects and
 * shared media); every other slot is implicitly zero-initialised.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
        },
};
75
/*
 * File-scope "default" IPv4 device configuration block.
 *
 * Same initial values as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE set to 1.
 * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into each new
 * in_device; presumably that pointer refers to this object (or a copy of
 * it) -- the wiring is not visible in this part of the file.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
                [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
                [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
        },
};
85
/*
 * Apply IPV4_DEVCONF() to the per-namespace default configuration block
 * (net->ipv4.devconf_dflt) for attribute 'attr'.
 *
 * 'net' is parenthesised so that any pointer-valued expression, not just a
 * plain identifier, can be passed without operator-precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
/*
 * Netlink attribute policy used when parsing IPv4 address messages
 * (passed to nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr()).
 *
 * IFA_LOCAL, IFA_ADDRESS and IFA_BROADCAST are 32-bit values; IFA_LABEL is
 * a string limited to IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
95
/* Netlink notification for an address event; defined later in this file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or removed below.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() needs it before the definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL the real implementations are
 * only declared here; without it both calls compile to empty inline stubs
 * so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
112
113 /* Locks all the inet devices. */
114
115 static struct in_ifaddr *inet_alloc_ifa(void)
116 {
117         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
118
119         if (ifa) {
120                 INIT_RCU_HEAD(&ifa->rcu_head);
121         }
122
123         return ifa;
124 }
125
126 static void inet_rcu_free_ifa(struct rcu_head *head)
127 {
128         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
129         if (ifa->ifa_dev)
130                 in_dev_put(ifa->ifa_dev);
131         kfree(ifa);
132 }
133
/*
 * Release @ifa.  The address is not freed immediately: the work is handed
 * to call_rcu(), which later runs inet_rcu_free_ifa() on it.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
138
139 void in_dev_finish_destroy(struct in_device *idev)
140 {
141         struct net_device *dev = idev->dev;
142
143         BUG_TRAP(!idev->ifa_list);
144         BUG_TRAP(!idev->mc_list);
145 #ifdef NET_REFCNT_DEBUG
146         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
147                idev, dev ? dev->name : "NIL");
148 #endif
149         dev_put(dev);
150         if (!idev->dead)
151                 printk("Freeing alive in_device %p\n", idev);
152         else {
153                 kfree(idev);
154         }
155 }
156
157 static struct in_device *inetdev_init(struct net_device *dev)
158 {
159         struct in_device *in_dev;
160
161         ASSERT_RTNL();
162
163         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
164         if (!in_dev)
165                 goto out;
166         INIT_RCU_HEAD(&in_dev->rcu_head);
167         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
168                         sizeof(in_dev->cnf));
169         in_dev->cnf.sysctl = NULL;
170         in_dev->dev = dev;
171         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
172                 goto out_kfree;
173         /* Reference in_dev->dev */
174         dev_hold(dev);
175         /* Account for reference dev->ip_ptr (below) */
176         in_dev_hold(in_dev);
177
178         devinet_sysctl_register(in_dev);
179         ip_mc_init_dev(in_dev);
180         if (dev->flags & IFF_UP)
181                 ip_mc_up(in_dev);
182
183         /* we can receive as soon as ip_ptr is set -- do this last */
184         rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186         return in_dev;
187 out_kfree:
188         kfree(in_dev);
189         in_dev = NULL;
190         goto out;
191 }
192
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195         struct in_device *idev = container_of(head, struct in_device, rcu_head);
196         in_dev_put(idev);
197 }
198
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201         struct in_ifaddr *ifa;
202         struct net_device *dev;
203
204         ASSERT_RTNL();
205
206         dev = in_dev->dev;
207
208         in_dev->dead = 1;
209
210         ip_mc_destroy_dev(in_dev);
211
212         while ((ifa = in_dev->ifa_list) != NULL) {
213                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214                 inet_free_ifa(ifa);
215         }
216
217         dev->ip_ptr = NULL;
218
219         devinet_sysctl_unregister(in_dev);
220         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221         arp_ifdown(dev);
222
223         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228         rcu_read_lock();
229         for_primary_ifa(in_dev) {
230                 if (inet_ifa_match(a, ifa)) {
231                         if (!b || inet_ifa_match(b, ifa)) {
232                                 rcu_read_unlock();
233                                 return 1;
234                         }
235                 }
236         } endfor_ifa(in_dev);
237         rcu_read_unlock();
238         return 0;
239 }
240
/*
 * Remove the address *ifap from the address list of @in_dev.
 *
 * @in_dev:  device whose list is edited
 * @ifap:    the list link that currently points at the address to remove
 * @destroy: non-zero to hand the removed address to inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa() for every notification sent
 * @pid:     passed through to rtmsg_ifa() for every notification sent
 *
 * Caller must hold RTNL.
 *
 * When the removed address is a primary one, secondaries that share its
 * mask and subnet are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set for the device, the first of them is
 * promoted to primary and the remaining ones are re-announced to the FIB.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 pid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* 1. Deleting a primary ifaddr forces deletion of all its
         * secondaries unless alias promotion is set.
         */

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        /* Remember the last primary (scope permitting)
                         * after which a promoted address would be placed.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Not a secondary of ifa1: step over it and keep
                         * it as the potential predecessor of the promotee.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* Unlink, announce and free this secondary;
                                 * ifap1 is left in place so the loop sees
                                 * the successor next.
                                 */
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* First matching secondary gets promoted. */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {

                /* Move the promotee from its current position to just
                 * behind last_prim.  With prev_prom == NULL it directly
                 * followed ifa1 and is left where it is.
                 * NOTE(review): last_prim starts out as the list head,
                 * which may be ifa1 itself (already unlinked above) --
                 * confirm the splice is safe in that case.
                 */
                if (prev_prom) {
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add every later address in the deleted primary's
                 * subnet to the FIB.
                 */
                for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
326
/*
 * Convenience form of __inet_del_ifa() for callers that have no netlink
 * request at hand: nlh is NULL and pid is 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
332
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa: address to insert; on every non-insert path below it is released
 *       with inet_free_ifa(), so the caller must not touch it afterwards
 * @nlh: passed through to rtmsg_ifa()
 * @pid: passed through to rtmsg_ifa()
 *
 * Caller must hold RTNL.
 *
 * Returns 0 on success (and also when ifa_local is zero, in which case the
 * address is silently dropped), -EEXIST if the same local address already
 * exists in the same subnet, -EINVAL if an address in the same subnet has
 * a different scope.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                             u32 pid)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct in_ifaddr *ifa1, **ifap, **last_primary;

        ASSERT_RTNL();

        if (!ifa->ifa_local) {
                inet_free_ifa(ifa);
                return 0;
        }

        /* Start out as primary; demoted below if the subnet is taken. */
        ifa->ifa_flags &= ~IFA_F_SECONDARY;
        last_primary = &in_dev->ifa_list;

        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
             ifap = &ifa1->ifa_next) {
                /* Track the link behind the last primary whose scope is
                 * not smaller than ours: that is where a primary goes.
                 */
                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
                    ifa->ifa_scope <= ifa1->ifa_scope)
                        last_primary = &ifa1->ifa_next;
                /* An existing address already covers this subnet. */
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa)) {
                        if (ifa1->ifa_local == ifa->ifa_local) {
                                inet_free_ifa(ifa);
                                return -EEXIST;
                        }
                        if (ifa1->ifa_scope != ifa->ifa_scope) {
                                inet_free_ifa(ifa);
                                return -EINVAL;
                        }
                        ifa->ifa_flags |= IFA_F_SECONDARY;
                }
        }

        /* After the loop ifap is the terminating link of the list, so a
         * secondary is appended at the end; a primary is redirected to
         * the slot recorded in last_primary.
         */
        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
                net_srandom(ifa->ifa_local);
                ifap = last_primary;
        }

        ifa->ifa_next = *ifap;
        *ifap = ifa;

        /* Send message first, then call notifier.
           Notifier will trigger FIB update, so that
           listeners of netlink will know about new ifaddr */
        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

        return 0;
}
384
/*
 * Convenience form of __inet_insert_ifa() for callers without a netlink
 * request: nlh is NULL and pid is 0.  Return value is passed through.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0);
}
389
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392         struct in_device *in_dev = __in_dev_get_rtnl(dev);
393
394         ASSERT_RTNL();
395
396         if (!in_dev) {
397                 inet_free_ifa(ifa);
398                 return -ENOBUFS;
399         }
400         ipv4_devconf_setall(in_dev);
401         if (ifa->ifa_dev != in_dev) {
402                 BUG_TRAP(!ifa->ifa_dev);
403                 in_dev_hold(in_dev);
404                 ifa->ifa_dev = in_dev;
405         }
406         if (ipv4_is_loopback(ifa->ifa_local))
407                 ifa->ifa_scope = RT_SCOPE_HOST;
408         return inet_insert_ifa(ifa);
409 }
410
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413         struct net_device *dev;
414         struct in_device *in_dev = NULL;
415         read_lock(&dev_base_lock);
416         dev = __dev_get_by_index(net, ifindex);
417         if (dev)
418                 in_dev = in_dev_get(dev);
419         read_unlock(&dev_base_lock);
420         return in_dev;
421 }
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         __in_dev_put(in_dev);
460
461         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462              ifap = &ifa->ifa_next) {
463                 if (tb[IFA_LOCAL] &&
464                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465                         continue;
466
467                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468                         continue;
469
470                 if (tb[IFA_ADDRESS] &&
471                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473                         continue;
474
475                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476                 return 0;
477         }
478
479         err = -EADDRNOTAVAIL;
480 errout:
481         return err;
482 }
483
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486         struct nlattr *tb[IFA_MAX+1];
487         struct in_ifaddr *ifa;
488         struct ifaddrmsg *ifm;
489         struct net_device *dev;
490         struct in_device *in_dev;
491         int err;
492
493         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494         if (err < 0)
495                 goto errout;
496
497         ifm = nlmsg_data(nlh);
498         err = -EINVAL;
499         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500                 goto errout;
501
502         dev = __dev_get_by_index(net, ifm->ifa_index);
503         err = -ENODEV;
504         if (dev == NULL)
505                 goto errout;
506
507         in_dev = __in_dev_get_rtnl(dev);
508         err = -ENOBUFS;
509         if (in_dev == NULL)
510                 goto errout;
511
512         ifa = inet_alloc_ifa();
513         if (ifa == NULL)
514                 /*
515                  * A potential indev allocation can be left alive, it stays
516                  * assigned to its device and is destroy with it.
517                  */
518                 goto errout;
519
520         ipv4_devconf_setall(in_dev);
521         in_dev_hold(in_dev);
522
523         if (tb[IFA_ADDRESS] == NULL)
524                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525
526         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528         ifa->ifa_flags = ifm->ifa_flags;
529         ifa->ifa_scope = ifm->ifa_scope;
530         ifa->ifa_dev = in_dev;
531
532         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534
535         if (tb[IFA_BROADCAST])
536                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537
538         if (tb[IFA_LABEL])
539                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540         else
541                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542
543         return ifa;
544
545 errout:
546         return ERR_PTR(err);
547 }
548
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551         struct net *net = sock_net(skb->sk);
552         struct in_ifaddr *ifa;
553
554         ASSERT_RTNL();
555
556         ifa = rtm_to_ifaddr(net, nlh);
557         if (IS_ERR(ifa))
558                 return PTR_ERR(ifa);
559
560         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562
563 /*
564  *      Determine a default network mask, based on the IP address.
565  */
566
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569         int rc = -1;    /* Something else, probably a multicast. */
570
571         if (ipv4_is_zeronet(addr))
572                 rc = 0;
573         else {
574                 __u32 haddr = ntohl(addr);
575
576                 if (IN_CLASSA(haddr))
577                         rc = 8;
578                 else if (IN_CLASSB(haddr))
579                         rc = 16;
580                 else if (IN_CLASSC(haddr))
581                         rc = 24;
582         }
583
584         return rc;
585 }
586
587
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value yields -EINVAL
 * @arg: user pointer to a struct ifreq; always copied in, and copied back
 *       out for the four "get" commands
 *
 * The interface name may be of the form "dev:label".  The part before the
 * colon selects the device; the full name is compared against address
 * labels to select the address to operate on.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        copying the ifreq from/to user space failed
 *   -EACCES        a "set" command without CAP_NET_ADMIN
 *   -EINVAL        unknown command, wrong address family, bad address/mask
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no matching address (where one is required)
 *   -ENOBUFS       a new address could not be allocated
 * plus whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct ifreq ifr;
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /*
         *      Fetch the caller's info block into kernel space
         */

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                goto out;
        /* Never trust user space to have terminated the name. */
        ifr.ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /* Temporarily cut off any ":label" so the bare device name can be
         * used for module loading and the device lookup below.
         */
        colon = strchr(ifr.ifr_name, ':');
        if (colon)
                *colon = 0;

#ifdef CONFIG_KMOD
        dev_load(net, ifr.ifr_name);
#endif

        /* First pass: permission and argument checks, no locks held. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* The reply is built in place; start from a clean slate. */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EACCES;
                if (!capable(CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EACCES;
                if (!capable(CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
                goto done;

        /* Restore the full "dev:label" name for the label comparisons. */
        if (colon)
                *colon = ':';

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_address) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /* Only SIOCSIFADDR (may create the address) and SIOCSIFFLAGS
         * (may act on the device itself) can proceed without a match.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        /* Second pass: perform the operation under RTNL. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                sin->sin_addr.s_addr = ifa->ifa_local;
                goto rarok;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                goto rarok;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                sin->sin_addr.s_addr = ifa->ifa_address;
                goto rarok;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                sin->sin_addr.s_addr = ifa->ifa_mask;
                goto rarok;

        case SIOCSIFFLAGS:
                /* With a label this targets one alias address: clearing
                 * IFF_UP deletes that address.  Without a label the flags
                 * are applied to the device.
                 */
                if (colon) {
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr.ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr.ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* No address with this label yet: create one. */
                        ret = -ENOBUFS;
                        if ((ifa = inet_alloc_ifa()) == NULL)
                                break;
                        if (colon)
                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /* Unlink without freeing so the same object can be
                         * modified and re-inserted below.
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                /* Derive mask (classful) and broadcast from the address;
                 * point-to-point devices always get a /32.
                 */
                if (!(dev->flags & IFF_POINTOPOINT)) {
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Unlink, modify, re-insert.
                         * NOTE(review): the inet_insert_ifa() result is
                         * ignored here -- confirm that is intended.
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
rarok:
        /* "Get" commands: drop RTNL, then copy the filled-in ifreq back. */
        rtnl_unlock();
        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
        goto out;
}
826
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829         struct in_device *in_dev = __in_dev_get_rtnl(dev);
830         struct in_ifaddr *ifa;
831         struct ifreq ifr;
832         int done = 0;
833
834         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835                 goto out;
836
837         for (; ifa; ifa = ifa->ifa_next) {
838                 if (!buf) {
839                         done += sizeof(ifr);
840                         continue;
841                 }
842                 if (len < (int) sizeof(ifr))
843                         break;
844                 memset(&ifr, 0, sizeof(struct ifreq));
845                 if (ifa->ifa_label)
846                         strcpy(ifr.ifr_name, ifa->ifa_label);
847                 else
848                         strcpy(ifr.ifr_name, dev->name);
849
850                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852                                                                 ifa->ifa_local;
853
854                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855                         done = -EFAULT;
856                         break;
857                 }
858                 buf  += sizeof(struct ifreq);
859                 len  -= sizeof(struct ifreq);
860                 done += sizeof(struct ifreq);
861         }
862 out:
863         return done;
864 }
865
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868         __be32 addr = 0;
869         struct in_device *in_dev;
870         struct net *net = dev_net(dev);
871
872         rcu_read_lock();
873         in_dev = __in_dev_get_rcu(dev);
874         if (!in_dev)
875                 goto no_in_dev;
876
877         for_primary_ifa(in_dev) {
878                 if (ifa->ifa_scope > scope)
879                         continue;
880                 if (!dst || inet_ifa_match(dst, ifa)) {
881                         addr = ifa->ifa_local;
882                         break;
883                 }
884                 if (!addr)
885                         addr = ifa->ifa_local;
886         } endfor_ifa(in_dev);
887 no_in_dev:
888         rcu_read_unlock();
889
890         if (addr)
891                 goto out;
892
893         /* Not loopback addresses on loopback should be preferred
894            in this case. It is importnat that lo is the first interface
895            in dev_base list.
896          */
897         read_lock(&dev_base_lock);
898         rcu_read_lock();
899         for_each_netdev(net, dev) {
900                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901                         continue;
902
903                 for_primary_ifa(in_dev) {
904                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
905                             ifa->ifa_scope <= scope) {
906                                 addr = ifa->ifa_local;
907                                 goto out_unlock_both;
908                         }
909                 } endfor_ifa(in_dev);
910         }
911 out_unlock_both:
912         read_unlock(&dev_base_lock);
913         rcu_read_unlock();
914 out:
915         return addr;
916 }
917
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919                               __be32 local, int scope)
920 {
921         int same = 0;
922         __be32 addr = 0;
923
924         for_ifa(in_dev) {
925                 if (!addr &&
926                     (local == ifa->ifa_local || !local) &&
927                     ifa->ifa_scope <= scope) {
928                         addr = ifa->ifa_local;
929                         if (same)
930                                 break;
931                 }
932                 if (!same) {
933                         same = (!local || inet_ifa_match(local, ifa)) &&
934                                 (!dst || inet_ifa_match(dst, ifa));
935                         if (same && addr) {
936                                 if (local || !dst)
937                                         break;
938                                 /* Is the selected addr into dst subnet? */
939                                 if (inet_ifa_match(addr, ifa))
940                                         break;
941                                 /* No, then can we use new local src? */
942                                 if (ifa->ifa_scope <= scope) {
943                                         addr = ifa->ifa_local;
944                                         break;
945                                 }
946                                 /* search for large dst subnet for addr */
947                                 same = 0;
948                         }
949                 }
950         } endfor_ifa(in_dev);
951
952         return same? addr : 0;
953 }
954
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963                          __be32 dst, __be32 local, int scope)
964 {
965         __be32 addr = 0;
966         struct net_device *dev;
967         struct net *net;
968
969         if (scope != RT_SCOPE_LINK)
970                 return confirm_addr_indev(in_dev, dst, local, scope);
971
972         net = dev_net(in_dev->dev);
973         read_lock(&dev_base_lock);
974         rcu_read_lock();
975         for_each_netdev(net, dev) {
976                 if ((in_dev = __in_dev_get_rcu(dev))) {
977                         addr = confirm_addr_indev(in_dev, dst, local, scope);
978                         if (addr)
979                                 break;
980                 }
981         }
982         rcu_read_unlock();
983         read_unlock(&dev_base_lock);
984
985         return addr;
986 }
987
988 /*
989  *      Device notifier
990  */
991
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994         return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007         struct in_ifaddr *ifa;
1008         int named = 0;
1009
1010         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011                 char old[IFNAMSIZ], *dot;
1012
1013                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015                 if (named++ == 0)
1016                         continue;
1017                 dot = strchr(old, ':');
1018                 if (dot == NULL) {
1019                         sprintf(old, ":%d", named);
1020                         dot = old;
1021                 }
1022                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023                         strcat(ifa->ifa_label, dot);
1024                 } else {
1025                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026                 }
1027         }
1028 }
1029
1030 /* Called only under RTNL semaphore */
1031
1032 static int inetdev_event(struct notifier_block *this, unsigned long event,
1033                          void *ptr)
1034 {
1035         struct net_device *dev = ptr;
1036         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1037
1038         ASSERT_RTNL();
1039
1040         if (!in_dev) {
1041                 if (event == NETDEV_REGISTER) {
1042                         in_dev = inetdev_init(dev);
1043                         if (!in_dev)
1044                                 return notifier_from_errno(-ENOMEM);
1045                         if (dev->flags & IFF_LOOPBACK) {
1046                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1047                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1048                         }
1049                 }
1050                 goto out;
1051         }
1052
1053         switch (event) {
1054         case NETDEV_REGISTER:
1055                 printk(KERN_DEBUG "inetdev_event: bug\n");
1056                 dev->ip_ptr = NULL;
1057                 break;
1058         case NETDEV_UP:
1059                 if (dev->mtu < 68)
1060                         break;
1061                 if (dev->flags & IFF_LOOPBACK) {
1062                         struct in_ifaddr *ifa;
1063                         if ((ifa = inet_alloc_ifa()) != NULL) {
1064                                 ifa->ifa_local =
1065                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1066                                 ifa->ifa_prefixlen = 8;
1067                                 ifa->ifa_mask = inet_make_mask(8);
1068                                 in_dev_hold(in_dev);
1069                                 ifa->ifa_dev = in_dev;
1070                                 ifa->ifa_scope = RT_SCOPE_HOST;
1071                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1072                                 inet_insert_ifa(ifa);
1073                         }
1074                 }
1075                 ip_mc_up(in_dev);
1076                 break;
1077         case NETDEV_DOWN:
1078                 ip_mc_down(in_dev);
1079                 break;
1080         case NETDEV_CHANGEMTU:
1081                 if (dev->mtu >= 68)
1082                         break;
1083                 /* MTU falled under 68, disable IP */
1084         case NETDEV_UNREGISTER:
1085                 inetdev_destroy(in_dev);
1086                 break;
1087         case NETDEV_CHANGENAME:
1088                 /* Do not notify about label change, this event is
1089                  * not interesting to applications using netlink.
1090                  */
1091                 inetdev_changename(dev, in_dev);
1092
1093                 devinet_sysctl_unregister(in_dev);
1094                 devinet_sysctl_register(in_dev);
1095                 break;
1096         }
1097 out:
1098         return NOTIFY_DONE;
1099 }
1100
1101 static struct notifier_block ip_netdev_notifier = {
1102         .notifier_call =inetdev_event,
1103 };
1104
1105 static inline size_t inet_nlmsg_size(void)
1106 {
1107         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1108                + nla_total_size(4) /* IFA_ADDRESS */
1109                + nla_total_size(4) /* IFA_LOCAL */
1110                + nla_total_size(4) /* IFA_BROADCAST */
1111                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1112 }
1113
1114 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1115                             u32 pid, u32 seq, int event, unsigned int flags)
1116 {
1117         struct ifaddrmsg *ifm;
1118         struct nlmsghdr  *nlh;
1119
1120         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1121         if (nlh == NULL)
1122                 return -EMSGSIZE;
1123
1124         ifm = nlmsg_data(nlh);
1125         ifm->ifa_family = AF_INET;
1126         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1127         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1128         ifm->ifa_scope = ifa->ifa_scope;
1129         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1130
1131         if (ifa->ifa_address)
1132                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1133
1134         if (ifa->ifa_local)
1135                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1136
1137         if (ifa->ifa_broadcast)
1138                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1139
1140         if (ifa->ifa_label[0])
1141                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1142
1143         return nlmsg_end(skb, nlh);
1144
1145 nla_put_failure:
1146         nlmsg_cancel(skb, nlh);
1147         return -EMSGSIZE;
1148 }
1149
1150 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1151 {
1152         struct net *net = sock_net(skb->sk);
1153         int idx, ip_idx;
1154         struct net_device *dev;
1155         struct in_device *in_dev;
1156         struct in_ifaddr *ifa;
1157         int s_ip_idx, s_idx = cb->args[0];
1158
1159         s_ip_idx = ip_idx = cb->args[1];
1160         idx = 0;
1161         for_each_netdev(net, dev) {
1162                 if (idx < s_idx)
1163                         goto cont;
1164                 if (idx > s_idx)
1165                         s_ip_idx = 0;
1166                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1167                         goto cont;
1168
1169                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1170                      ifa = ifa->ifa_next, ip_idx++) {
1171                         if (ip_idx < s_ip_idx)
1172                                 continue;
1173                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1174                                              cb->nlh->nlmsg_seq,
1175                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1176                                 goto done;
1177                 }
1178 cont:
1179                 idx++;
1180         }
1181
1182 done:
1183         cb->args[0] = idx;
1184         cb->args[1] = ip_idx;
1185
1186         return skb->len;
1187 }
1188
1189 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1190                       u32 pid)
1191 {
1192         struct sk_buff *skb;
1193         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1194         int err = -ENOBUFS;
1195         struct net *net;
1196
1197         net = dev_net(ifa->ifa_dev->dev);
1198         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1199         if (skb == NULL)
1200                 goto errout;
1201
1202         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1203         if (err < 0) {
1204                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1205                 WARN_ON(err == -EMSGSIZE);
1206                 kfree_skb(skb);
1207                 goto errout;
1208         }
1209         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1210 errout:
1211         if (err < 0)
1212                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1213 }
1214
1215 #ifdef CONFIG_SYSCTL
1216
1217 static void devinet_copy_dflt_conf(struct net *net, int i)
1218 {
1219         struct net_device *dev;
1220
1221         read_lock(&dev_base_lock);
1222         for_each_netdev(net, dev) {
1223                 struct in_device *in_dev;
1224                 rcu_read_lock();
1225                 in_dev = __in_dev_get_rcu(dev);
1226                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1227                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1228                 rcu_read_unlock();
1229         }
1230         read_unlock(&dev_base_lock);
1231 }
1232
1233 static void inet_forward_change(struct net *net)
1234 {
1235         struct net_device *dev;
1236         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1237
1238         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1239         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1240
1241         read_lock(&dev_base_lock);
1242         for_each_netdev(net, dev) {
1243                 struct in_device *in_dev;
1244                 rcu_read_lock();
1245                 in_dev = __in_dev_get_rcu(dev);
1246                 if (in_dev)
1247                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1248                 rcu_read_unlock();
1249         }
1250         read_unlock(&dev_base_lock);
1251
1252         rt_cache_flush(0);
1253 }
1254
1255 static int devinet_conf_proc(ctl_table *ctl, int write,
1256                              struct file* filp, void __user *buffer,
1257                              size_t *lenp, loff_t *ppos)
1258 {
1259         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1260
1261         if (write) {
1262                 struct ipv4_devconf *cnf = ctl->extra1;
1263                 struct net *net = ctl->extra2;
1264                 int i = (int *)ctl->data - cnf->data;
1265
1266                 set_bit(i, cnf->state);
1267
1268                 if (cnf == net->ipv4.devconf_dflt)
1269                         devinet_copy_dflt_conf(net, i);
1270         }
1271
1272         return ret;
1273 }
1274
1275 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1276                                void __user *oldval, size_t __user *oldlenp,
1277                                void __user *newval, size_t newlen)
1278 {
1279         struct ipv4_devconf *cnf;
1280         struct net *net;
1281         int *valp = table->data;
1282         int new;
1283         int i;
1284
1285         if (!newval || !newlen)
1286                 return 0;
1287
1288         if (newlen != sizeof(int))
1289                 return -EINVAL;
1290
1291         if (get_user(new, (int __user *)newval))
1292                 return -EFAULT;
1293
1294         if (new == *valp)
1295                 return 0;
1296
1297         if (oldval && oldlenp) {
1298                 size_t len;
1299
1300                 if (get_user(len, oldlenp))
1301                         return -EFAULT;
1302
1303                 if (len) {
1304                         if (len > table->maxlen)
1305                                 len = table->maxlen;
1306                         if (copy_to_user(oldval, valp, len))
1307                                 return -EFAULT;
1308                         if (put_user(len, oldlenp))
1309                                 return -EFAULT;
1310                 }
1311         }
1312
1313         *valp = new;
1314
1315         cnf = table->extra1;
1316         net = table->extra2;
1317         i = (int *)table->data - cnf->data;
1318
1319         set_bit(i, cnf->state);
1320
1321         if (cnf == net->ipv4.devconf_dflt)
1322                 devinet_copy_dflt_conf(net, i);
1323
1324         return 1;
1325 }
1326
1327 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1328                                   struct file* filp, void __user *buffer,
1329                                   size_t *lenp, loff_t *ppos)
1330 {
1331         int *valp = ctl->data;
1332         int val = *valp;
1333         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1334
1335         if (write && *valp != val) {
1336                 struct net *net = ctl->extra2;
1337
1338                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1339                         inet_forward_change(net);
1340                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1341                         rt_cache_flush(0);
1342         }
1343
1344         return ret;
1345 }
1346
1347 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1348                          struct file* filp, void __user *buffer,
1349                          size_t *lenp, loff_t *ppos)
1350 {
1351         int *valp = ctl->data;
1352         int val = *valp;
1353         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354
1355         if (write && *valp != val)
1356                 rt_cache_flush(0);
1357
1358         return ret;
1359 }
1360
1361 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1362                                   void __user *oldval, size_t __user *oldlenp,
1363                                   void __user *newval, size_t newlen)
1364 {
1365         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1366                                       newval, newlen);
1367
1368         if (ret == 1)
1369                 rt_cache_flush(0);
1370
1371         return ret;
1372 }
1373
1374
/*
 * Initializer for one ctl_table entry of the devconf template.
 *
 *   attr    suffix of the NET_IPV4_CONF_* constant naming the setting
 *   name    procname string
 *   mval    file mode
 *   proc    proc_handler
 *   sysctl  strategy routine
 *
 * .data points into the global ipv4_devconf at slot
 * (NET_IPV4_CONF_<attr> - 1); .extra1 points at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl)		\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= name,					\
		.data		= ipv4_devconf.data +			\
				  NET_IPV4_CONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.strategy	= sysctl,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl)		\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers call rt_cache_flush() on a change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1402
1403 static struct devinet_sysctl_table {
1404         struct ctl_table_header *sysctl_header;
1405         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1406         char *dev_name;
1407 } devinet_sysctl = {
1408         .devinet_vars = {
1409                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1410                                              devinet_sysctl_forward,
1411                                              devinet_conf_sysctl),
1412                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1413
1414                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1415                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1416                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1417                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1418                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1419                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1420                                         "accept_source_route"),
1421                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1422                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1423                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1424                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1425                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1426                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1427                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1428                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1429                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1430
1431                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1432                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1433                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1434                                               "force_igmp_version"),
1435                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1436                                               "promote_secondaries"),
1437         },
1438 };
1439
1440 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1441                 int ctl_name, struct ipv4_devconf *p)
1442 {
1443         int i;
1444         struct devinet_sysctl_table *t;
1445
1446 #define DEVINET_CTL_PATH_DEV    3
1447
1448         struct ctl_path devinet_ctl_path[] = {
1449                 { .procname = "net", .ctl_name = CTL_NET, },
1450                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1451                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1452                 { /* to be set */ },
1453                 { },
1454         };
1455
1456         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1457         if (!t)
1458                 goto out;
1459
1460         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1461                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1462                 t->devinet_vars[i].extra1 = p;
1463                 t->devinet_vars[i].extra2 = net;
1464         }
1465
1466         /*
1467          * Make a copy of dev_name, because '.procname' is regarded as const
1468          * by sysctl and we wouldn't want anyone to change it under our feet
1469          * (see SIOCSIFNAME).
1470          */
1471         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1472         if (!t->dev_name)
1473                 goto free;
1474
1475         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1476         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1477
1478         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1479                         t->devinet_vars);
1480         if (!t->sysctl_header)
1481                 goto free_procname;
1482
1483         p->sysctl = t;
1484         return 0;
1485
1486 free_procname:
1487         kfree(t->dev_name);
1488 free:
1489         kfree(t);
1490 out:
1491         return -ENOBUFS;
1492 }
1493
1494 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1495 {
1496         struct devinet_sysctl_table *t = cnf->sysctl;
1497
1498         if (t == NULL)
1499                 return;
1500
1501         cnf->sysctl = NULL;
1502         unregister_sysctl_table(t->sysctl_header);
1503         kfree(t->dev_name);
1504         kfree(t);
1505 }
1506
1507 static void devinet_sysctl_register(struct in_device *idev)
1508 {
1509         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1510                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1511         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1512                         idev->dev->ifindex, &idev->cnf);
1513 }
1514
1515 static void devinet_sysctl_unregister(struct in_device *idev)
1516 {
1517         __devinet_sysctl_unregister(&idev->cnf);
1518         neigh_sysctl_unregister(idev->arp_parms);
1519 }
1520
1521 static struct ctl_table ctl_forward_entry[] = {
1522         {
1523                 .ctl_name       = NET_IPV4_FORWARD,
1524                 .procname       = "ip_forward",
1525                 .data           = &ipv4_devconf.data[
1526                                         NET_IPV4_CONF_FORWARDING - 1],
1527                 .maxlen         = sizeof(int),
1528                 .mode           = 0644,
1529                 .proc_handler   = devinet_sysctl_forward,
1530                 .strategy       = devinet_conf_sysctl,
1531                 .extra1         = &ipv4_devconf,
1532                 .extra2         = &init_net,
1533         },
1534         { },
1535 };
1536
1537 static __net_initdata struct ctl_path net_ipv4_path[] = {
1538         { .procname = "net", .ctl_name = CTL_NET, },
1539         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1540         { },
1541 };
1542 #endif
1543
1544 static __net_init int devinet_init_net(struct net *net)
1545 {
1546         int err;
1547         struct ipv4_devconf *all, *dflt;
1548 #ifdef CONFIG_SYSCTL
1549         struct ctl_table *tbl = ctl_forward_entry;
1550         struct ctl_table_header *forw_hdr;
1551 #endif
1552
1553         err = -ENOMEM;
1554         all = &ipv4_devconf;
1555         dflt = &ipv4_devconf_dflt;
1556
1557         if (net != &init_net) {
1558                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1559                 if (all == NULL)
1560                         goto err_alloc_all;
1561
1562                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1563                 if (dflt == NULL)
1564                         goto err_alloc_dflt;
1565
1566 #ifdef CONFIG_SYSCTL
1567                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1568                 if (tbl == NULL)
1569                         goto err_alloc_ctl;
1570
1571                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1572                 tbl[0].extra1 = all;
1573                 tbl[0].extra2 = net;
1574 #endif
1575         }
1576
1577 #ifdef CONFIG_SYSCTL
1578         err = __devinet_sysctl_register(net, "all",
1579                         NET_PROTO_CONF_ALL, all);
1580         if (err < 0)
1581                 goto err_reg_all;
1582
1583         err = __devinet_sysctl_register(net, "default",
1584                         NET_PROTO_CONF_DEFAULT, dflt);
1585         if (err < 0)
1586                 goto err_reg_dflt;
1587
1588         err = -ENOMEM;
1589         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1590         if (forw_hdr == NULL)
1591                 goto err_reg_ctl;
1592         net->ipv4.forw_hdr = forw_hdr;
1593 #endif
1594
1595         net->ipv4.devconf_all = all;
1596         net->ipv4.devconf_dflt = dflt;
1597         return 0;
1598
1599 #ifdef CONFIG_SYSCTL
1600 err_reg_ctl:
1601         __devinet_sysctl_unregister(dflt);
1602 err_reg_dflt:
1603         __devinet_sysctl_unregister(all);
1604 err_reg_all:
1605         if (tbl != ctl_forward_entry)
1606                 kfree(tbl);
1607 err_alloc_ctl:
1608 #endif
1609         if (dflt != &ipv4_devconf_dflt)
1610                 kfree(dflt);
1611 err_alloc_dflt:
1612         if (all != &ipv4_devconf)
1613                 kfree(all);
1614 err_alloc_all:
1615         return err;
1616 }
1617
1618 static __net_exit void devinet_exit_net(struct net *net)
1619 {
1620 #ifdef CONFIG_SYSCTL
1621         struct ctl_table *tbl;
1622
1623         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1624         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1625         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1626         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1627         kfree(tbl);
1628 #endif
1629         kfree(net->ipv4.devconf_dflt);
1630         kfree(net->ipv4.devconf_all);
1631 }
1632
1633 static __net_initdata struct pernet_operations devinet_ops = {
1634         .init = devinet_init_net,
1635         .exit = devinet_exit_net,
1636 };
1637
1638 void __init devinet_init(void)
1639 {
1640         register_pernet_subsys(&devinet_ops);
1641
1642         register_gifconf(PF_INET, inet_gifconf);
1643         register_netdevice_notifier(&ip_netdev_notifier);
1644
1645         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1646         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1647         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1648 }
1649
1650 EXPORT_SYMBOL(in_dev_finish_destroy);
1651 EXPORT_SYMBOL(inet_select_addr);
1652 EXPORT_SYMBOL(inetdev_by_index);
1653 EXPORT_SYMBOL(register_inetaddr_notifier);
1654 EXPORT_SYMBOL(unregister_inetaddr_notifier);