2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
54 #include <linux/sysctl.h>
56 #include <linux/kmod.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
/*
 * Built-in IPv4 device configuration that devinet_init_net() uses as the
 * "all" set.  Slots are indexed by NET_IPV4_CONF_<name> - 1; the three
 * redirect knobs (accept, send, secure) and shared_media start out as 1.
 */
65 static struct ipv4_devconf ipv4_devconf = {
67 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
/*
 * Built-in "default" IPv4 device configuration.  devinet_init_net() uses it
 * as the per-namespace default set, and inetdev_init() copies the namespace
 * default into every new in_device.  It carries the same initial values as
 * ipv4_devconf plus accept_source_route = 1.
 */
74 static struct ipv4_devconf ipv4_devconf_dflt = {
76 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Access attribute attr of the default devconf of namespace net
 * (net->ipv4.devconf_dflt), by way of the generic IPV4_DEVCONF() accessor.
 * It is used below both as a value and as the target of an assignment.
 */
84 #define IPV4_DEVCONF_DFLT(net, attr) \
85 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr(): the local, peer and broadcast addresses are 32-bit
 * values, and the label is a string of at most IFNAMSIZ - 1 characters.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 },
89 [IFA_ADDRESS] = { .type = NLA_U32 },
90 [IFA_BROADCAST] = { .type = NLA_U32 },
91 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down, plus the blocking
 * notifier chain (inetaddr_chain) on which address events are published
 * with NETDEV_UP and NETDEV_DOWN.
 * NOTE(review): devinet_sysctl_register()/devinet_sysctl_unregister() appear
 * here both as prototypes and as inline stubs; presumably a CONFIG_SYSCTL
 * conditional that is not visible in this listing selects one of the two --
 * confirm.
 */
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
103 static inline void devinet_sysctl_register(struct in_device *idev)
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
111 /* Locks all the inet devices. */
/*
 * Allocate one zero-filled struct in_ifaddr with GFP_KERNEL.
 * Returns the kzalloc() result unchanged.
 */
113 static struct in_ifaddr *inet_alloc_ifa(void)
115 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * RCU callback scheduled by inet_free_ifa().  Recovers the in_ifaddr from
 * its embedded rcu_head and drops the reference it holds on its in_device.
 * NOTE(review): the release of the in_ifaddr memory itself is not visible
 * in this listing -- confirm.
 */
118 static void inet_rcu_free_ifa(struct rcu_head *head)
120 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 in_dev_put(ifa->ifa_dev);
/*
 * Queue ifa for deferred release: inet_rcu_free_ifa() is invoked through
 * call_rcu() on the rcu_head embedded in the address.
 */
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown of an in_device; exported for use outside this file.
 * Warns if the address list or the multicast list is still populated.
 * When NET_REFCNT_DEBUG is defined the in_device pointer and device name
 * are also printed at debug level.  An error is logged when an in_device
 * is being freed while still alive.
 * NOTE(review): the condition guarding the pr_err() and the actual release
 * calls are not visible in this listing -- confirm.
 */
131 void in_dev_finish_destroy(struct in_device *idev)
133 struct net_device *dev = idev->dev;
135 WARN_ON(idev->ifa_list);
136 WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139 idev, dev ? dev->name : "NIL");
143 pr_err("Freeing alive in_device %p\n", idev);
147 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Allocate and set up the IPv4 state (struct in_device) for dev.
 *
 * The new in_device starts from a copy of the namespace default devconf
 * with its sysctl pointer cleared.  ARP neighbour parameters come from
 * neigh_parms_alloc(), and LRO is disabled on the device when forwarding is
 * enabled in the copied configuration.  Sysctl entries and multicast state
 * are then initialised.  dev->ip_ptr is published with rcu_assign_pointer()
 * as the last step, because packets can be received as soon as it is set.
 *
 * NOTE(review): the failure paths and the return statement are not visible
 * in this listing; inetdev_event() treats a NULL result as -ENOMEM.
 */
149 static struct in_device *inetdev_init(struct net_device *dev)
151 struct in_device *in_dev;
155 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
158 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159 sizeof(in_dev->cnf));
160 in_dev->cnf.sysctl = NULL;
162 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
163 if (!in_dev->arp_parms)
165 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
166 dev_disable_lro(dev);
167 /* Reference in_dev->dev */
169 /* Account for reference dev->ip_ptr (below) */
172 devinet_sysctl_register(in_dev);
173 ip_mc_init_dev(in_dev);
174 if (dev->flags & IFF_UP)
177 /* we can receive as soon as ip_ptr is set -- do this last */
178 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback scheduled by inetdev_destroy().  Recovers the in_device from
 * its embedded rcu_head.
 * NOTE(review): the action taken on idev is not visible in this listing;
 * the name suggests a reference drop -- confirm.
 */
187 static void in_dev_rcu_put(struct rcu_head *head)
189 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device: destroy its multicast state, delete
 * every address still on ifa_list through inet_del_ifa(), unregister the
 * sysctl entries, release the ARP neighbour parameters and finally schedule
 * in_dev_rcu_put() through call_rcu().
 */
193 static void inetdev_destroy(struct in_device *in_dev)
195 struct in_ifaddr *ifa;
196 struct net_device *dev;
204 ip_mc_destroy_dev(in_dev);
206 while ((ifa = in_dev->ifa_list) != NULL) {
207 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
213 devinet_sysctl_unregister(in_dev);
214 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
217 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of in_dev looking for one whose subnet
 * contains a and, when b is non-zero, contains b as well.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero is returned on a match and 0 otherwise -- confirm.
 */
220 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
223 for_primary_ifa(in_dev) {
224 if (inet_ifa_match(a, ifa)) {
225 if (!b || inet_ifa_match(b, ifa)) {
230 } endfor_ifa(in_dev);
/*
 * Unlink the address *ifap from the list of in_dev and announce the removal.
 *
 * When the address is a primary, the entries after it are scanned for
 * secondaries with the same mask that lie in the same subnet.  Those are
 * unlinked and announced as deleted unless promotion of secondaries
 * (IN_DEV_PROMOTE_SECONDARIES) applies.  With promotion, one secondary is
 * relinked after the last primary, has IFA_F_SECONDARY cleared and is
 * announced with RTM_NEWADDR and the notifier chain.
 *
 * Every removal is reported with rtmsg_ifa(RTM_DELADDR) first and the
 * inetaddr notifier chain (NETDEV_DOWN) second; the existing comment in the
 * body explains why that order is intentional.
 *
 * nlh and pid are passed through to rtmsg_ifa().
 * NOTE(review): the use of the destroy argument is not visible in this
 * listing -- confirm against the full source.
 */
235 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
236 int destroy, struct nlmsghdr *nlh, u32 pid)
238 struct in_ifaddr *promote = NULL;
239 struct in_ifaddr *ifa, *ifa1 = *ifap;
240 struct in_ifaddr *last_prim = in_dev->ifa_list;
241 struct in_ifaddr *prev_prom = NULL;
242 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
246 /* 1. Deleting primary ifaddr forces deletion all secondaries
247 * unless alias promotion is set
250 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
251 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253 while ((ifa = *ifap1) != NULL) {
254 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
255 ifa1->ifa_scope <= ifa->ifa_scope)
258 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
259 ifa1->ifa_mask != ifa->ifa_mask ||
260 !inet_ifa_match(ifa1->ifa_address, ifa)) {
261 ifap1 = &ifa->ifa_next;
267 *ifap1 = ifa->ifa_next;
269 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
270 blocking_notifier_call_chain(&inetaddr_chain,
282 *ifap = ifa1->ifa_next;
284 /* 3. Announce address deletion */
286 /* Send message first, then call notifier.
287 At first sight, FIB update triggered by notifier
288 will refer to already deleted ifaddr, that could confuse
289 netlink listeners. It is not true: look, gated sees
290 that route deleted and if it still thinks that ifaddr
291 is valid, it will try to restore deleted routes... Grr.
292 So that, this order is correct.
294 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
295 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
300 prev_prom->ifa_next = promote->ifa_next;
301 promote->ifa_next = last_prim->ifa_next;
302 last_prim->ifa_next = promote;
305 promote->ifa_flags &= ~IFA_F_SECONDARY;
306 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
307 blocking_notifier_call_chain(&inetaddr_chain,
309 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
310 if (ifa1->ifa_mask != ifa->ifa_mask ||
311 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers that are not
 * answering a netlink request: no netlink header and pid 0 are passed.
 */
321 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
324 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link ifa into the address list of its in_device (ifa->ifa_dev) and
 * announce it.
 *
 * The address is first treated as a primary.  While walking the list,
 * last_primary tracks the slot after the last primary whose scope is not
 * narrower than the scope of ifa.  If an existing entry has the same mask
 * and covers the same subnet, the new address is either rejected (same
 * local address, or a different scope) or marked IFA_F_SECONDARY.
 * A new primary also reseeds the network random generator with its local
 * address.
 *
 * The netlink RTM_NEWADDR message is sent before the notifier chain
 * (NETDEV_UP) runs, so listeners learn about the address before the FIB
 * update that the notifier triggers.
 *
 * NOTE(review): the early exit for a zero ifa_local, the error codes of the
 * rejection paths and the final return are not visible in this listing.
 */
327 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
330 struct in_device *in_dev = ifa->ifa_dev;
331 struct in_ifaddr *ifa1, **ifap, **last_primary;
335 if (!ifa->ifa_local) {
340 ifa->ifa_flags &= ~IFA_F_SECONDARY;
341 last_primary = &in_dev->ifa_list;
343 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
344 ifap = &ifa1->ifa_next) {
345 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
346 ifa->ifa_scope <= ifa1->ifa_scope)
347 last_primary = &ifa1->ifa_next;
348 if (ifa1->ifa_mask == ifa->ifa_mask &&
349 inet_ifa_match(ifa1->ifa_address, ifa)) {
350 if (ifa1->ifa_local == ifa->ifa_local) {
354 if (ifa1->ifa_scope != ifa->ifa_scope) {
358 ifa->ifa_flags |= IFA_F_SECONDARY;
362 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
363 net_srandom(ifa->ifa_local);
367 ifa->ifa_next = *ifap;
370 /* Send message first, then call notifier.
371 Notifier will trigger FIB update, so that
372 listeners of netlink will know about new ifaddr */
373 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
374 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Insert ifa without an originating netlink request: forwards to
 * __inet_insert_ifa() with a NULL header and pid 0 and returns its result.
 */
379 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach ifa to the in_device of dev and insert it.
 *
 * Calls ipv4_devconf_setall() on the in_device, binds the address to it
 * (warning if the address was already bound to another in_device), forces
 * host scope for loopback addresses and returns the result of
 * inet_insert_ifa().
 * NOTE(review): the handling of a device without an in_device is not
 * visible in this listing.
 */
384 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 struct in_device *in_dev = __in_dev_get_rtnl(dev);
394 ipv4_devconf_setall(in_dev);
395 if (ifa->ifa_dev != in_dev) {
396 WARN_ON(ifa->ifa_dev);
398 ifa->ifa_dev = in_dev;
400 if (ipv4_is_loopback(ifa->ifa_local))
401 ifa->ifa_scope = RT_SCOPE_HOST;
402 return inet_insert_ifa(ifa);
/*
 * Find the in_device of the interface with index ifindex in namespace net.
 * The device is looked up with dev_get_by_index_rcu() and the in_device is
 * obtained with in_dev_get().  Exported for use outside this file.
 * NOTE(review): the return statement is not visible; the local starts as
 * NULL and the caller in inet_rtm_deladdr() checks for NULL, so presumably
 * NULL is returned when nothing is found -- confirm.
 */
405 struct in_device *inetdev_by_index(struct net *net, int ifindex)
407 struct net_device *dev;
408 struct in_device *in_dev = NULL;
411 dev = dev_get_by_index_rcu(net, ifindex);
413 in_dev = in_dev_get(dev);
417 EXPORT_SYMBOL(inetdev_by_index);
/*
 * Search the primary addresses of in_dev for one whose mask equals the
 * given mask and whose subnet contains prefix.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching address or NULL is returned -- confirm.
 */
419 /* Called only from RTNL semaphored context. No locks. */
421 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 for_primary_ifa(in_dev) {
427 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429 } endfor_ifa(in_dev);
/*
 * rtnetlink handler for RTM_DELADDR (registered in devinet_init()).
 *
 * Parses the request against ifa_ipv4_policy and finds the in_device by the
 * interface index in the message.  It drops the reference taken by that
 * lookup and walks the address list.  An entry is skipped when it differs
 * from the request in local address, label, or prefix length and peer
 * address.  The first entry that passes is removed with
 * __inet_del_ifa(destroy = 1), reporting to the requesting pid.
 * If no entry matches, err is set to -EADDRNOTAVAIL.
 * NOTE(review): the error exits and the return statements are not visible
 * in this listing.
 */
433 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 struct net *net = sock_net(skb->sk);
436 struct nlattr *tb[IFA_MAX+1];
437 struct in_device *in_dev;
438 struct ifaddrmsg *ifm;
439 struct in_ifaddr *ifa, **ifap;
444 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
448 ifm = nlmsg_data(nlh);
449 in_dev = inetdev_by_index(net, ifm->ifa_index);
450 if (in_dev == NULL) {
455 __in_dev_put(in_dev);
457 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
458 ifap = &ifa->ifa_next) {
460 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466 if (tb[IFA_ADDRESS] &&
467 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
468 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
475 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an RTM_NEWADDR netlink request.
 *
 * The message is parsed against ifa_ipv4_policy.  A prefix length above 32
 * or a missing IFA_LOCAL attribute is treated as an error.  The device is
 * found by index in net and its in_device obtained under RTNL.  A fresh
 * in_ifaddr is then filled from the message: prefix length and derived
 * mask, flags, scope, owning in_device, local address, peer address
 * (defaulting to the local address when IFA_ADDRESS is absent), optional
 * broadcast address, and a label taken from IFA_LABEL or from the device
 * name.
 * NOTE(review): the error exits and the return value are not visible in
 * this listing -- confirm how failures are reported to inet_rtm_newaddr().
 */
480 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482 struct nlattr *tb[IFA_MAX+1];
483 struct in_ifaddr *ifa;
484 struct ifaddrmsg *ifm;
485 struct net_device *dev;
486 struct in_device *in_dev;
489 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
493 ifm = nlmsg_data(nlh);
495 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498 dev = __dev_get_by_index(net, ifm->ifa_index);
503 in_dev = __in_dev_get_rtnl(dev);
508 ifa = inet_alloc_ifa();
511 * A potential indev allocation can be left alive, it stays
512 * assigned to its device and is destroy with it.
516 ipv4_devconf_setall(in_dev);
519 if (tb[IFA_ADDRESS] == NULL)
520 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
523 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
524 ifa->ifa_flags = ifm->ifa_flags;
525 ifa->ifa_scope = ifm->ifa_scope;
526 ifa->ifa_dev = in_dev;
528 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
529 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531 if (tb[IFA_BROADCAST])
532 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink handler for RTM_NEWADDR (registered in devinet_init()).
 * Converts the request into an in_ifaddr with rtm_to_ifaddr() and inserts
 * it with __inet_insert_ifa(), reporting to the requesting pid.
 * NOTE(review): the check of the rtm_to_ifaddr() result is not visible in
 * this listing.
 */
545 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547 struct net *net = sock_net(skb->sk);
548 struct in_ifaddr *ifa;
552 ifa = rtm_to_ifaddr(net, nlh);
556 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
/*
 * Classful prefix-length helper used by devinet_ioctl().
 * The result starts at -1, which callers treat as an invalid address.  The
 * zero network is checked first; otherwise the address is converted to host
 * byte order and tested against the class A, B and C ranges.
 * NOTE(review): the value assigned in each branch is not visible in this
 * listing; devinet_ioctl() stores the result as a prefix length.
 */
560 * Determine a default network mask, based on the IP address.
563 static inline int inet_abc_len(__be32 addr)
565 int rc = -1; /* Something else, probably a multicast. */
567 if (ipv4_is_zeronet(addr))
570 __u32 haddr = ntohl(addr);
572 if (IN_CLASSA(haddr))
574 else if (IN_CLASSB(haddr))
576 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls for namespace net.
 *
 * The struct ifreq at arg is copied in, its name is NUL-terminated, the
 * original sockaddr is saved for later comparison and dev_load() is called
 * for the name.
 *
 * Get requests (address, broadcast, destination, netmask): the reply
 * sockaddr is cleared and set to AF_INET.  Matching by address is attempted
 * only when the caller passed AF_INET.
 * Set requests: CAP_NET_ADMIN is checked, and the address-setting requests
 * also check that sin_family is AF_INET.
 *
 * The device is found by name.  The address entry is then searched first by
 * label and address (4.4BSD style) and, failing that, by label alone
 * (4.3BSD style).  The check on ifa appears to make every command except
 * SIOCSIFADDR and SIOCSIFFLAGS fail with -EADDRNOTAVAIL when no entry was
 * found.
 *
 * Set operations that modify an existing entry unlink it with
 * inet_del_ifa(destroy = 0), change the field and re-insert it.  A netmask
 * change also recomputes the broadcast address when the old one was the
 * standard one for the old mask.  SIOCSIFADDR may allocate a new entry and
 * derives a classful prefix unless the device is point-to-point, in which
 * case /32 is used.
 *
 * The ifreq is copied back to user space at the end; a failed copy yields
 * -EFAULT.
 * NOTE(review): locking, several error exits and the case labels of the
 * flag-handling branch are not visible in this listing.
 */
584 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 struct sockaddr_in sin_orig;
588 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
589 struct in_device *in_dev;
590 struct in_ifaddr **ifap = NULL;
591 struct in_ifaddr *ifa = NULL;
592 struct net_device *dev;
595 int tryaddrmatch = 0;
598 * Fetch the caller's info block into kernel space
601 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
603 ifr.ifr_name[IFNAMSIZ - 1] = 0;
605 /* save original address for comparison */
606 memcpy(&sin_orig, sin, sizeof(*sin));
608 colon = strchr(ifr.ifr_name, ':');
612 dev_load(net, ifr.ifr_name);
615 case SIOCGIFADDR: /* Get interface address */
616 case SIOCGIFBRDADDR: /* Get the broadcast address */
617 case SIOCGIFDSTADDR: /* Get the destination address */
618 case SIOCGIFNETMASK: /* Get the netmask for the interface */
619 /* Note that these ioctls will not sleep,
620 so that we do not impose a lock.
621 One day we will be forced to put shlock here (I mean SMP)
623 tryaddrmatch = (sin_orig.sin_family == AF_INET);
624 memset(sin, 0, sizeof(*sin));
625 sin->sin_family = AF_INET;
630 if (!capable(CAP_NET_ADMIN))
633 case SIOCSIFADDR: /* Set interface address (and family) */
634 case SIOCSIFBRDADDR: /* Set the broadcast address */
635 case SIOCSIFDSTADDR: /* Set the destination address */
636 case SIOCSIFNETMASK: /* Set the netmask for the interface */
638 if (!capable(CAP_NET_ADMIN))
641 if (sin->sin_family != AF_INET)
652 dev = __dev_get_by_name(net, ifr.ifr_name);
659 in_dev = __in_dev_get_rtnl(dev);
662 /* Matthias Andree */
663 /* compare label and address (4.4BSD style) */
664 /* note: we only do this for a limited set of ioctls
665 and only if the original address family was AF_INET.
666 This is checked above. */
667 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
668 ifap = &ifa->ifa_next) {
669 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
670 sin_orig.sin_addr.s_addr ==
676 /* we didn't get a match, maybe the application is
677 4.3BSD-style and passed in junk so we fall back to
678 comparing just the label */
680 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
681 ifap = &ifa->ifa_next)
682 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687 ret = -EADDRNOTAVAIL;
688 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
692 case SIOCGIFADDR: /* Get interface address */
693 sin->sin_addr.s_addr = ifa->ifa_local;
696 case SIOCGIFBRDADDR: /* Get the broadcast address */
697 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700 case SIOCGIFDSTADDR: /* Get the destination address */
701 sin->sin_addr.s_addr = ifa->ifa_address;
704 case SIOCGIFNETMASK: /* Get the netmask for the interface */
705 sin->sin_addr.s_addr = ifa->ifa_mask;
710 ret = -EADDRNOTAVAIL;
714 if (!(ifr.ifr_flags & IFF_UP))
715 inet_del_ifa(in_dev, ifap, 1);
718 ret = dev_change_flags(dev, ifr.ifr_flags);
721 case SIOCSIFADDR: /* Set interface address (and family) */
723 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728 ifa = inet_alloc_ifa();
732 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737 if (ifa->ifa_local == sin->sin_addr.s_addr)
739 inet_del_ifa(in_dev, ifap, 0);
740 ifa->ifa_broadcast = 0;
744 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746 if (!(dev->flags & IFF_POINTOPOINT)) {
747 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
748 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
749 if ((dev->flags & IFF_BROADCAST) &&
750 ifa->ifa_prefixlen < 31)
751 ifa->ifa_broadcast = ifa->ifa_address |
754 ifa->ifa_prefixlen = 32;
755 ifa->ifa_mask = inet_make_mask(32);
757 ret = inet_set_ifa(dev, ifa);
760 case SIOCSIFBRDADDR: /* Set the broadcast address */
762 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
763 inet_del_ifa(in_dev, ifap, 0);
764 ifa->ifa_broadcast = sin->sin_addr.s_addr;
765 inet_insert_ifa(ifa);
769 case SIOCSIFDSTADDR: /* Set the destination address */
771 if (ifa->ifa_address == sin->sin_addr.s_addr)
774 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777 inet_del_ifa(in_dev, ifap, 0);
778 ifa->ifa_address = sin->sin_addr.s_addr;
779 inet_insert_ifa(ifa);
782 case SIOCSIFNETMASK: /* Set the netmask for the interface */
785 * The mask we set must be legal.
788 if (bad_mask(sin->sin_addr.s_addr, 0))
791 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
792 __be32 old_mask = ifa->ifa_mask;
793 inet_del_ifa(in_dev, ifap, 0);
794 ifa->ifa_mask = sin->sin_addr.s_addr;
795 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797 /* See if current broadcast address matches
798 * with current netmask, then recalculate
799 * the broadcast address. Otherwise it's a
800 * funny address, so don't touch it since
801 * the user seems to know what (s)he's doing...
803 if ((dev->flags & IFF_BROADCAST) &&
804 (ifa->ifa_prefixlen < 31) &&
805 (ifa->ifa_broadcast ==
806 (ifa->ifa_local|~old_mask))) {
807 ifa->ifa_broadcast = (ifa->ifa_local |
808 ~sin->sin_addr.s_addr);
810 inet_insert_ifa(ifa);
820 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Callback registered for PF_INET with register_gifconf() in devinet_init().
 *
 * Walks the addresses of dev and, for each, builds a zeroed struct ifreq
 * whose name is the address label or the device name and whose address
 * family is AF_INET, then copies it to the user buffer buf.  After each
 * record buf advances, len shrinks and done grows by sizeof(struct ifreq).
 * A remaining len smaller than one record is checked before each copy.
 * NOTE(review): the choice between label and device name, the behaviour for
 * a NULL buf and the return value are not visible in this listing.
 */
824 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 struct in_device *in_dev = __in_dev_get_rtnl(dev);
827 struct in_ifaddr *ifa;
834 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
839 if (len < (int) sizeof(ifr))
841 memset(&ifr, 0, sizeof(struct ifreq));
843 strcpy(ifr.ifr_name, ifa->ifa_label);
845 strcpy(ifr.ifr_name, dev->name);
847 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
848 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855 buf += sizeof(struct ifreq);
856 len -= sizeof(struct ifreq);
857 done += sizeof(struct ifreq);
/*
 * Pick a local source address for traffic leaving dev towards dst, limited
 * to addresses whose scope value does not exceed scope.  Exported.
 *
 * First pass: the primary addresses of dev.  Entries with a scope value
 * above the limit are tested and passed over; an address whose subnet
 * contains dst (or any address when dst is 0) is selected, and
 * ifa->ifa_local is also recorded as a candidate along the way.
 * Second pass: every device of the namespace is scanned under RCU for a
 * primary address whose scope is not RT_SCOPE_LINK and is within the limit.
 * NOTE(review): the conditions that end the first pass and trigger the
 * second, and the return statement, are not visible in this listing.
 */
863 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866 struct in_device *in_dev;
867 struct net *net = dev_net(dev);
870 in_dev = __in_dev_get_rcu(dev);
874 for_primary_ifa(in_dev) {
875 if (ifa->ifa_scope > scope)
877 if (!dst || inet_ifa_match(dst, ifa)) {
878 addr = ifa->ifa_local;
882 addr = ifa->ifa_local;
883 } endfor_ifa(in_dev);
889 /* Not loopback addresses on loopback should be preferred
890 in this case. It is importnat that lo is the first interface
893 for_each_netdev_rcu(net, dev) {
894 in_dev = __in_dev_get_rcu(dev);
898 for_primary_ifa(in_dev) {
899 if (ifa->ifa_scope != RT_SCOPE_LINK &&
900 ifa->ifa_scope <= scope) {
901 addr = ifa->ifa_local;
904 } endfor_ifa(in_dev);
910 EXPORT_SYMBOL(inet_select_addr);
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Scans the addresses of in_dev for a local address that is compatible with
 * local (0 means any), lies within the allowed scope and shares a subnet
 * with dst (0 means any).  The flag same records whether an address entry
 * covering both local and dst was seen.  The selected address is returned
 * only when same is set; otherwise 0 is returned.
 * NOTE(review): several conditions of the scan are not visible in this
 * listing -- confirm the exact selection order against the full source.
 */
912 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
913 __be32 local, int scope)
920 (local == ifa->ifa_local || !local) &&
921 ifa->ifa_scope <= scope) {
922 addr = ifa->ifa_local;
927 same = (!local || inet_ifa_match(local, ifa)) &&
928 (!dst || inet_ifa_match(dst, ifa));
932 /* Is the selected addr into dst subnet? */
933 if (inet_ifa_match(addr, ifa))
935 /* No, then can we use new local src? */
936 if (ifa->ifa_scope <= scope) {
937 addr = ifa->ifa_local;
940 /* search for large dst subnet for addr */
944 } endfor_ifa(in_dev);
946 return same ? addr : 0;
/*
 * Entry point for confirming a local address (see the parameter list in the
 * existing description below).
 *
 * When scope is not RT_SCOPE_LINK the request is answered from in_dev alone
 * by confirm_addr_indev().  Otherwise every device of the namespace that
 * in_dev belongs to is visited under RCU and confirm_addr_indev() is tried
 * on each.
 * NOTE(review): the loop exit condition and the return statement are not
 * visible in this listing.
 */
950 * Confirm that local IP address exists using wildcards:
951 * - in_dev: only on this interface, 0=any interface
952 * - dst: only in the same subnet as dst, 0=any dst
953 * - local: address, 0=autoselect the local address
954 * - scope: maximum allowed scope value for the local address
956 __be32 inet_confirm_addr(struct in_device *in_dev,
957 __be32 dst, __be32 local, int scope)
960 struct net_device *dev;
963 if (scope != RT_SCOPE_LINK)
964 return confirm_addr_indev(in_dev, dst, local, scope);
966 net = dev_net(in_dev->dev);
968 for_each_netdev_rcu(net, dev) {
969 in_dev = __in_dev_get_rcu(dev);
971 addr = confirm_addr_indev(in_dev, dst, local, scope);
/*
 * Subscribe nb to IPv4 address events by adding it to inetaddr_chain.
 * Returns the result of blocking_notifier_chain_register().  Exported.
 */
985 int register_inetaddr_notifier(struct notifier_block *nb)
987 return blocking_notifier_chain_register(&inetaddr_chain, nb);
989 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * Remove nb from inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().  Exported.
 */
991 int unregister_inetaddr_notifier(struct notifier_block *nb)
993 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/*
 * Called from inetdev_event() on NETDEV_CHANGENAME.
 *
 * For every address of in_dev the old label is saved and the label is
 * overwritten with the new device name.  The alias suffix (the part of the
 * old label starting at the colon) is then re-attached: it is appended when
 * name plus suffix fit in IFNAMSIZ, otherwise it overwrites the tail of the
 * label.  A numeric suffix can be generated from the counter named.  Each
 * relabelled address is announced with RTM_NEWADDR.
 * NOTE(review): the conditions selecting which addresses get a suffix are
 * not visible in this listing.
 */
997 /* Rename ifa_labels for a device name change. Make some effort to preserve
998 * existing alias numbering and to create unique labels if possible.
1000 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002 struct in_ifaddr *ifa;
1005 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1006 char old[IFNAMSIZ], *dot;
1008 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1009 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012 dot = strchr(old, ':');
1014 sprintf(old, ":%d", named);
1017 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1018 strcat(ifa->ifa_label, dot);
1020 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * Predicate on an MTU value.  inetdev_event() uses it to decide whether IP
 * can be (re-)enabled or must be disabled on a device.
 * NOTE(review): the threshold is not visible in this listing.
 */
1026 static inline bool inetdev_valid_mtu(unsigned mtu)
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * Without an in_device yet:
 *   NETDEV_REGISTER  - create one with inetdev_init(); report -ENOMEM on
 *                      failure; on loopback devices set NOXFRM and NOPOLICY.
 *   NETDEV_CHANGEMTU - re-create it if the MTU is valid again.
 * With an in_device:
 *   NETDEV_REGISTER  - unexpected; a debug message is printed.
 *   device up (label not visible) - on loopback devices an address
 *                      INADDR_LOOPBACK/8 with host scope, labelled with the
 *                      device name, is allocated and inserted.
 *   NETDEV_CHANGEADDR - when arp_notify is set, a gratuitous ARP request for
 *                      the first address is sent.
 *   NETDEV_BONDING_OLDTYPE / NEWTYPE - unmap / remap multicast state.
 *   NETDEV_CHANGEMTU - a valid MTU is checked first; otherwise IP is
 *                      disabled through the NETDEV_UNREGISTER path.
 *   NETDEV_UNREGISTER - inetdev_destroy().
 *   NETDEV_CHANGENAME - relabel addresses and re-register the sysctl
 *                      entries under the new name.
 * NOTE(review): some case labels, break statements and the return value are
 * not visible in this listing.
 */
1031 /* Called only under RTNL semaphore */
1033 static int inetdev_event(struct notifier_block *this, unsigned long event,
1036 struct net_device *dev = ptr;
1037 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1042 if (event == NETDEV_REGISTER) {
1043 in_dev = inetdev_init(dev);
1045 return notifier_from_errno(-ENOMEM);
1046 if (dev->flags & IFF_LOOPBACK) {
1047 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1048 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050 } else if (event == NETDEV_CHANGEMTU) {
1051 /* Re-enabling IP */
1052 if (inetdev_valid_mtu(dev->mtu))
1053 in_dev = inetdev_init(dev);
1059 case NETDEV_REGISTER:
1060 printk(KERN_DEBUG "inetdev_event: bug\n");
1064 if (!inetdev_valid_mtu(dev->mtu))
1066 if (dev->flags & IFF_LOOPBACK) {
1067 struct in_ifaddr *ifa = inet_alloc_ifa();
1071 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1072 ifa->ifa_prefixlen = 8;
1073 ifa->ifa_mask = inet_make_mask(8);
1074 in_dev_hold(in_dev);
1075 ifa->ifa_dev = in_dev;
1076 ifa->ifa_scope = RT_SCOPE_HOST;
1077 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1078 inet_insert_ifa(ifa);
1083 case NETDEV_CHANGEADDR:
1084 /* Send gratuitous ARP to notify of link change */
1085 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1086 struct in_ifaddr *ifa = in_dev->ifa_list;
1089 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1090 ifa->ifa_address, dev,
1091 ifa->ifa_address, NULL,
1092 dev->dev_addr, NULL);
1098 case NETDEV_BONDING_OLDTYPE:
1099 ip_mc_unmap(in_dev);
1101 case NETDEV_BONDING_NEWTYPE:
1102 ip_mc_remap(in_dev);
1104 case NETDEV_CHANGEMTU:
1105 if (inetdev_valid_mtu(dev->mtu))
1107 /* disable IP when MTU is not enough */
1108 case NETDEV_UNREGISTER:
1109 inetdev_destroy(in_dev);
1111 case NETDEV_CHANGENAME:
1112 /* Do not notify about label change, this event is
1113 * not interesting to applications using netlink.
1115 inetdev_changename(dev, in_dev);
1117 devinet_sysctl_unregister(in_dev);
1118 devinet_sysctl_register(in_dev);
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered with register_netdevice_notifier() in devinet_init().
 */
1125 static struct notifier_block ip_netdev_notifier = {
1126 .notifier_call = inetdev_event,
/*
 * Upper bound on the size of one address notification: the aligned
 * ifaddrmsg header plus room for the three 4-byte address attributes and an
 * IFNAMSIZ-byte label.  rtmsg_ifa() uses it to size its skb, so it must
 * cover every attribute that inet_fill_ifaddr() can emit.
 */
1129 static inline size_t inet_nlmsg_size(void)
1131 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1132 + nla_total_size(4) /* IFA_ADDRESS */
1133 + nla_total_size(4) /* IFA_LOCAL */
1134 + nla_total_size(4) /* IFA_BROADCAST */
1135 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise ifa into skb as one netlink address message of type event.
 *
 * The ifaddrmsg header carries family AF_INET, prefix length, scope, the
 * interface index of the owning device and the address flags, which are
 * always reported with IFA_F_PERMANENT set.  The peer address, the
 * broadcast address and the label are added as attributes only when they
 * are non-zero or non-empty; the local address is added as IFA_LOCAL.
 * On success the result of nlmsg_end() is returned; on failure the
 * partially built message is cancelled.
 * NOTE(review): the condition on the IFA_LOCAL attribute and the error
 * return value are not visible in this listing; rtmsg_ifa() expects
 * -EMSGSIZE only as a sizing bug.
 */
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139 u32 pid, u32 seq, int event, unsigned int flags)
1141 struct ifaddrmsg *ifm;
1142 struct nlmsghdr *nlh;
1144 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148 ifm = nlmsg_data(nlh);
1149 ifm->ifa_family = AF_INET;
1150 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152 ifm->ifa_scope = ifa->ifa_scope;
1153 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1155 if (ifa->ifa_address)
1156 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1159 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1161 if (ifa->ifa_broadcast)
1162 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1164 if (ifa->ifa_label[0])
1165 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1167 return nlmsg_end(skb, nlh);
1170 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink dump callback for RTM_GETADDR (registered in devinet_init()).
 *
 * Resumes from the position saved in cb->args, then iterates over the
 * device-index hash buckets of the namespace, the devices in each bucket
 * (under RCU) and the addresses of each device.  Addresses before the saved
 * address index are tested and passed over.  Each remaining address is
 * emitted as an RTM_NEWADDR message flagged NLM_F_MULTI; a non-positive
 * result from inet_fill_ifaddr() ends this pass.  The address index is
 * stored back in cb->args[2].
 * NOTE(review): the bucket and device position bookkeeping and the return
 * value are only partly visible in this listing.
 */
1174 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1176 struct net *net = sock_net(skb->sk);
1179 int ip_idx, s_ip_idx;
1180 struct net_device *dev;
1181 struct in_device *in_dev;
1182 struct in_ifaddr *ifa;
1183 struct hlist_head *head;
1184 struct hlist_node *node;
1187 s_idx = idx = cb->args[1];
1188 s_ip_idx = ip_idx = cb->args[2];
1190 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1192 head = &net->dev_index_head[h];
1194 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1199 in_dev = __in_dev_get_rcu(dev);
1203 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1204 ifa = ifa->ifa_next, ip_idx++) {
1205 if (ip_idx < s_ip_idx)
1207 if (inet_fill_ifaddr(skb, ifa,
1208 NETLINK_CB(cb->skb).pid,
1210 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1224 cb->args[2] = ip_idx;
/*
 * Broadcast an address event (RTM_NEWADDR or RTM_DELADDR) for ifa to the
 * RTNLGRP_IPV4_IFADDR netlink group of the namespace that owns the device.
 *
 * The sequence number is taken from nlh when one is supplied, else 0.  The
 * skb is sized by inet_nlmsg_size() and allocated with GFP_KERNEL.  A
 * -EMSGSIZE result from inet_fill_ifaddr() is flagged with WARN_ON because
 * it can only mean the size estimate is wrong.  If the message cannot be
 * sent, the error is recorded on the group with rtnl_set_sk_err().
 */
1229 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1232 struct sk_buff *skb;
1233 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1237 net = dev_net(ifa->ifa_dev->dev);
1238 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1244 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1245 WARN_ON(err == -EMSGSIZE);
1249 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1253 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1256 #ifdef CONFIG_SYSCTL
/*
 * Propagate slot i of the namespace default devconf to every device of net
 * whose own configuration has not marked that slot in cnf.state.
 * Devices are walked with the RCU iterator.
 */
1258 static void devinet_copy_dflt_conf(struct net *net, int i)
1260 struct net_device *dev;
1263 for_each_netdev_rcu(net, dev) {
1264 struct in_device *in_dev;
1266 in_dev = __in_dev_get_rcu(dev);
1267 if (in_dev && !test_bit(i, in_dev->cnf.state))
1268 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply a change of the namespace-wide forwarding switch.
 *
 * Reads the new value of the "all" FORWARDING setting, sets the "all"
 * ACCEPT_REDIRECTS to its negation and copies it into the default set.
 * It then visits every device, disabling LRO and setting FORWARDING on each
 * in_device.
 * NOTE(review): the conditions around dev_disable_lro() and the in_device
 * update are not visible in this listing.
 */
1273 /* called with RTNL locked */
1274 static void inet_forward_change(struct net *net)
1276 struct net_device *dev;
1277 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1279 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1280 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1282 for_each_netdev(net, dev) {
1283 struct in_device *in_dev;
1285 dev_disable_lro(dev);
1287 in_dev = __in_dev_get_rcu(dev);
1289 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
/*
 * Generic sysctl handler for the per-device IPv4 settings.
 *
 * The integer is read or written with proc_dointvec().  The slot index is
 * derived from the distance between ctl->data and the start of the data
 * array of the devconf found in ctl->extra1.  That slot is marked in
 * cnf->state and, if the devconf is the namespace default (the namespace
 * comes from ctl->extra2), the value is propagated with
 * devinet_copy_dflt_conf().
 * NOTE(review): the condition guarding the bookkeeping (presumably a write)
 * and the return statement are not visible in this listing.
 */
1294 static int devinet_conf_proc(ctl_table *ctl, int write,
1295 void __user *buffer,
1296 size_t *lenp, loff_t *ppos)
1298 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1301 struct ipv4_devconf *cnf = ctl->extra1;
1302 struct net *net = ctl->extra2;
1303 int i = (int *)ctl->data - cnf->data;
1305 set_bit(i, cnf->state);
1307 if (cnf == net->ipv4.devconf_dflt)
1308 devinet_copy_dflt_conf(net, i);
/*
 * Sysctl handler for the forwarding switches.
 *
 * After proc_dointvec(), a write that changed the value is acted on as
 * follows.  For anything other than the default set, RTNL is taken with
 * rtnl_trylock(), restarting the system call when the lock is busy.  A
 * change of the "all" switch is applied through inet_forward_change(); the
 * other branch, which per the container_of() handles a devconf embedded in
 * an in_device, disables LRO on that device.  The routing cache of the
 * namespace is flushed afterwards.
 * NOTE(review): the unlock call, the exact nesting of rt_cache_flush() and
 * the return statement are not visible in this listing.
 */
1314 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1315 void __user *buffer,
1316 size_t *lenp, loff_t *ppos)
1318 int *valp = ctl->data;
1320 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1322 if (write && *valp != val) {
1323 struct net *net = ctl->extra2;
1325 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1326 if (!rtnl_trylock())
1327 return restart_syscall();
1328 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1329 inet_forward_change(net);
1331 struct ipv4_devconf *cnf = ctl->extra1;
1332 struct in_device *idev =
1333 container_of(cnf, struct in_device, cnf);
1334 dev_disable_lro(idev->dev);
1337 rt_cache_flush(net, 0);
/*
 * Sysctl handler for integer settings whose change invalidates cached
 * routes: runs proc_dointvec() and, when a write changed the stored value,
 * flushes the routing cache of the namespace found in ctl->extra2.
 * NOTE(review): the capture of the previous value (val) and the return
 * statement are not visible in this listing.
 */
1344 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1345 void __user *buffer,
1346 size_t *lenp, loff_t *ppos)
1348 int *valp = ctl->data;
1350 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1351 struct net *net = ctl->extra2;
1353 if (write && *valp != val)
1354 rt_cache_flush(net, 0);
/*
 * Builders for the entries of the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY lays out one ctl_table entry whose data pointer is
 * the slot NET_IPV4_CONF_<attr> - 1 of the static ipv4_devconf, with
 * extra1 pointing at ipv4_devconf; __devinet_sysctl_register() later
 * rebases both onto the real devconf.  The wrappers fix the mode and the
 * handler:
 *   RW       - 0644, devinet_conf_proc
 *   RO       - 0444, devinet_conf_proc
 *   COMPLEX  - 0644, caller-supplied handler
 *   FLUSHING - COMPLEX with ipv4_doint_and_flush
 */
1359 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1362 .data = ipv4_devconf.data + \
1363 NET_IPV4_CONF_ ## attr - 1, \
1364 .maxlen = sizeof(int), \
1366 .proc_handler = proc, \
1367 .extra1 = &ipv4_devconf, \
1370 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1371 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1373 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1374 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1376 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1377 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1379 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1380 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template of the per-devconf sysctl table.  __devinet_sysctl_register()
 * duplicates it for every devconf (all, default, and each device) and
 * rebases the data pointers.  "forwarding" uses its own handler,
 * "mc_forwarding" is read-only, the xfrm, policy, IGMP-version and
 * promote_secondaries entries flush the routing cache on change, and the
 * remaining entries are plain read-write integers.
 */
1382 static struct devinet_sysctl_table {
1383 struct ctl_table_header *sysctl_header;
1384 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1386 } devinet_sysctl = {
1388 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1389 devinet_sysctl_forward),
1390 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1392 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1393 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1394 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1395 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1396 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1397 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1398 "accept_source_route"),
1399 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1400 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1401 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1402 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1403 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1404 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1405 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1406 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1407 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1408 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1409 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1410 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1412 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1413 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1414 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1415 "force_igmp_version"),
1416 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1417 "promote_secondaries"),
/*
 * Register the sysctl directory net/ipv4/conf/<dev_name> for devconf p in
 * namespace net.
 *
 * The template table is duplicated with kmemdup().  Every entry except the
 * last array slot has its data pointer shifted by the byte offset between p
 * and the static ipv4_devconf, and gets extra1 = p and extra2 = net.  The
 * name is copied with kstrdup() because the sysctl core keeps the procname
 * pointer while the device may be renamed.  The table is then registered
 * under the path built in devinet_ctl_path.
 * NOTE(review): the failure unwinding, the store of t into p->sysctl and
 * the return values are not visible in this listing;
 * __devinet_sysctl_unregister() reads the table back from cnf->sysctl.
 */
1421 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1422 struct ipv4_devconf *p)
1425 struct devinet_sysctl_table *t;
1427 #define DEVINET_CTL_PATH_DEV 3
1429 struct ctl_path devinet_ctl_path[] = {
1430 { .procname = "net", },
1431 { .procname = "ipv4", },
1432 { .procname = "conf", },
1433 { /* to be set */ },
1437 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1441 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1442 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1443 t->devinet_vars[i].extra1 = p;
1444 t->devinet_vars[i].extra2 = net;
1448 * Make a copy of dev_name, because '.procname' is regarded as const
1449 * by sysctl and we wouldn't want anyone to change it under our feet
1450 * (see SIOCSIFNAME).
1452 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1456 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1458 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1460 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for cnf: fetch the table recorded in
 * cnf->sysctl and unregister its sysctl header.
 * NOTE(review): the NULL check, the clearing of cnf->sysctl and the release
 * of the table memory are not visible in this listing.
 */
1474 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1476 struct devinet_sysctl_table *t = cnf->sysctl;
1482 unregister_sysctl_table(t->sysctl_header);
/*
 * Register both sysctl trees of a device: the neighbour (ARP) parameters
 * under "ipv4" through neigh_sysctl_register(), and the devconf table under
 * the device name through __devinet_sysctl_register().
 */
1487 static void devinet_sysctl_register(struct in_device *idev)
1489 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1490 NET_IPV4_NEIGH, "ipv4", NULL);
1491 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove both sysctl trees of a device: the devconf table first, then the
 * neighbour (ARP) parameters.
 */
1495 static void devinet_sysctl_unregister(struct in_device *idev)
1497 __devinet_sysctl_unregister(&idev->cnf);
1498 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template for the "ip_forward" sysctl.  It is bound to the FORWARDING slot
 * of the static ipv4_devconf and handled by devinet_sysctl_forward(), with
 * extra1 and extra2 preset for the initial namespace.  devinet_init_net()
 * duplicates and rebinds it for other namespaces.
 */
1501 static struct ctl_table ctl_forward_entry[] = {
1503 .procname = "ip_forward",
1504 .data = &ipv4_devconf.data[
1505 NET_IPV4_CONF_FORWARDING - 1],
1506 .maxlen = sizeof(int),
1508 .proc_handler = devinet_sysctl_forward,
1509 .extra1 = &ipv4_devconf,
1510 .extra2 = &init_net,
/*
 * Sysctl path net/ipv4, under which devinet_init_net() registers the
 * ip_forward table.
 */
1515 static __net_initdata struct ctl_path net_ipv4_path[] = {
1516 { .procname = "net", },
1517 { .procname = "ipv4", },
/*
 * Per-namespace initialisation (the init hook of devinet_ops).
 *
 * The initial namespace uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry directly.  Every other namespace gets kmemdup() copies,
 * and the copied ip_forward entry is rebound to the new "all" devconf and
 * to the namespace.  With CONFIG_SYSCTL the "all" and "default" devconf
 * tables and the ip_forward table are registered, and the forward header is
 * stored in net->ipv4.forw_hdr.  Finally the two devconf pointers are
 * stored in net->ipv4.
 *
 * The tail of the function is the error unwinding: it unregisters what was
 * registered and tests each object against its static original before
 * acting on it.
 * NOTE(review): the labels, the free calls and the return statements are
 * not visible in this listing.
 */
1522 static __net_init int devinet_init_net(struct net *net)
1525 struct ipv4_devconf *all, *dflt;
1526 #ifdef CONFIG_SYSCTL
1527 struct ctl_table *tbl = ctl_forward_entry;
1528 struct ctl_table_header *forw_hdr;
1532 all = &ipv4_devconf;
1533 dflt = &ipv4_devconf_dflt;
1535 if (!net_eq(net, &init_net)) {
1536 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1540 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1542 goto err_alloc_dflt;
1544 #ifdef CONFIG_SYSCTL
1545 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1549 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1550 tbl[0].extra1 = all;
1551 tbl[0].extra2 = net;
1555 #ifdef CONFIG_SYSCTL
1556 err = __devinet_sysctl_register(net, "all", all);
1560 err = __devinet_sysctl_register(net, "default", dflt);
1565 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1566 if (forw_hdr == NULL)
1568 net->ipv4.forw_hdr = forw_hdr;
1571 net->ipv4.devconf_all = all;
1572 net->ipv4.devconf_dflt = dflt;
1575 #ifdef CONFIG_SYSCTL
1577 __devinet_sysctl_unregister(dflt);
1579 __devinet_sysctl_unregister(all);
1581 if (tbl != ctl_forward_entry)
1585 if (dflt != &ipv4_devconf_dflt)
1588 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown (the exit hook of devinet_ops).
 *
 * With CONFIG_SYSCTL the ip_forward table pointer is fetched from the
 * registered header before that header is unregistered, and the "default"
 * and "all" devconf sysctl tables are unregistered.  The two devconf
 * structures of the namespace are then freed.
 * NOTE(review): the release of tbl is not visible in this listing.
 */
1594 static __net_exit void devinet_exit_net(struct net *net)
1596 #ifdef CONFIG_SYSCTL
1597 struct ctl_table *tbl;
1599 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1600 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1601 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1602 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1605 kfree(net->ipv4.devconf_dflt);
1606 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace operations for this subsystem; registered with
 * register_pernet_subsys() in devinet_init().
 */
1609 static __net_initdata struct pernet_operations devinet_ops = {
1610 .init = devinet_init_net,
1611 .exit = devinet_exit_net,
/*
 * Boot-time initialisation of IPv4 device support.  Registers the
 * per-namespace operations, the PF_INET gifconf callback, the netdevice
 * notifier and the three rtnetlink handlers: RTM_NEWADDR and RTM_DELADDR as
 * request handlers, RTM_GETADDR as a dump handler.
 */
1614 void __init devinet_init(void)
1616 register_pernet_subsys(&devinet_ops);
1618 register_gifconf(PF_INET, inet_gifconf);
1619 register_netdevice_notifier(&ip_netdev_notifier);
1621 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1622 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1623 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);