[IPV4] FIB: printk related cleanups
[pandora-kernel.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
47
48 #include "fib_lookup.h"
49
/* Taken around every insertion into / removal from the hash tables below. */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info records, bucketed by fib_info_hashfn(). */
static struct hlist_head *fib_info_hash;
/* fib_info records with a preferred source, bucketed by fib_laddr_hashfn(). */
static struct hlist_head *fib_info_laddrhash;
/* Bucket count shared by the two tables above; used as (size - 1) mask. */
static unsigned int fib_hash_size;
/* Incremented in fib_create_info(), decremented in free_fib_info(). */
static unsigned int fib_info_cnt;

/* Nexthops that have a device are additionally hashed by interface index. */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) and change_nexthops(fi) open a brace scope, declare
 * `nhsel` (the nexthop index) and `nh` (the current nexthop; const in the
 * for_nexthops variant) and start a for loop.  The caller writes the loop
 * body and must close the scope again with endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/* Walk all (fi)->fib_nhs nexthops, read-only. */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

/* Walk all (fi)->fib_nhs nexthops with a writable `nh`. */
#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop runs exactly once over (fi)->fib_nh.
 * Hopefully gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
83
84
/*
 * Per-route-type properties, indexed by route type (the RTN_* value named
 * in the comment after each entry).
 *
 *   error - value fib_semantic_match() returns for an alias of this type;
 *           0 means the alias is resolved through its nexthops instead.
 *   scope - fib_create_info() rejects a configuration whose fc_scope is
 *           numerically smaller than this value.
 */
static const struct
{
        int     error;
        u8      scope;
} fib_props[RTN_MAX + 1] = {
        {
                .error  = 0,
                .scope  = RT_SCOPE_NOWHERE,
        },      /* RTN_UNSPEC */
        {
                .error  = 0,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_UNICAST */
        {
                .error  = 0,
                .scope  = RT_SCOPE_HOST,
        },      /* RTN_LOCAL */
        {
                .error  = 0,
                .scope  = RT_SCOPE_LINK,
        },      /* RTN_BROADCAST */
        {
                .error  = 0,
                .scope  = RT_SCOPE_LINK,
        },      /* RTN_ANYCAST */
        {
                .error  = 0,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_MULTICAST */
        {
                .error  = -EINVAL,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_BLACKHOLE */
        {
                .error  = -EHOSTUNREACH,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_UNREACHABLE */
        {
                .error  = -EACCES,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_PROHIBIT */
        {
                .error  = -EAGAIN,
                .scope  = RT_SCOPE_UNIVERSE,
        },      /* RTN_THROW */
        {
                .error  = -EINVAL,
                .scope  = RT_SCOPE_NOWHERE,
        },      /* RTN_NAT */
        {
                .error  = -EINVAL,
                .scope  = RT_SCOPE_NOWHERE,
        },      /* RTN_XRESOLVE */
};
139
140
141 /* Release a nexthop info record */
142
143 void free_fib_info(struct fib_info *fi)
144 {
145         if (fi->fib_dead == 0) {
146                 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
147                 return;
148         }
149         change_nexthops(fi) {
150                 if (nh->nh_dev)
151                         dev_put(nh->nh_dev);
152                 nh->nh_dev = NULL;
153         } endfor_nexthops(fi);
154         fib_info_cnt--;
155         kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160         spin_lock_bh(&fib_info_lock);
161         if (fi && --fi->fib_treeref == 0) {
162                 hlist_del(&fi->fib_hash);
163                 if (fi->fib_prefsrc)
164                         hlist_del(&fi->fib_lhash);
165                 change_nexthops(fi) {
166                         if (!nh->nh_dev)
167                                 continue;
168                         hlist_del(&nh->nh_hash);
169                 } endfor_nexthops(fi)
170                 fi->fib_dead = 1;
171                 fib_info_put(fi);
172         }
173         spin_unlock_bh(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178         const struct fib_nh *onh = ofi->fib_nh;
179
180         for_nexthops(fi) {
181                 if (nh->nh_oif != onh->nh_oif ||
182                     nh->nh_gw  != onh->nh_gw ||
183                     nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185                     nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188                     nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191                         return -1;
192                 onh++;
193         } endfor_nexthops(fi);
194         return 0;
195 }
196
197 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
198 {
199         unsigned int mask = (fib_hash_size - 1);
200         unsigned int val = fi->fib_nhs;
201
202         val ^= fi->fib_protocol;
203         val ^= (__force u32)fi->fib_prefsrc;
204         val ^= fi->fib_priority;
205
206         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
207 }
208
209 static struct fib_info *fib_find_info(const struct fib_info *nfi)
210 {
211         struct hlist_head *head;
212         struct hlist_node *node;
213         struct fib_info *fi;
214         unsigned int hash;
215
216         hash = fib_info_hashfn(nfi);
217         head = &fib_info_hash[hash];
218
219         hlist_for_each_entry(fi, node, head, fib_hash) {
220                 if (fi->fib_nhs != nfi->fib_nhs)
221                         continue;
222                 if (nfi->fib_protocol == fi->fib_protocol &&
223                     nfi->fib_prefsrc == fi->fib_prefsrc &&
224                     nfi->fib_priority == fi->fib_priority &&
225                     memcmp(nfi->fib_metrics, fi->fib_metrics,
226                            sizeof(fi->fib_metrics)) == 0 &&
227                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
228                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
229                         return fi;
230         }
231
232         return NULL;
233 }
234
235 static inline unsigned int fib_devindex_hashfn(unsigned int val)
236 {
237         unsigned int mask = DEVINDEX_HASHSIZE - 1;
238
239         return (val ^
240                 (val >> DEVINDEX_HASHBITS) ^
241                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
242 }
243
244 /* Check, that the gateway is already configured.
245    Used only by redirect accept routine.
246  */
247
248 int ip_fib_check_default(__be32 gw, struct net_device *dev)
249 {
250         struct hlist_head *head;
251         struct hlist_node *node;
252         struct fib_nh *nh;
253         unsigned int hash;
254
255         spin_lock(&fib_info_lock);
256
257         hash = fib_devindex_hashfn(dev->ifindex);
258         head = &fib_info_devhash[hash];
259         hlist_for_each_entry(nh, node, head, nh_hash) {
260                 if (nh->nh_dev == dev &&
261                     nh->nh_gw == gw &&
262                     !(nh->nh_flags&RTNH_F_DEAD)) {
263                         spin_unlock(&fib_info_lock);
264                         return 0;
265                 }
266         }
267
268         spin_unlock(&fib_info_lock);
269
270         return -1;
271 }
272
273 static inline size_t fib_nlmsg_size(struct fib_info *fi)
274 {
275         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
276                          + nla_total_size(4) /* RTA_TABLE */
277                          + nla_total_size(4) /* RTA_DST */
278                          + nla_total_size(4) /* RTA_PRIORITY */
279                          + nla_total_size(4); /* RTA_PREFSRC */
280
281         /* space for nested metrics */
282         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
283
284         if (fi->fib_nhs) {
285                 /* Also handles the special case fib_nhs == 1 */
286
287                 /* each nexthop is packed in an attribute */
288                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
289
290                 /* may contain flow and gateway attribute */
291                 nhsize += 2 * nla_total_size(4);
292
293                 /* all nexthops are packed in a nested attribute */
294                 payload += nla_total_size(fi->fib_nhs * nhsize);
295         }
296
297         return payload;
298 }
299
300 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
301                int dst_len, u32 tb_id, struct nl_info *info,
302                unsigned int nlm_flags)
303 {
304         struct sk_buff *skb;
305         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
306         int err = -ENOBUFS;
307
308         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
309         if (skb == NULL)
310                 goto errout;
311
312         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
313                             fa->fa_type, fa->fa_scope, key, dst_len,
314                             fa->fa_tos, fa->fa_info, nlm_flags);
315         if (err < 0) {
316                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
317                 WARN_ON(err == -EMSGSIZE);
318                 kfree_skb(skb);
319                 goto errout;
320         }
321         err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
322                           info->nlh, GFP_KERNEL);
323 errout:
324         if (err < 0)
325                 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
326 }
327
328 /* Return the first fib alias matching TOS with
329  * priority less than or equal to PRIO.
330  */
331 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
332 {
333         if (fah) {
334                 struct fib_alias *fa;
335                 list_for_each_entry(fa, fah, fa_list) {
336                         if (fa->fa_tos > tos)
337                                 continue;
338                         if (fa->fa_info->fib_priority >= prio ||
339                             fa->fa_tos < tos)
340                                 return fa;
341                 }
342         }
343         return NULL;
344 }
345
346 int fib_detect_death(struct fib_info *fi, int order,
347                      struct fib_info **last_resort, int *last_idx, int dflt)
348 {
349         struct neighbour *n;
350         int state = NUD_NONE;
351
352         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
353         if (n) {
354                 state = n->nud_state;
355                 neigh_release(n);
356         }
357         if (state==NUD_REACHABLE)
358                 return 0;
359         if ((state&NUD_VALID) && order != dflt)
360                 return 0;
361         if ((state&NUD_VALID) ||
362             (*last_idx<0 && order > dflt)) {
363                 *last_resort = fi;
364                 *last_idx = order;
365         }
366         return 1;
367 }
368
369 #ifdef CONFIG_IP_ROUTE_MULTIPATH
370
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting
 * at @rtnh.  Returns 0 if bytes are left over after the last well-formed
 * entry, since that implies an invalid nexthop configuration.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
        int count;

        for (count = 0; rtnh_ok(rtnh, remaining); count++)
                rtnh = rtnh_next(rtnh, &remaining);

        if (remaining > 0)
                return 0;

        return count;
}
383
384 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
385                        int remaining, struct fib_config *cfg)
386 {
387         change_nexthops(fi) {
388                 int attrlen;
389
390                 if (!rtnh_ok(rtnh, remaining))
391                         return -EINVAL;
392
393                 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
394                 nh->nh_oif = rtnh->rtnh_ifindex;
395                 nh->nh_weight = rtnh->rtnh_hops + 1;
396
397                 attrlen = rtnh_attrlen(rtnh);
398                 if (attrlen > 0) {
399                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
400
401                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
402                         nh->nh_gw = nla ? nla_get_be32(nla) : 0;
403 #ifdef CONFIG_NET_CLS_ROUTE
404                         nla = nla_find(attrs, attrlen, RTA_FLOW);
405                         nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
406 #endif
407                 }
408
409                 rtnh = rtnh_next(rtnh, &remaining);
410         } endfor_nexthops(fi);
411
412         return 0;
413 }
414
415 #endif
416
417 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
418 {
419 #ifdef CONFIG_IP_ROUTE_MULTIPATH
420         struct rtnexthop *rtnh;
421         int remaining;
422 #endif
423
424         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
425                 return 1;
426
427         if (cfg->fc_oif || cfg->fc_gw) {
428                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
429                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
430                         return 0;
431                 return 1;
432         }
433
434 #ifdef CONFIG_IP_ROUTE_MULTIPATH
435         if (cfg->fc_mp == NULL)
436                 return 0;
437
438         rtnh = cfg->fc_mp;
439         remaining = cfg->fc_mp_len;
440
441         for_nexthops(fi) {
442                 int attrlen;
443
444                 if (!rtnh_ok(rtnh, remaining))
445                         return -EINVAL;
446
447                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
448                         return 1;
449
450                 attrlen = rtnh_attrlen(rtnh);
451                 if (attrlen < 0) {
452                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
453
454                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
455                         if (nla && nla_get_be32(nla) != nh->nh_gw)
456                                 return 1;
457 #ifdef CONFIG_NET_CLS_ROUTE
458                         nla = nla_find(attrs, attrlen, RTA_FLOW);
459                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
460                                 return 1;
461 #endif
462                 }
463
464                 rtnh = rtnh_next(rtnh, &remaining);
465         } endfor_nexthops(fi);
466 #endif
467         return 0;
468 }
469
470
/*
   Picture
   -------

   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict each other.
   d) If we use tunnel routes, the gateway might not be on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */

/*
 * Validate one nexthop @nh of the record @fi being built from @cfg and
 * bind it to a device.
 *
 * Returns 0 on success or a negative errno.  On the successful paths that
 * set nh->nh_dev a device reference is taken with dev_hold() and
 * nh->nh_scope is filled in.  Note that in the gateway lookup path the
 * reference is already held when -ENETDOWN is returned; nh->nh_dev stays
 * set so that the caller's cleanup can drop it.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
                        struct fib_nh *nh)
{
        int err;

        if (nh->nh_gw) {
                struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
                /* Pervasive nexthops are accepted without further checks. */
                if (nh->nh_flags&RTNH_F_PERVASIVE)
                        return 0;
#endif
                if (nh->nh_flags&RTNH_F_ONLINK) {
                        struct net_device *dev;

                        /*
                         * Gateway is declared on-link: no route lookup, only
                         * check the gateway address type and the device
                         * named by nh_oif.
                         */
                        if (cfg->fc_scope >= RT_SCOPE_LINK)
                                return -EINVAL;
                        if (inet_addr_type(cfg->fc_nlinfo.nl_net,
                                           nh->nh_gw) != RTN_UNICAST)
                                return -EINVAL;
                        if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
                                                      nh->nh_oif)) == NULL)
                                return -ENODEV;
                        if (!(dev->flags&IFF_UP))
                                return -ENETDOWN;
                        nh->nh_dev = dev;
                        dev_hold(dev);
                        nh->nh_scope = RT_SCOPE_LINK;
                        return 0;
                }
                {
                        /*
                         * Resolve the gateway through the FIB, asking for a
                         * scope one step above the route's own scope value.
                         */
                        struct flowi fl = {
                                .nl_u = {
                                        .ip4_u = {
                                                .daddr = nh->nh_gw,
                                                .scope = cfg->fc_scope + 1,
                                        },
                                },
                                .oif = nh->nh_oif,
                        };

                        /* It is not necessary, but requires a bit of thinking */
                        if (fl.fl4_scope < RT_SCOPE_LINK)
                                fl.fl4_scope = RT_SCOPE_LINK;
                        if ((err = fib_lookup(&fl, &res)) != 0)
                                return err;
                }
                /* From here on the lookup result must be released at `out`. */
                err = -EINVAL;
                if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
                        goto out;
                nh->nh_scope = res.scope;
                nh->nh_oif = FIB_RES_OIF(res);
                if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
                        goto out;
                dev_hold(nh->nh_dev);
                err = -ENETDOWN;
                if (!(nh->nh_dev->flags & IFF_UP))
                        goto out;
                err = 0;
out:
                fib_res_put(&res);
                return err;
        } else {
                struct in_device *in_dev;

                /* These flags are only accepted together with a gateway. */
                if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
                        return -EINVAL;

                /* No gateway: the nexthop is the interface named by nh_oif. */
                in_dev = inetdev_by_index(nh->nh_oif);
                if (in_dev == NULL)
                        return -ENODEV;
                if (!(in_dev->dev->flags&IFF_UP)) {
                        in_dev_put(in_dev);
                        return -ENETDOWN;
                }
                nh->nh_dev = in_dev->dev;
                dev_hold(nh->nh_dev);
                nh->nh_scope = RT_SCOPE_HOST;
                in_dev_put(in_dev);
        }
        return 0;
}
597
598 static inline unsigned int fib_laddr_hashfn(__be32 val)
599 {
600         unsigned int mask = (fib_hash_size - 1);
601
602         return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
603 }
604
605 static struct hlist_head *fib_hash_alloc(int bytes)
606 {
607         if (bytes <= PAGE_SIZE)
608                 return kzalloc(bytes, GFP_KERNEL);
609         else
610                 return (struct hlist_head *)
611                         __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
612 }
613
614 static void fib_hash_free(struct hlist_head *hash, int bytes)
615 {
616         if (!hash)
617                 return;
618
619         if (bytes <= PAGE_SIZE)
620                 kfree(hash);
621         else
622                 free_pages((unsigned long) hash, get_order(bytes));
623 }
624
625 static void fib_hash_move(struct hlist_head *new_info_hash,
626                           struct hlist_head *new_laddrhash,
627                           unsigned int new_size)
628 {
629         struct hlist_head *old_info_hash, *old_laddrhash;
630         unsigned int old_size = fib_hash_size;
631         unsigned int i, bytes;
632
633         spin_lock_bh(&fib_info_lock);
634         old_info_hash = fib_info_hash;
635         old_laddrhash = fib_info_laddrhash;
636         fib_hash_size = new_size;
637
638         for (i = 0; i < old_size; i++) {
639                 struct hlist_head *head = &fib_info_hash[i];
640                 struct hlist_node *node, *n;
641                 struct fib_info *fi;
642
643                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644                         struct hlist_head *dest;
645                         unsigned int new_hash;
646
647                         hlist_del(&fi->fib_hash);
648
649                         new_hash = fib_info_hashfn(fi);
650                         dest = &new_info_hash[new_hash];
651                         hlist_add_head(&fi->fib_hash, dest);
652                 }
653         }
654         fib_info_hash = new_info_hash;
655
656         for (i = 0; i < old_size; i++) {
657                 struct hlist_head *lhead = &fib_info_laddrhash[i];
658                 struct hlist_node *node, *n;
659                 struct fib_info *fi;
660
661                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662                         struct hlist_head *ldest;
663                         unsigned int new_hash;
664
665                         hlist_del(&fi->fib_lhash);
666
667                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668                         ldest = &new_laddrhash[new_hash];
669                         hlist_add_head(&fi->fib_lhash, ldest);
670                 }
671         }
672         fib_info_laddrhash = new_laddrhash;
673
674         spin_unlock_bh(&fib_info_lock);
675
676         bytes = old_size * sizeof(struct hlist_head *);
677         fib_hash_free(old_info_hash, bytes);
678         fib_hash_free(old_laddrhash, bytes);
679 }
680
681 struct fib_info *fib_create_info(struct fib_config *cfg)
682 {
683         int err;
684         struct fib_info *fi = NULL;
685         struct fib_info *ofi;
686         int nhs = 1;
687
688         /* Fast check to catch the most weird cases */
689         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
690                 goto err_inval;
691
692 #ifdef CONFIG_IP_ROUTE_MULTIPATH
693         if (cfg->fc_mp) {
694                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
695                 if (nhs == 0)
696                         goto err_inval;
697         }
698 #endif
699
700         err = -ENOBUFS;
701         if (fib_info_cnt >= fib_hash_size) {
702                 unsigned int new_size = fib_hash_size << 1;
703                 struct hlist_head *new_info_hash;
704                 struct hlist_head *new_laddrhash;
705                 unsigned int bytes;
706
707                 if (!new_size)
708                         new_size = 1;
709                 bytes = new_size * sizeof(struct hlist_head *);
710                 new_info_hash = fib_hash_alloc(bytes);
711                 new_laddrhash = fib_hash_alloc(bytes);
712                 if (!new_info_hash || !new_laddrhash) {
713                         fib_hash_free(new_info_hash, bytes);
714                         fib_hash_free(new_laddrhash, bytes);
715                 } else
716                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
717
718                 if (!fib_hash_size)
719                         goto failure;
720         }
721
722         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
723         if (fi == NULL)
724                 goto failure;
725         fib_info_cnt++;
726
727         fi->fib_protocol = cfg->fc_protocol;
728         fi->fib_flags = cfg->fc_flags;
729         fi->fib_priority = cfg->fc_priority;
730         fi->fib_prefsrc = cfg->fc_prefsrc;
731
732         fi->fib_nhs = nhs;
733         change_nexthops(fi) {
734                 nh->nh_parent = fi;
735         } endfor_nexthops(fi)
736
737         if (cfg->fc_mx) {
738                 struct nlattr *nla;
739                 int remaining;
740
741                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
742                         int type = nla_type(nla);
743
744                         if (type) {
745                                 if (type > RTAX_MAX)
746                                         goto err_inval;
747                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
748                         }
749                 }
750         }
751
752         if (cfg->fc_mp) {
753 #ifdef CONFIG_IP_ROUTE_MULTIPATH
754                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
755                 if (err != 0)
756                         goto failure;
757                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
758                         goto err_inval;
759                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
760                         goto err_inval;
761 #ifdef CONFIG_NET_CLS_ROUTE
762                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
763                         goto err_inval;
764 #endif
765 #else
766                 goto err_inval;
767 #endif
768         } else {
769                 struct fib_nh *nh = fi->fib_nh;
770
771                 nh->nh_oif = cfg->fc_oif;
772                 nh->nh_gw = cfg->fc_gw;
773                 nh->nh_flags = cfg->fc_flags;
774 #ifdef CONFIG_NET_CLS_ROUTE
775                 nh->nh_tclassid = cfg->fc_flow;
776 #endif
777 #ifdef CONFIG_IP_ROUTE_MULTIPATH
778                 nh->nh_weight = 1;
779 #endif
780         }
781
782         if (fib_props[cfg->fc_type].error) {
783                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
784                         goto err_inval;
785                 goto link_it;
786         }
787
788         if (cfg->fc_scope > RT_SCOPE_HOST)
789                 goto err_inval;
790
791         if (cfg->fc_scope == RT_SCOPE_HOST) {
792                 struct fib_nh *nh = fi->fib_nh;
793
794                 /* Local address is added. */
795                 if (nhs != 1 || nh->nh_gw)
796                         goto err_inval;
797                 nh->nh_scope = RT_SCOPE_NOWHERE;
798                 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
799                                               fi->fib_nh->nh_oif);
800                 err = -ENODEV;
801                 if (nh->nh_dev == NULL)
802                         goto failure;
803         } else {
804                 change_nexthops(fi) {
805                         if ((err = fib_check_nh(cfg, fi, nh)) != 0)
806                                 goto failure;
807                 } endfor_nexthops(fi)
808         }
809
810         if (fi->fib_prefsrc) {
811                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
812                     fi->fib_prefsrc != cfg->fc_dst)
813                         if (inet_addr_type(cfg->fc_nlinfo.nl_net,
814                                            fi->fib_prefsrc) != RTN_LOCAL)
815                                 goto err_inval;
816         }
817
818 link_it:
819         if ((ofi = fib_find_info(fi)) != NULL) {
820                 fi->fib_dead = 1;
821                 free_fib_info(fi);
822                 ofi->fib_treeref++;
823                 return ofi;
824         }
825
826         fi->fib_treeref++;
827         atomic_inc(&fi->fib_clntref);
828         spin_lock_bh(&fib_info_lock);
829         hlist_add_head(&fi->fib_hash,
830                        &fib_info_hash[fib_info_hashfn(fi)]);
831         if (fi->fib_prefsrc) {
832                 struct hlist_head *head;
833
834                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
835                 hlist_add_head(&fi->fib_lhash, head);
836         }
837         change_nexthops(fi) {
838                 struct hlist_head *head;
839                 unsigned int hash;
840
841                 if (!nh->nh_dev)
842                         continue;
843                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
844                 head = &fib_info_devhash[hash];
845                 hlist_add_head(&nh->nh_hash, head);
846         } endfor_nexthops(fi)
847         spin_unlock_bh(&fib_info_lock);
848         return fi;
849
850 err_inval:
851         err = -EINVAL;
852
853 failure:
854         if (fi) {
855                 fi->fib_dead = 1;
856                 free_fib_info(fi);
857         }
858
859         return ERR_PTR(err);
860 }
861
862 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
863 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
864                        struct fib_result *res, __be32 zone, __be32 mask,
865                         int prefixlen)
866 {
867         struct fib_alias *fa;
868         int nh_sel = 0;
869
870         list_for_each_entry_rcu(fa, head, fa_list) {
871                 int err;
872
873                 if (fa->fa_tos &&
874                     fa->fa_tos != flp->fl4_tos)
875                         continue;
876
877                 if (fa->fa_scope < flp->fl4_scope)
878                         continue;
879
880                 fa->fa_state |= FA_S_ACCESSED;
881
882                 err = fib_props[fa->fa_type].error;
883                 if (err == 0) {
884                         struct fib_info *fi = fa->fa_info;
885
886                         if (fi->fib_flags & RTNH_F_DEAD)
887                                 continue;
888
889                         switch (fa->fa_type) {
890                         case RTN_UNICAST:
891                         case RTN_LOCAL:
892                         case RTN_BROADCAST:
893                         case RTN_ANYCAST:
894                         case RTN_MULTICAST:
895                                 for_nexthops(fi) {
896                                         if (nh->nh_flags&RTNH_F_DEAD)
897                                                 continue;
898                                         if (!flp->oif || flp->oif == nh->nh_oif)
899                                                 break;
900                                 }
901 #ifdef CONFIG_IP_ROUTE_MULTIPATH
902                                 if (nhsel < fi->fib_nhs) {
903                                         nh_sel = nhsel;
904                                         goto out_fill_res;
905                                 }
906 #else
907                                 if (nhsel < 1) {
908                                         goto out_fill_res;
909                                 }
910 #endif
911                                 endfor_nexthops(fi);
912                                 continue;
913
914                         default:
915                                 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
916                                         fa->fa_type);
917                                 return -EINVAL;
918                         }
919                 }
920                 return err;
921         }
922         return 1;
923
924 out_fill_res:
925         res->prefixlen = prefixlen;
926         res->nh_sel = nh_sel;
927         res->type = fa->fa_type;
928         res->scope = fa->fa_scope;
929         res->fi = fa->fa_info;
930         atomic_inc(&res->fi->fib_clntref);
931         return 0;
932 }
933
934 /* Find appropriate source address to this destination */
935
936 __be32 __fib_res_prefsrc(struct fib_result *res)
937 {
938         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
939 }
940
941 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
942                   u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
943                   struct fib_info *fi, unsigned int flags)
944 {
945         struct nlmsghdr *nlh;
946         struct rtmsg *rtm;
947
948         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
949         if (nlh == NULL)
950                 return -EMSGSIZE;
951
952         rtm = nlmsg_data(nlh);
953         rtm->rtm_family = AF_INET;
954         rtm->rtm_dst_len = dst_len;
955         rtm->rtm_src_len = 0;
956         rtm->rtm_tos = tos;
957         rtm->rtm_table = tb_id;
958         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
959         rtm->rtm_type = type;
960         rtm->rtm_flags = fi->fib_flags;
961         rtm->rtm_scope = scope;
962         rtm->rtm_protocol = fi->fib_protocol;
963
964         if (rtm->rtm_dst_len)
965                 NLA_PUT_BE32(skb, RTA_DST, dst);
966
967         if (fi->fib_priority)
968                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
969
970         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
971                 goto nla_put_failure;
972
973         if (fi->fib_prefsrc)
974                 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
975
976         if (fi->fib_nhs == 1) {
977                 if (fi->fib_nh->nh_gw)
978                         NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
979
980                 if (fi->fib_nh->nh_oif)
981                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
982 #ifdef CONFIG_NET_CLS_ROUTE
983                 if (fi->fib_nh[0].nh_tclassid)
984                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
985 #endif
986         }
987 #ifdef CONFIG_IP_ROUTE_MULTIPATH
988         if (fi->fib_nhs > 1) {
989                 struct rtnexthop *rtnh;
990                 struct nlattr *mp;
991
992                 mp = nla_nest_start(skb, RTA_MULTIPATH);
993                 if (mp == NULL)
994                         goto nla_put_failure;
995
996                 for_nexthops(fi) {
997                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
998                         if (rtnh == NULL)
999                                 goto nla_put_failure;
1000
1001                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1002                         rtnh->rtnh_hops = nh->nh_weight - 1;
1003                         rtnh->rtnh_ifindex = nh->nh_oif;
1004
1005                         if (nh->nh_gw)
1006                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1007 #ifdef CONFIG_NET_CLS_ROUTE
1008                         if (nh->nh_tclassid)
1009                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1010 #endif
1011                         /* length of rtnetlink header + attributes */
1012                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1013                 } endfor_nexthops(fi);
1014
1015                 nla_nest_end(skb, mp);
1016         }
1017 #endif
1018         return nlmsg_end(skb, nlh);
1019
1020 nla_put_failure:
1021         nlmsg_cancel(skb, nlh);
1022         return -EMSGSIZE;
1023 }
1024
1025 /*
1026    Update FIB if:
1027    - local address disappeared -> we must delete all the entries
1028      referring to it.
1029    - device went down -> we must shutdown all nexthops going via it.
1030  */
1031
1032 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1033 {
1034         int ret = 0;
1035         int scope = RT_SCOPE_NOWHERE;
1036
1037         if (force)
1038                 scope = -1;
1039
1040         if (local && fib_info_laddrhash) {
1041                 unsigned int hash = fib_laddr_hashfn(local);
1042                 struct hlist_head *head = &fib_info_laddrhash[hash];
1043                 struct hlist_node *node;
1044                 struct fib_info *fi;
1045
1046                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1047                         if (fi->fib_prefsrc == local) {
1048                                 fi->fib_flags |= RTNH_F_DEAD;
1049                                 ret++;
1050                         }
1051                 }
1052         }
1053
1054         if (dev) {
1055                 struct fib_info *prev_fi = NULL;
1056                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1057                 struct hlist_head *head = &fib_info_devhash[hash];
1058                 struct hlist_node *node;
1059                 struct fib_nh *nh;
1060
1061                 hlist_for_each_entry(nh, node, head, nh_hash) {
1062                         struct fib_info *fi = nh->nh_parent;
1063                         int dead;
1064
1065                         BUG_ON(!fi->fib_nhs);
1066                         if (nh->nh_dev != dev || fi == prev_fi)
1067                                 continue;
1068                         prev_fi = fi;
1069                         dead = 0;
1070                         change_nexthops(fi) {
1071                                 if (nh->nh_flags&RTNH_F_DEAD)
1072                                         dead++;
1073                                 else if (nh->nh_dev == dev &&
1074                                          nh->nh_scope != scope) {
1075                                         nh->nh_flags |= RTNH_F_DEAD;
1076 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1077                                         spin_lock_bh(&fib_multipath_lock);
1078                                         fi->fib_power -= nh->nh_power;
1079                                         nh->nh_power = 0;
1080                                         spin_unlock_bh(&fib_multipath_lock);
1081 #endif
1082                                         dead++;
1083                                 }
1084 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1085                                 if (force > 1 && nh->nh_dev == dev) {
1086                                         dead = fi->fib_nhs;
1087                                         break;
1088                                 }
1089 #endif
1090                         } endfor_nexthops(fi)
1091                         if (dead == fi->fib_nhs) {
1092                                 fi->fib_flags |= RTNH_F_DEAD;
1093                                 ret++;
1094                         }
1095                 }
1096         }
1097
1098         return ret;
1099 }
1100
1101 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1102
1103 /*
1104    Dead device goes up. We wake up dead nexthops.
1105    It takes sense only on multipath routes.
1106  */
1107
1108 int fib_sync_up(struct net_device *dev)
1109 {
1110         struct fib_info *prev_fi;
1111         unsigned int hash;
1112         struct hlist_head *head;
1113         struct hlist_node *node;
1114         struct fib_nh *nh;
1115         int ret;
1116
1117         if (!(dev->flags&IFF_UP))
1118                 return 0;
1119
1120         prev_fi = NULL;
1121         hash = fib_devindex_hashfn(dev->ifindex);
1122         head = &fib_info_devhash[hash];
1123         ret = 0;
1124
1125         hlist_for_each_entry(nh, node, head, nh_hash) {
1126                 struct fib_info *fi = nh->nh_parent;
1127                 int alive;
1128
1129                 BUG_ON(!fi->fib_nhs);
1130                 if (nh->nh_dev != dev || fi == prev_fi)
1131                         continue;
1132
1133                 prev_fi = fi;
1134                 alive = 0;
1135                 change_nexthops(fi) {
1136                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1137                                 alive++;
1138                                 continue;
1139                         }
1140                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1141                                 continue;
1142                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1143                                 continue;
1144                         alive++;
1145                         spin_lock_bh(&fib_multipath_lock);
1146                         nh->nh_power = 0;
1147                         nh->nh_flags &= ~RTNH_F_DEAD;
1148                         spin_unlock_bh(&fib_multipath_lock);
1149                 } endfor_nexthops(fi)
1150
1151                 if (alive > 0) {
1152                         fi->fib_flags &= ~RTNH_F_DEAD;
1153                         ret++;
1154                 }
1155         }
1156
1157         return ret;
1158 }
1159
1160 /*
1161    The algorithm is suboptimal, but it provides really
1162    fair weighted route distribution.
1163  */
1164
1165 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1166 {
1167         struct fib_info *fi = res->fi;
1168         int w;
1169
1170         spin_lock_bh(&fib_multipath_lock);
1171         if (fi->fib_power <= 0) {
1172                 int power = 0;
1173                 change_nexthops(fi) {
1174                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1175                                 power += nh->nh_weight;
1176                                 nh->nh_power = nh->nh_weight;
1177                         }
1178                 } endfor_nexthops(fi);
1179                 fi->fib_power = power;
1180                 if (power <= 0) {
1181                         spin_unlock_bh(&fib_multipath_lock);
1182                         /* Race condition: route has just become dead. */
1183                         res->nh_sel = 0;
1184                         return;
1185                 }
1186         }
1187
1188
1189         /* w should be random number [0..fi->fib_power-1],
1190            it is pretty bad approximation.
1191          */
1192
1193         w = jiffies % fi->fib_power;
1194
1195         change_nexthops(fi) {
1196                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1197                         if ((w -= nh->nh_power) <= 0) {
1198                                 nh->nh_power--;
1199                                 fi->fib_power--;
1200                                 res->nh_sel = nhsel;
1201                                 spin_unlock_bh(&fib_multipath_lock);
1202                                 return;
1203                         }
1204                 }
1205         } endfor_nexthops(fi);
1206
1207         /* Race condition: route has just become dead. */
1208         res->nh_sel = 0;
1209         spin_unlock_bh(&fib_multipath_lock);
1210 }
1211 #endif