Merge ../linux-2.6
[pandora-kernel.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/arp.h>
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/ip_fib.h>
46 #include <net/ip_mp_alg.h>
47
48 #include "fib_lookup.h"
49
50 #define FSprintk(a...)
51
52 static DEFINE_RWLOCK(fib_info_lock);
53 static struct hlist_head *fib_info_hash;
54 static struct hlist_head *fib_info_laddrhash;
55 static unsigned int fib_hash_size;
56 static unsigned int fib_info_cnt;
57
58 #define DEVINDEX_HASHBITS 8
59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace, declare the loop
 * index "nhsel" and the cursor "nh" (const for for_nexthops, writable for
 * change_nexthops) and start a for loop over fi's nexthops.  The loop body
 * follows the macro, and endfor_nexthops(fi) supplies the closing brace
 * of the scope opened by the macro.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {                                              \
        int nhsel;                                                      \
        const struct fib_nh *nh;                                        \
        for (nhsel = 0, nh = (fi)->fib_nh;                              \
             nhsel < (fi)->fib_nhs;                                     \
             nh++, nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel;                                                      \
        struct fib_nh *nh;                                              \
        for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);           \
             nhsel < (fi)->fib_nhs;                                     \
             nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited; the
 * one-pass loop is kept so that both variants have the same shape
 * (the hope being that gcc optimizes the dummy loop away).
 */

#define for_nexthops(fi) {                                              \
        int nhsel = 0;                                                  \
        const struct fib_nh *nh = (fi)->fib_nh;                         \
        for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel = 0;                                                  \
        struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);            \
        for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
85
86
87 static const struct 
88 {
89         int     error;
90         u8      scope;
91 } fib_props[RTA_MAX + 1] = {
92         {
93                 .error  = 0,
94                 .scope  = RT_SCOPE_NOWHERE,
95         },      /* RTN_UNSPEC */
96         {
97                 .error  = 0,
98                 .scope  = RT_SCOPE_UNIVERSE,
99         },      /* RTN_UNICAST */
100         {
101                 .error  = 0,
102                 .scope  = RT_SCOPE_HOST,
103         },      /* RTN_LOCAL */
104         {
105                 .error  = 0,
106                 .scope  = RT_SCOPE_LINK,
107         },      /* RTN_BROADCAST */
108         {
109                 .error  = 0,
110                 .scope  = RT_SCOPE_LINK,
111         },      /* RTN_ANYCAST */
112         {
113                 .error  = 0,
114                 .scope  = RT_SCOPE_UNIVERSE,
115         },      /* RTN_MULTICAST */
116         {
117                 .error  = -EINVAL,
118                 .scope  = RT_SCOPE_UNIVERSE,
119         },      /* RTN_BLACKHOLE */
120         {
121                 .error  = -EHOSTUNREACH,
122                 .scope  = RT_SCOPE_UNIVERSE,
123         },      /* RTN_UNREACHABLE */
124         {
125                 .error  = -EACCES,
126                 .scope  = RT_SCOPE_UNIVERSE,
127         },      /* RTN_PROHIBIT */
128         {
129                 .error  = -EAGAIN,
130                 .scope  = RT_SCOPE_UNIVERSE,
131         },      /* RTN_THROW */
132         {
133                 .error  = -EINVAL,
134                 .scope  = RT_SCOPE_NOWHERE,
135         },      /* RTN_NAT */
136         {
137                 .error  = -EINVAL,
138                 .scope  = RT_SCOPE_NOWHERE,
139         },      /* RTN_XRESOLVE */
140 };
141
142
143 /* Release a nexthop info record */
144
145 void free_fib_info(struct fib_info *fi)
146 {
147         if (fi->fib_dead == 0) {
148                 printk("Freeing alive fib_info %p\n", fi);
149                 return;
150         }
151         change_nexthops(fi) {
152                 if (nh->nh_dev)
153                         dev_put(nh->nh_dev);
154                 nh->nh_dev = NULL;
155         } endfor_nexthops(fi);
156         fib_info_cnt--;
157         kfree(fi);
158 }
159
160 void fib_release_info(struct fib_info *fi)
161 {
162         write_lock_bh(&fib_info_lock);
163         if (fi && --fi->fib_treeref == 0) {
164                 hlist_del(&fi->fib_hash);
165                 if (fi->fib_prefsrc)
166                         hlist_del(&fi->fib_lhash);
167                 change_nexthops(fi) {
168                         if (!nh->nh_dev)
169                                 continue;
170                         hlist_del(&nh->nh_hash);
171                 } endfor_nexthops(fi)
172                 fi->fib_dead = 1;
173                 fib_info_put(fi);
174         }
175         write_unlock_bh(&fib_info_lock);
176 }
177
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179 {
180         const struct fib_nh *onh = ofi->fib_nh;
181
182         for_nexthops(fi) {
183                 if (nh->nh_oif != onh->nh_oif ||
184                     nh->nh_gw  != onh->nh_gw ||
185                     nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187                     nh->nh_weight != onh->nh_weight ||
188 #endif
189 #ifdef CONFIG_NET_CLS_ROUTE
190                     nh->nh_tclassid != onh->nh_tclassid ||
191 #endif
192                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193                         return -1;
194                 onh++;
195         } endfor_nexthops(fi);
196         return 0;
197 }
198
199 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200 {
201         unsigned int mask = (fib_hash_size - 1);
202         unsigned int val = fi->fib_nhs;
203
204         val ^= fi->fib_protocol;
205         val ^= fi->fib_prefsrc;
206         val ^= fi->fib_priority;
207
208         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209 }
210
211 static struct fib_info *fib_find_info(const struct fib_info *nfi)
212 {
213         struct hlist_head *head;
214         struct hlist_node *node;
215         struct fib_info *fi;
216         unsigned int hash;
217
218         hash = fib_info_hashfn(nfi);
219         head = &fib_info_hash[hash];
220
221         hlist_for_each_entry(fi, node, head, fib_hash) {
222                 if (fi->fib_nhs != nfi->fib_nhs)
223                         continue;
224                 if (nfi->fib_protocol == fi->fib_protocol &&
225                     nfi->fib_prefsrc == fi->fib_prefsrc &&
226                     nfi->fib_priority == fi->fib_priority &&
227                     memcmp(nfi->fib_metrics, fi->fib_metrics,
228                            sizeof(fi->fib_metrics)) == 0 &&
229                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231                         return fi;
232         }
233
234         return NULL;
235 }
236
237 static inline unsigned int fib_devindex_hashfn(unsigned int val)
238 {
239         unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241         return (val ^
242                 (val >> DEVINDEX_HASHBITS) ^
243                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244 }
245
246 /* Check, that the gateway is already configured.
247    Used only by redirect accept routine.
248  */
249
250 int ip_fib_check_default(u32 gw, struct net_device *dev)
251 {
252         struct hlist_head *head;
253         struct hlist_node *node;
254         struct fib_nh *nh;
255         unsigned int hash;
256
257         read_lock(&fib_info_lock);
258
259         hash = fib_devindex_hashfn(dev->ifindex);
260         head = &fib_info_devhash[hash];
261         hlist_for_each_entry(nh, node, head, nh_hash) {
262                 if (nh->nh_dev == dev &&
263                     nh->nh_gw == gw &&
264                     !(nh->nh_flags&RTNH_F_DEAD)) {
265                         read_unlock(&fib_info_lock);
266                         return 0;
267                 }
268         }
269
270         read_unlock(&fib_info_lock);
271
272         return -1;
273 }
274
275 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
276                int z, int tb_id,
277                struct nlmsghdr *n, struct netlink_skb_parms *req)
278 {
279         struct sk_buff *skb;
280         u32 pid = req ? req->pid : n->nlmsg_pid;
281         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
282
283         skb = alloc_skb(size, GFP_KERNEL);
284         if (!skb)
285                 return;
286
287         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
288                           fa->fa_type, fa->fa_scope, &key, z,
289                           fa->fa_tos,
290                           fa->fa_info, 0) < 0) {
291                 kfree_skb(skb);
292                 return;
293         }
294         NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
295         if (n->nlmsg_flags&NLM_F_ECHO)
296                 atomic_inc(&skb->users);
297         netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
298         if (n->nlmsg_flags&NLM_F_ECHO)
299                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
300 }
301
302 /* Return the first fib alias matching TOS with
303  * priority less than or equal to PRIO.
304  */
305 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
306 {
307         if (fah) {
308                 struct fib_alias *fa;
309                 list_for_each_entry(fa, fah, fa_list) {
310                         if (fa->fa_tos > tos)
311                                 continue;
312                         if (fa->fa_info->fib_priority >= prio ||
313                             fa->fa_tos < tos)
314                                 return fa;
315                 }
316         }
317         return NULL;
318 }
319
320 int fib_detect_death(struct fib_info *fi, int order,
321                      struct fib_info **last_resort, int *last_idx, int *dflt)
322 {
323         struct neighbour *n;
324         int state = NUD_NONE;
325
326         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
327         if (n) {
328                 state = n->nud_state;
329                 neigh_release(n);
330         }
331         if (state==NUD_REACHABLE)
332                 return 0;
333         if ((state&NUD_VALID) && order != *dflt)
334                 return 0;
335         if ((state&NUD_VALID) ||
336             (*last_idx<0 && order > *dflt)) {
337                 *last_resort = fi;
338                 *last_idx = order;
339         }
340         return 1;
341 }
342
343 #ifdef CONFIG_IP_ROUTE_MULTIPATH
344
345 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
346 {
347         while (RTA_OK(attr,attrlen)) {
348                 if (attr->rta_type == type)
349                         return *(u32*)RTA_DATA(attr);
350                 attr = RTA_NEXT(attr, attrlen);
351         }
352         return 0;
353 }
354
/*
 * Count the struct rtnexthop entries carried in the payload of @rta.
 *
 * Returns the number of entries, or 0 when the payload is malformed:
 * an entry claims more bytes than remain, or claims fewer bytes than
 * its own header.  The latter check matters because an entry with
 * rtnh_len == 0 would advance neither the cursor nor the remaining
 * length, so the walk would never terminate.
 */
static int
fib_count_nexthops(struct rtattr *rta)
{
        int nhs = 0;
        struct rtnexthop *nhp = RTA_DATA(rta);
        int nhlen = RTA_PAYLOAD(rta);

        while (nhlen >= (int)sizeof(struct rtnexthop)) {
                /* Every entry must at least cover its own header. */
                if (nhp->rtnh_len < sizeof(struct rtnexthop))
                        return 0;
                nhlen -= nhp->rtnh_len;
                if (nhlen < 0)
                        return 0;
                nhs++;
                nhp = RTNH_NEXT(nhp);
        }
        return nhs;
}
370
371 static int
372 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
373 {
374         struct rtnexthop *nhp = RTA_DATA(rta);
375         int nhlen = RTA_PAYLOAD(rta);
376
377         change_nexthops(fi) {
378                 int attrlen = nhlen - sizeof(struct rtnexthop);
379                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
380                         return -EINVAL;
381                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
382                 nh->nh_oif = nhp->rtnh_ifindex;
383                 nh->nh_weight = nhp->rtnh_hops + 1;
384                 if (attrlen) {
385                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
386 #ifdef CONFIG_NET_CLS_ROUTE
387                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
388 #endif
389                 }
390                 nhp = RTNH_NEXT(nhp);
391         } endfor_nexthops(fi);
392         return 0;
393 }
394
395 #endif
396
397 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
398                  struct fib_info *fi)
399 {
400 #ifdef CONFIG_IP_ROUTE_MULTIPATH
401         struct rtnexthop *nhp;
402         int nhlen;
403 #endif
404
405         if (rta->rta_priority &&
406             *rta->rta_priority != fi->fib_priority)
407                 return 1;
408
409         if (rta->rta_oif || rta->rta_gw) {
410                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
411                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
412                         return 0;
413                 return 1;
414         }
415
416 #ifdef CONFIG_IP_ROUTE_MULTIPATH
417         if (rta->rta_mp == NULL)
418                 return 0;
419         nhp = RTA_DATA(rta->rta_mp);
420         nhlen = RTA_PAYLOAD(rta->rta_mp);
421         
422         for_nexthops(fi) {
423                 int attrlen = nhlen - sizeof(struct rtnexthop);
424                 u32 gw;
425
426                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
427                         return -EINVAL;
428                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
429                         return 1;
430                 if (attrlen) {
431                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
432                         if (gw && gw != nh->nh_gw)
433                                 return 1;
434 #ifdef CONFIG_NET_CLS_ROUTE
435                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
436                         if (gw && gw != nh->nh_tclassid)
437                                 return 1;
438 #endif
439                 }
440                 nhp = RTNH_NEXT(nhp);
441         } endfor_nexthops(fi);
442 #endif
443         return 0;
444 }
445
446
447 /*
448    Picture
449    -------
450
451    Semantics of nexthop is very messy by historical reasons.
452    We have to take into account, that:
453    a) gateway can be actually local interface address,
454       so that gatewayed route is direct.
455    b) gateway must be on-link address, possibly
456       described not by an ifaddr, but also by a direct route.
457    c) If both gateway and interface are specified, they should not
458       contradict.
459    d) If we use tunnel routes, gateway could be not on-link.
460
461    Attempt to reconcile all of these (alas, self-contradictory) conditions
462    results in pretty ugly and hairy code with obscure logic.
463
464    I chose to generalize it instead, so that the size
465    of code does not increase practically, but it becomes
466    much more general.
467    Every prefix is assigned a "scope" value: "host" is local address,
468    "link" is direct route,
469    [ ... "site" ... "interior" ... ]
470    and "universe" is true gateway route with global meaning.
471
472    Every prefix refers to a set of "nexthop"s (gw, oif),
473    where gw must have narrower scope. This recursion stops
474    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
475    which means that gw is forced to be on link.
476
477    Code is still hairy, but now it is apparently logically
478    consistent and very flexible. F.e. as by-product it allows
479    to co-exists in peace independent exterior and interior
480    routing processes.
481
482    Normally it looks as following.
483
484    {universe prefix}  -> (gw, oif) [scope link]
485                           |
486                           |-> {link prefix} -> (gw, oif) [scope local]
487                                                 |
488                                                 |-> {local prefix} (terminal node)
489  */
490
491 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
492 {
493         int err;
494
495         if (nh->nh_gw) {
496                 struct fib_result res;
497
498 #ifdef CONFIG_IP_ROUTE_PERVASIVE
499                 if (nh->nh_flags&RTNH_F_PERVASIVE)
500                         return 0;
501 #endif
502                 if (nh->nh_flags&RTNH_F_ONLINK) {
503                         struct net_device *dev;
504
505                         if (r->rtm_scope >= RT_SCOPE_LINK)
506                                 return -EINVAL;
507                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508                                 return -EINVAL;
509                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
510                                 return -ENODEV;
511                         if (!(dev->flags&IFF_UP))
512                                 return -ENETDOWN;
513                         nh->nh_dev = dev;
514                         dev_hold(dev);
515                         nh->nh_scope = RT_SCOPE_LINK;
516                         return 0;
517                 }
518                 {
519                         struct flowi fl = { .nl_u = { .ip4_u =
520                                                       { .daddr = nh->nh_gw,
521                                                         .scope = r->rtm_scope + 1 } },
522                                             .oif = nh->nh_oif };
523
524                         /* It is not necessary, but requires a bit of thinking */
525                         if (fl.fl4_scope < RT_SCOPE_LINK)
526                                 fl.fl4_scope = RT_SCOPE_LINK;
527                         if ((err = fib_lookup(&fl, &res)) != 0)
528                                 return err;
529                 }
530                 err = -EINVAL;
531                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
532                         goto out;
533                 nh->nh_scope = res.scope;
534                 nh->nh_oif = FIB_RES_OIF(res);
535                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
536                         goto out;
537                 dev_hold(nh->nh_dev);
538                 err = -ENETDOWN;
539                 if (!(nh->nh_dev->flags & IFF_UP))
540                         goto out;
541                 err = 0;
542 out:
543                 fib_res_put(&res);
544                 return err;
545         } else {
546                 struct in_device *in_dev;
547
548                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
549                         return -EINVAL;
550
551                 in_dev = inetdev_by_index(nh->nh_oif);
552                 if (in_dev == NULL)
553                         return -ENODEV;
554                 if (!(in_dev->dev->flags&IFF_UP)) {
555                         in_dev_put(in_dev);
556                         return -ENETDOWN;
557                 }
558                 nh->nh_dev = in_dev->dev;
559                 dev_hold(nh->nh_dev);
560                 nh->nh_scope = RT_SCOPE_HOST;
561                 in_dev_put(in_dev);
562         }
563         return 0;
564 }
565
566 static inline unsigned int fib_laddr_hashfn(u32 val)
567 {
568         unsigned int mask = (fib_hash_size - 1);
569
570         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
571 }
572
573 static struct hlist_head *fib_hash_alloc(int bytes)
574 {
575         if (bytes <= PAGE_SIZE)
576                 return kmalloc(bytes, GFP_KERNEL);
577         else
578                 return (struct hlist_head *)
579                         __get_free_pages(GFP_KERNEL, get_order(bytes));
580 }
581
582 static void fib_hash_free(struct hlist_head *hash, int bytes)
583 {
584         if (!hash)
585                 return;
586
587         if (bytes <= PAGE_SIZE)
588                 kfree(hash);
589         else
590                 free_pages((unsigned long) hash, get_order(bytes));
591 }
592
593 static void fib_hash_move(struct hlist_head *new_info_hash,
594                           struct hlist_head *new_laddrhash,
595                           unsigned int new_size)
596 {
597         struct hlist_head *old_info_hash, *old_laddrhash;
598         unsigned int old_size = fib_hash_size;
599         unsigned int i, bytes;
600
601         write_lock_bh(&fib_info_lock);
602         old_info_hash = fib_info_hash;
603         old_laddrhash = fib_info_laddrhash;
604         fib_hash_size = new_size;
605
606         for (i = 0; i < old_size; i++) {
607                 struct hlist_head *head = &fib_info_hash[i];
608                 struct hlist_node *node, *n;
609                 struct fib_info *fi;
610
611                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
612                         struct hlist_head *dest;
613                         unsigned int new_hash;
614
615                         hlist_del(&fi->fib_hash);
616
617                         new_hash = fib_info_hashfn(fi);
618                         dest = &new_info_hash[new_hash];
619                         hlist_add_head(&fi->fib_hash, dest);
620                 }
621         }
622         fib_info_hash = new_info_hash;
623
624         for (i = 0; i < old_size; i++) {
625                 struct hlist_head *lhead = &fib_info_laddrhash[i];
626                 struct hlist_node *node, *n;
627                 struct fib_info *fi;
628
629                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
630                         struct hlist_head *ldest;
631                         unsigned int new_hash;
632
633                         hlist_del(&fi->fib_lhash);
634
635                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
636                         ldest = &new_laddrhash[new_hash];
637                         hlist_add_head(&fi->fib_lhash, ldest);
638                 }
639         }
640         fib_info_laddrhash = new_laddrhash;
641
642         write_unlock_bh(&fib_info_lock);
643
644         bytes = old_size * sizeof(struct hlist_head *);
645         fib_hash_free(old_info_hash, bytes);
646         fib_hash_free(old_laddrhash, bytes);
647 }
648
649 struct fib_info *
650 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
651                 const struct nlmsghdr *nlh, int *errp)
652 {
653         int err;
654         struct fib_info *fi = NULL;
655         struct fib_info *ofi;
656 #ifdef CONFIG_IP_ROUTE_MULTIPATH
657         int nhs = 1;
658 #else
659         const int nhs = 1;
660 #endif
661 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
662         u32 mp_alg = IP_MP_ALG_NONE;
663 #endif
664
665         /* Fast check to catch the most weird cases */
666         if (fib_props[r->rtm_type].scope > r->rtm_scope)
667                 goto err_inval;
668
669 #ifdef CONFIG_IP_ROUTE_MULTIPATH
670         if (rta->rta_mp) {
671                 nhs = fib_count_nexthops(rta->rta_mp);
672                 if (nhs == 0)
673                         goto err_inval;
674         }
675 #endif
676 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
677         if (rta->rta_mp_alg) {
678                 mp_alg = *rta->rta_mp_alg;
679
680                 if (mp_alg < IP_MP_ALG_NONE ||
681                     mp_alg > IP_MP_ALG_MAX)
682                         goto err_inval;
683         }
684 #endif
685
686         err = -ENOBUFS;
687         if (fib_info_cnt >= fib_hash_size) {
688                 unsigned int new_size = fib_hash_size << 1;
689                 struct hlist_head *new_info_hash;
690                 struct hlist_head *new_laddrhash;
691                 unsigned int bytes;
692
693                 if (!new_size)
694                         new_size = 1;
695                 bytes = new_size * sizeof(struct hlist_head *);
696                 new_info_hash = fib_hash_alloc(bytes);
697                 new_laddrhash = fib_hash_alloc(bytes);
698                 if (!new_info_hash || !new_laddrhash) {
699                         fib_hash_free(new_info_hash, bytes);
700                         fib_hash_free(new_laddrhash, bytes);
701                 } else {
702                         memset(new_info_hash, 0, bytes);
703                         memset(new_laddrhash, 0, bytes);
704
705                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
706                 }
707
708                 if (!fib_hash_size)
709                         goto failure;
710         }
711
712         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
713         if (fi == NULL)
714                 goto failure;
715         fib_info_cnt++;
716
717         fi->fib_protocol = r->rtm_protocol;
718
719         fi->fib_nhs = nhs;
720         change_nexthops(fi) {
721                 nh->nh_parent = fi;
722         } endfor_nexthops(fi)
723
724         fi->fib_flags = r->rtm_flags;
725         if (rta->rta_priority)
726                 fi->fib_priority = *rta->rta_priority;
727         if (rta->rta_mx) {
728                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
729                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
730
731                 while (RTA_OK(attr, attrlen)) {
732                         unsigned flavor = attr->rta_type;
733                         if (flavor) {
734                                 if (flavor > RTAX_MAX)
735                                         goto err_inval;
736                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737                         }
738                         attr = RTA_NEXT(attr, attrlen);
739                 }
740         }
741         if (rta->rta_prefsrc)
742                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743
744         if (rta->rta_mp) {
745 #ifdef CONFIG_IP_ROUTE_MULTIPATH
746                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747                         goto failure;
748                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749                         goto err_inval;
750                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751                         goto err_inval;
752 #ifdef CONFIG_NET_CLS_ROUTE
753                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754                         goto err_inval;
755 #endif
756 #else
757                 goto err_inval;
758 #endif
759         } else {
760                 struct fib_nh *nh = fi->fib_nh;
761                 if (rta->rta_oif)
762                         nh->nh_oif = *rta->rta_oif;
763                 if (rta->rta_gw)
764                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
765 #ifdef CONFIG_NET_CLS_ROUTE
766                 if (rta->rta_flow)
767                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768 #endif
769                 nh->nh_flags = r->rtm_flags;
770 #ifdef CONFIG_IP_ROUTE_MULTIPATH
771                 nh->nh_weight = 1;
772 #endif
773         }
774
775 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776         fi->fib_mp_alg = mp_alg;
777 #endif
778
779         if (fib_props[r->rtm_type].error) {
780                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781                         goto err_inval;
782                 goto link_it;
783         }
784
785         if (r->rtm_scope > RT_SCOPE_HOST)
786                 goto err_inval;
787
788         if (r->rtm_scope == RT_SCOPE_HOST) {
789                 struct fib_nh *nh = fi->fib_nh;
790
791                 /* Local address is added. */
792                 if (nhs != 1 || nh->nh_gw)
793                         goto err_inval;
794                 nh->nh_scope = RT_SCOPE_NOWHERE;
795                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796                 err = -ENODEV;
797                 if (nh->nh_dev == NULL)
798                         goto failure;
799         } else {
800                 change_nexthops(fi) {
801                         if ((err = fib_check_nh(r, fi, nh)) != 0)
802                                 goto failure;
803                 } endfor_nexthops(fi)
804         }
805
806         if (fi->fib_prefsrc) {
807                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810                                 goto err_inval;
811         }
812
813 link_it:
814         if ((ofi = fib_find_info(fi)) != NULL) {
815                 fi->fib_dead = 1;
816                 free_fib_info(fi);
817                 ofi->fib_treeref++;
818                 return ofi;
819         }
820
821         fi->fib_treeref++;
822         atomic_inc(&fi->fib_clntref);
823         write_lock_bh(&fib_info_lock);
824         hlist_add_head(&fi->fib_hash,
825                        &fib_info_hash[fib_info_hashfn(fi)]);
826         if (fi->fib_prefsrc) {
827                 struct hlist_head *head;
828
829                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830                 hlist_add_head(&fi->fib_lhash, head);
831         }
832         change_nexthops(fi) {
833                 struct hlist_head *head;
834                 unsigned int hash;
835
836                 if (!nh->nh_dev)
837                         continue;
838                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839                 head = &fib_info_devhash[hash];
840                 hlist_add_head(&nh->nh_hash, head);
841         } endfor_nexthops(fi)
842         write_unlock_bh(&fib_info_lock);
843         return fi;
844
845 err_inval:
846         err = -EINVAL;
847
848 failure:
849         *errp = err;
850         if (fi) {
851                 fi->fib_dead = 1;
852                 free_fib_info(fi);
853         }
854         return NULL;
855 }
856
857 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
858 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
859                        struct fib_result *res, __u32 zone, __u32 mask, 
860                         int prefixlen)
861 {
862         struct fib_alias *fa;
863         int nh_sel = 0;
864
865         list_for_each_entry_rcu(fa, head, fa_list) {
866                 int err;
867
868                 if (fa->fa_tos &&
869                     fa->fa_tos != flp->fl4_tos)
870                         continue;
871
872                 if (fa->fa_scope < flp->fl4_scope)
873                         continue;
874
875                 fa->fa_state |= FA_S_ACCESSED;
876
877                 err = fib_props[fa->fa_type].error;
878                 if (err == 0) {
879                         struct fib_info *fi = fa->fa_info;
880
881                         if (fi->fib_flags & RTNH_F_DEAD)
882                                 continue;
883
884                         switch (fa->fa_type) {
885                         case RTN_UNICAST:
886                         case RTN_LOCAL:
887                         case RTN_BROADCAST:
888                         case RTN_ANYCAST:
889                         case RTN_MULTICAST:
890                                 for_nexthops(fi) {
891                                         if (nh->nh_flags&RTNH_F_DEAD)
892                                                 continue;
893                                         if (!flp->oif || flp->oif == nh->nh_oif)
894                                                 break;
895                                 }
896 #ifdef CONFIG_IP_ROUTE_MULTIPATH
897                                 if (nhsel < fi->fib_nhs) {
898                                         nh_sel = nhsel;
899                                         goto out_fill_res;
900                                 }
901 #else
902                                 if (nhsel < 1) {
903                                         goto out_fill_res;
904                                 }
905 #endif
906                                 endfor_nexthops(fi);
907                                 continue;
908
909                         default:
910                                 printk(KERN_DEBUG "impossible 102\n");
911                                 return -EINVAL;
912                         };
913                 }
914                 return err;
915         }
916         return 1;
917
918 out_fill_res:
919         res->prefixlen = prefixlen;
920         res->nh_sel = nh_sel;
921         res->type = fa->fa_type;
922         res->scope = fa->fa_scope;
923         res->fi = fa->fa_info;
924 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
925         res->netmask = mask;
926         res->network = zone &
927                 (0xFFFFFFFF >> (32 - prefixlen));
928 #endif
929         atomic_inc(&res->fi->fib_clntref);
930         return 0;
931 }
932
933 /* Find appropriate source address to this destination */
934
935 u32 __fib_res_prefsrc(struct fib_result *res)
936 {
937         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938 }
939
/*
 * Append one route message describing fi to skb.
 *
 * The message is a netlink header (built by NLMSG_NEW) carrying a
 * struct rtmsg, followed by attributes in this order: RTA_DST (only if
 * dst_len is non-zero), RTA_PRIORITY (if set), the metrics written by
 * rtnetlink_put_metrics(), RTA_PREFSRC (if set), and then the nexthop
 * data: RTA_GATEWAY/RTA_OIF (and RTA_FLOW with CONFIG_NET_CLS_ROUTE) for
 * a single nexthop, or, with CONFIG_IP_ROUTE_MULTIPATH and more than one
 * nexthop, one RTA_MULTIPATH attribute holding a struct rtnexthop per
 * nexthop.
 *
 * Returns skb->len on success.  On failure the skb is trimmed back to
 * the length it had on entry and -1 is returned.
 *
 * NOTE(review): NLMSG_NEW and RTA_PUT are macros defined outside this
 * chunk; they presumably jump to nlmsg_failure / rtattr_failure when the
 * skb has no room - confirm against their definitions.
 */
940 int
941 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
942               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
943               struct fib_info *fi, unsigned int flags)
944 {
945         struct rtmsg *rtm;
946         struct nlmsghdr  *nlh;
            /* Remember where this message starts so it can be undone. */
947         unsigned char    *b = skb->tail;
948
949         nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
950         rtm = NLMSG_DATA(nlh);
951         rtm->rtm_family = AF_INET;
952         rtm->rtm_dst_len = dst_len;
953         rtm->rtm_src_len = 0;
954         rtm->rtm_tos = tos;
955         rtm->rtm_table = tb_id;
956         rtm->rtm_type = type;
957         rtm->rtm_flags = fi->fib_flags;
958         rtm->rtm_scope = scope;
959         if (rtm->rtm_dst_len)
960                 RTA_PUT(skb, RTA_DST, 4, dst);
961         rtm->rtm_protocol = fi->fib_protocol;
962         if (fi->fib_priority)
963                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
964         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
965                 goto rtattr_failure;
966         if (fi->fib_prefsrc)
967                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
            /* Single nexthop: its fields go in as top-level attributes. */
968         if (fi->fib_nhs == 1) {
969                 if (fi->fib_nh->nh_gw)
970                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
971                 if (fi->fib_nh->nh_oif)
972                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
973 #ifdef CONFIG_NET_CLS_ROUTE
974                 if (fi->fib_nh[0].nh_tclassid)
975                         RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
976 #endif
977         }
978 #ifdef CONFIG_IP_ROUTE_MULTIPATH
979         if (fi->fib_nhs > 1) {
980                 struct rtnexthop *nhp;
981                 struct rtattr *mp_head;
                    /*
                     * Reserve the RTA_MULTIPATH attribute header now; its
                     * type and length are filled in after all nexthops
                     * have been written.
                     */
982                 if (skb_tailroom(skb) <= RTA_SPACE(0))
983                         goto rtattr_failure;
984                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
985
986                 for_nexthops(fi) {
                            /* Room is checked for the rtnexthop plus 4 more bytes. */
987                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
988                                 goto rtattr_failure;
989                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
990                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
                            /* The weight is exported as weight minus one. */
991                         nhp->rtnh_hops = nh->nh_weight-1;
992                         nhp->rtnh_ifindex = nh->nh_oif;
993                         if (nh->nh_gw)
994                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
995 #ifdef CONFIG_NET_CLS_ROUTE
996                         if (nh->nh_tclassid)
997                                 RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
998 #endif
                            /* Length covers the rtnexthop and the attributes put after it. */
999                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
1000                 } endfor_nexthops(fi);
1001                 mp_head->rta_type = RTA_MULTIPATH;
1002                 mp_head->rta_len = skb->tail - (u8*)mp_head;
1003         }
1004 #endif
             /* Total message length: everything added since entry. */
1005         nlh->nlmsg_len = skb->tail - b;
1006         return skb->len;
1007
1008 nlmsg_failure:
1009 rtattr_failure:
             /* Drop the partial message: cut the skb back to its entry length. */
1010         skb_trim(skb, b - skb->data);
1011         return -1;
1012 }
1013
1014 #ifndef CONFIG_IP_NOSIOCRT
1015
1016 int
1017 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1018                     struct kern_rta *rta, struct rtentry *r)
1019 {
1020         int    plen;
1021         u32    *ptr;
1022
1023         memset(rtm, 0, sizeof(*rtm));
1024         memset(rta, 0, sizeof(*rta));
1025
1026         if (r->rt_dst.sa_family != AF_INET)
1027                 return -EAFNOSUPPORT;
1028
1029         /* Check mask for validity:
1030            a) it must be contiguous.
1031            b) destination must have all host bits clear.
1032            c) if application forgot to set correct family (AF_INET),
1033               reject request unless it is absolutely clear i.e.
1034               both family and mask are zero.
1035          */
1036         plen = 32;
1037         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1038         if (!(r->rt_flags&RTF_HOST)) {
1039                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1040                 if (r->rt_genmask.sa_family != AF_INET) {
1041                         if (mask || r->rt_genmask.sa_family)
1042                                 return -EAFNOSUPPORT;
1043                 }
1044                 if (bad_mask(mask, *ptr))
1045                         return -EINVAL;
1046                 plen = inet_mask_len(mask);
1047         }
1048
1049         nl->nlmsg_flags = NLM_F_REQUEST;
1050         nl->nlmsg_pid = 0;
1051         nl->nlmsg_seq = 0;
1052         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1053         if (cmd == SIOCDELRT) {
1054                 nl->nlmsg_type = RTM_DELROUTE;
1055                 nl->nlmsg_flags = 0;
1056         } else {
1057                 nl->nlmsg_type = RTM_NEWROUTE;
1058                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1059                 rtm->rtm_protocol = RTPROT_BOOT;
1060         }
1061
1062         rtm->rtm_dst_len = plen;
1063         rta->rta_dst = ptr;
1064
1065         if (r->rt_metric) {
1066                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1067                 rta->rta_priority = (u32*)&r->rt_pad3;
1068         }
1069         if (r->rt_flags&RTF_REJECT) {
1070                 rtm->rtm_scope = RT_SCOPE_HOST;
1071                 rtm->rtm_type = RTN_UNREACHABLE;
1072                 return 0;
1073         }
1074         rtm->rtm_scope = RT_SCOPE_NOWHERE;
1075         rtm->rtm_type = RTN_UNICAST;
1076
1077         if (r->rt_dev) {
1078                 char *colon;
1079                 struct net_device *dev;
1080                 char   devname[IFNAMSIZ];
1081
1082                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1083                         return -EFAULT;
1084                 devname[IFNAMSIZ-1] = 0;
1085                 colon = strchr(devname, ':');
1086                 if (colon)
1087                         *colon = 0;
1088                 dev = __dev_get_by_name(devname);
1089                 if (!dev)
1090                         return -ENODEV;
1091                 rta->rta_oif = &dev->ifindex;
1092                 if (colon) {
1093                         struct in_ifaddr *ifa;
1094                         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1095                         if (!in_dev)
1096                                 return -ENODEV;
1097                         *colon = ':';
1098                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1099                                 if (strcmp(ifa->ifa_label, devname) == 0)
1100                                         break;
1101                         if (ifa == NULL)
1102                                 return -ENODEV;
1103                         rta->rta_prefsrc = &ifa->ifa_local;
1104                 }
1105         }
1106
1107         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1108         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1109                 rta->rta_gw = ptr;
1110                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1111                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1112         }
1113
1114         if (cmd == SIOCDELRT)
1115                 return 0;
1116
1117         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1118                 return -EINVAL;
1119
1120         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1121                 rtm->rtm_scope = RT_SCOPE_LINK;
1122
1123         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1124                 struct rtattr *rec;
1125                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1126                 if (mx == NULL)
1127                         return -ENOMEM;
1128                 rta->rta_mx = mx;
1129                 mx->rta_type = RTA_METRICS;
1130                 mx->rta_len  = RTA_LENGTH(0);
1131                 if (r->rt_flags&RTF_MTU) {
1132                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1133                         rec->rta_type = RTAX_ADVMSS;
1134                         rec->rta_len = RTA_LENGTH(4);
1135                         mx->rta_len += RTA_LENGTH(4);
1136                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1137                 }
1138                 if (r->rt_flags&RTF_WINDOW) {
1139                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1140                         rec->rta_type = RTAX_WINDOW;
1141                         rec->rta_len = RTA_LENGTH(4);
1142                         mx->rta_len += RTA_LENGTH(4);
1143                         *(u32*)RTA_DATA(rec) = r->rt_window;
1144                 }
1145                 if (r->rt_flags&RTF_IRTT) {
1146                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1147                         rec->rta_type = RTAX_RTT;
1148                         rec->rta_len = RTA_LENGTH(4);
1149                         mx->rta_len += RTA_LENGTH(4);
1150                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1151                 }
1152         }
1153         return 0;
1154 }
1155
1156 #endif
1157
1158 /*
1159    Update FIB if:
1160    - local address disappeared -> we must delete all the entries
1161      referring to it.
1162    - device went down -> we must shutdown all nexthops going via it.
1163  */
1164
1165 int fib_sync_down(u32 local, struct net_device *dev, int force)
1166 {
1167         int ret = 0;
1168         int scope = RT_SCOPE_NOWHERE;
1169         
1170         if (force)
1171                 scope = -1;
1172
1173         if (local && fib_info_laddrhash) {
1174                 unsigned int hash = fib_laddr_hashfn(local);
1175                 struct hlist_head *head = &fib_info_laddrhash[hash];
1176                 struct hlist_node *node;
1177                 struct fib_info *fi;
1178
1179                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1180                         if (fi->fib_prefsrc == local) {
1181                                 fi->fib_flags |= RTNH_F_DEAD;
1182                                 ret++;
1183                         }
1184                 }
1185         }
1186
1187         if (dev) {
1188                 struct fib_info *prev_fi = NULL;
1189                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1190                 struct hlist_head *head = &fib_info_devhash[hash];
1191                 struct hlist_node *node;
1192                 struct fib_nh *nh;
1193
1194                 hlist_for_each_entry(nh, node, head, nh_hash) {
1195                         struct fib_info *fi = nh->nh_parent;
1196                         int dead;
1197
1198                         BUG_ON(!fi->fib_nhs);
1199                         if (nh->nh_dev != dev || fi == prev_fi)
1200                                 continue;
1201                         prev_fi = fi;
1202                         dead = 0;
1203                         change_nexthops(fi) {
1204                                 if (nh->nh_flags&RTNH_F_DEAD)
1205                                         dead++;
1206                                 else if (nh->nh_dev == dev &&
1207                                          nh->nh_scope != scope) {
1208                                         nh->nh_flags |= RTNH_F_DEAD;
1209 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1210                                         spin_lock_bh(&fib_multipath_lock);
1211                                         fi->fib_power -= nh->nh_power;
1212                                         nh->nh_power = 0;
1213                                         spin_unlock_bh(&fib_multipath_lock);
1214 #endif
1215                                         dead++;
1216                                 }
1217 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1218                                 if (force > 1 && nh->nh_dev == dev) {
1219                                         dead = fi->fib_nhs;
1220                                         break;
1221                                 }
1222 #endif
1223                         } endfor_nexthops(fi)
1224                         if (dead == fi->fib_nhs) {
1225                                 fi->fib_flags |= RTNH_F_DEAD;
1226                                 ret++;
1227                         }
1228                 }
1229         }
1230
1231         return ret;
1232 }
1233
1234 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1235
1236 /*
1237    Dead device goes up. We wake up dead nexthops.
1238    It takes sense only on multipath routes.
1239  */
1240
1241 int fib_sync_up(struct net_device *dev)
1242 {
1243         struct fib_info *prev_fi;
1244         unsigned int hash;
1245         struct hlist_head *head;
1246         struct hlist_node *node;
1247         struct fib_nh *nh;
1248         int ret;
1249
1250         if (!(dev->flags&IFF_UP))
1251                 return 0;
1252
1253         prev_fi = NULL;
1254         hash = fib_devindex_hashfn(dev->ifindex);
1255         head = &fib_info_devhash[hash];
1256         ret = 0;
1257
1258         hlist_for_each_entry(nh, node, head, nh_hash) {
1259                 struct fib_info *fi = nh->nh_parent;
1260                 int alive;
1261
1262                 BUG_ON(!fi->fib_nhs);
1263                 if (nh->nh_dev != dev || fi == prev_fi)
1264                         continue;
1265
1266                 prev_fi = fi;
1267                 alive = 0;
1268                 change_nexthops(fi) {
1269                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1270                                 alive++;
1271                                 continue;
1272                         }
1273                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1274                                 continue;
1275                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1276                                 continue;
1277                         alive++;
1278                         spin_lock_bh(&fib_multipath_lock);
1279                         nh->nh_power = 0;
1280                         nh->nh_flags &= ~RTNH_F_DEAD;
1281                         spin_unlock_bh(&fib_multipath_lock);
1282                 } endfor_nexthops(fi)
1283
1284                 if (alive > 0) {
1285                         fi->fib_flags &= ~RTNH_F_DEAD;
1286                         ret++;
1287                 }
1288         }
1289
1290         return ret;
1291 }
1292
1293 /*
1294    The algorithm is suboptimal, but it provides really
1295    fair weighted route distribution.
1296  */
1297
1298 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1299 {
1300         struct fib_info *fi = res->fi;
1301         int w;
1302
1303         spin_lock_bh(&fib_multipath_lock);
1304         if (fi->fib_power <= 0) {
1305                 int power = 0;
1306                 change_nexthops(fi) {
1307                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1308                                 power += nh->nh_weight;
1309                                 nh->nh_power = nh->nh_weight;
1310                         }
1311                 } endfor_nexthops(fi);
1312                 fi->fib_power = power;
1313                 if (power <= 0) {
1314                         spin_unlock_bh(&fib_multipath_lock);
1315                         /* Race condition: route has just become dead. */
1316                         res->nh_sel = 0;
1317                         return;
1318                 }
1319         }
1320
1321
1322         /* w should be random number [0..fi->fib_power-1],
1323            it is pretty bad approximation.
1324          */
1325
1326         w = jiffies % fi->fib_power;
1327
1328         change_nexthops(fi) {
1329                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1330                         if ((w -= nh->nh_power) <= 0) {
1331                                 nh->nh_power--;
1332                                 fi->fib_power--;
1333                                 res->nh_sel = nhsel;
1334                                 spin_unlock_bh(&fib_multipath_lock);
1335                                 return;
1336                         }
1337                 }
1338         } endfor_nexthops(fi);
1339
1340         /* Race condition: route has just become dead. */
1341         res->nh_sel = 0;
1342         spin_unlock_bh(&fib_multipath_lock);
1343 }
1344 #endif