2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
15 * This work is based on the LPC-trie which is originally descibed in:
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
28 * Code from fib_hash has been reused which includes the following header:
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
35 * IPv4 FIB: lookup engine and maintenance routines.
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
46 #define VERSION "0.324"
48 #include <linux/config.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
51 #include <asm/bitops.h>
52 #include <linux/types.h>
53 #include <linux/kernel.h>
54 #include <linux/sched.h>
56 #include <linux/string.h>
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/errno.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/if_arp.h>
64 #include <linux/proc_fs.h>
65 #include <linux/skbuff.h>
66 #include <linux/netlink.h>
67 #include <linux/init.h>
68 #include <linux/list.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
74 #include <net/ip_fib.h>
75 #include "fib_lookup.h"
77 #undef CONFIG_IP_FIB_TRIE_STATS
78 #define MAX_CHILDS 16384
80 #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81 #define KEYLENGTH (8*sizeof(t_key))
82 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
85 static DEFINE_RWLOCK(fib_lock);
87 typedef unsigned int t_key;
91 #define NODE_TYPE_MASK 0x1UL
92 #define NODE_PARENT(_node) \
93 ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94 #define NODE_SET_PARENT(_node, _ptr) \
95 ((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97 #define NODE_INIT_PARENT(_node, _type) \
98 ((_node)->_parent = (_type))
99 #define NODE_TYPE(_node) \
100 ((_node)->_parent & NODE_TYPE_MASK)
102 #define IS_TNODE(n) (!(n->_parent & T_LEAF))
103 #define IS_LEAF(n) (n->_parent & T_LEAF)
107 unsigned long _parent;
112 unsigned long _parent;
113 struct hlist_head list;
117 struct hlist_node hlist;
119 struct list_head falh;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
132 #ifdef CONFIG_IP_FIB_TRIE_STATS
133 struct trie_use_stats {
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
143 unsigned int totdepth;
144 unsigned int maxdepth;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
153 #ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
157 unsigned int revision;
160 static int trie_debug = 0;
162 static int tnode_full(struct tnode *tn, struct node *n);
163 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165 static int tnode_child_length(struct tnode *tn);
166 static struct node *resize(struct trie *t, struct tnode *tn);
167 static struct tnode *inflate(struct trie *t, struct tnode *tn);
168 static struct tnode *halve(struct trie *t, struct tnode *tn);
169 static void tnode_free(struct tnode *tn);
170 static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171 extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172 extern int fib_detect_death(struct fib_info *fi, int order,
173 struct fib_info **last_resort, int *last_idx, int *dflt);
175 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
176 struct nlmsghdr *n, struct netlink_skb_parms *req);
178 static kmem_cache_t *fn_alias_kmem;
179 static struct trie *trie_local = NULL, *trie_main = NULL;
181 static void trie_bug(char *err)
183 printk("Trie Bug: %s\n", err);
187 static inline struct node *tnode_get_child(struct tnode *tn, int i)
189 if (i >= 1<<tn->bits)
190 trie_bug("tnode_get_child");
195 static inline int tnode_child_length(struct tnode *tn)
201 _________________________________________________________________
202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
203 ----------------------------------------------------------------
204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
206 _________________________________________________________________
207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
208 -----------------------------------------------------------------
209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
218 static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
220 if (offset < KEYLENGTH)
221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
226 static inline int tkey_equals(t_key a, t_key b)
231 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
233 if (bits == 0 || offset >= KEYLENGTH)
235 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
239 static inline int tkey_mismatch(t_key a, int offset, t_key b)
246 while((diff << i) >> (KEYLENGTH-1) == 0)
251 /* Candiate for fib_semantics */
253 static void fn_free_alias(struct fib_alias *fa)
255 fib_release_info(fa->fa_info);
256 kmem_cache_free(fn_alias_kmem, fa);
260 To understand this stuff, an understanding of keys and all their bits is
261 necessary. Every node in the trie has a key associated with it, but not
262 all of the bits in that key are significant.
264 Consider a node 'n' and its parent 'tp'.
266 If n is a leaf, every bit in its key is significant. Its presence is
267 necessitaded by path compression, since during a tree traversal (when
268 searching for a leaf - unless we are doing an insertion) we will completely
269 ignore all skipped bits we encounter. Thus we need to verify, at the end of
270 a potentially successful search, that we have indeed been walking the
273 Note that we can never "miss" the correct key in the tree if present by
274 following the wrong path. Path compression ensures that segments of the key
275 that are the same for all keys with a given prefix are skipped, but the
276 skipped part *is* identical for each node in the subtrie below the skipped
277 bit! trie_insert() in this implementation takes care of that - note the
278 call to tkey_sub_equals() in trie_insert().
280 if n is an internal node - a 'tnode' here, the various parts of its key
281 have many different meanings.
284 _________________________________________________________________
285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
286 -----------------------------------------------------------------
287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
289 _________________________________________________________________
290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
291 -----------------------------------------------------------------
292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
299 First, let's just ignore the bits that come before the parent tp, that is
300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
301 not use them for anything.
303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
304 index into the parent's child array. That is, they will be used to find
305 'n' among tp's children.
307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
310 All the bits we have seen so far are significant to the node n. The rest
311 of the bits are really not needed or indeed known in n->key.
313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
314 n's child array, and will of course be different for each child.
316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
321 static void check_tnode(struct tnode *tn)
323 if(tn && tn->pos+tn->bits > 32) {
324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
328 static int halve_threshold = 25;
329 static int inflate_threshold = 50;
331 static struct leaf *leaf_new(void)
333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
335 NODE_INIT_PARENT(l, T_LEAF);
336 INIT_HLIST_HEAD(&l->list);
341 static struct leaf_info *leaf_info_new(int plen)
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
346 INIT_LIST_HEAD(&li->falh);
351 static inline void free_leaf(struct leaf *l)
356 static inline void free_leaf_info(struct leaf_info *li)
361 static struct tnode *tnode_alloc(unsigned int size)
363 if (size <= PAGE_SIZE) {
364 return kmalloc(size, GFP_KERNEL);
366 return (struct tnode *)
367 __get_free_pages(GFP_KERNEL, get_order(size));
371 static void __tnode_free(struct tnode *tn)
373 unsigned int size = sizeof(struct tnode) +
374 (1<<tn->bits) * sizeof(struct node *);
376 if (size <= PAGE_SIZE)
379 free_pages((unsigned long)tn, get_order(size));
382 static struct tnode* tnode_new(t_key key, int pos, int bits)
384 int nchildren = 1<<bits;
385 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
386 struct tnode *tn = tnode_alloc(sz);
390 NODE_INIT_PARENT(tn, T_TNODE);
394 tn->full_children = 0;
395 tn->empty_children = 1<<bits;
398 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
399 (unsigned int) (sizeof(struct node) * 1<<bits));
403 static void tnode_free(struct tnode *tn)
406 trie_bug("tnode_free\n");
409 free_leaf((struct leaf *)tn);
411 printk("FL %p \n", tn);
413 else if(IS_TNODE(tn)) {
416 printk("FT %p \n", tn);
419 trie_bug("tnode_free\n");
424 * Check whether a tnode 'n' is "full", i.e. it is an internal node
425 * and no bits are skipped. See discussion in dyntree paper p. 6
428 static inline int tnode_full(struct tnode *tn, struct node *n)
430 if(n == NULL || IS_LEAF(n))
433 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
436 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
438 tnode_put_child_reorg(tn, i, n, -1);
442 * Add a child at position i overwriting the old value.
443 * Update the value of full_children and empty_children.
446 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
451 if(i >= 1<<tn->bits) {
452 printk("bits=%d, i=%d\n", tn->bits, i);
453 trie_bug("tnode_put_child_reorg bits");
455 write_lock_bh(&fib_lock);
458 /* update emptyChildren */
459 if (n == NULL && chi != NULL)
460 tn->empty_children++;
461 else if (n != NULL && chi == NULL)
462 tn->empty_children--;
464 /* update fullChildren */
466 wasfull = tnode_full(tn, chi);
468 isfull = tnode_full(tn, n);
469 if (wasfull && !isfull)
472 else if (!wasfull && isfull)
475 NODE_SET_PARENT(n, tn);
478 write_unlock_bh(&fib_lock);
481 static struct node *resize(struct trie *t, struct tnode *tn)
489 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
490 tn, inflate_threshold, halve_threshold);
493 if (tn->empty_children == tnode_child_length(tn)) {
498 if (tn->empty_children == tnode_child_length(tn) - 1)
499 for (i = 0; i < tnode_child_length(tn); i++) {
501 write_lock_bh(&fib_lock);
502 if (tn->child[i] != NULL) {
504 /* compress one level */
505 struct node *n = tn->child[i];
507 NODE_INIT_PARENT(n, NODE_TYPE(n));
509 write_unlock_bh(&fib_lock);
513 write_unlock_bh(&fib_lock);
516 * Double as long as the resulting node has a number of
517 * nonempty nodes that are above the threshold.
521 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
522 * the Helsinki University of Technology and Matti Tikkanen of Nokia
523 * Telecommunications, page 6:
524 * "A node is doubled if the ratio of non-empty children to all
525 * children in the *doubled* node is at least 'high'."
527 * 'high' in this instance is the variable 'inflate_threshold'. It
528 * is expressed as a percentage, so we multiply it with
529 * tnode_child_length() and instead of multiplying by 2 (since the
530 * child array will be doubled by inflate()) and multiplying
531 * the left-hand side by 100 (to handle the percentage thing) we
532 * multiply the left-hand side by 50.
534 * The left-hand side may look a bit weird: tnode_child_length(tn)
535 * - tn->empty_children is of course the number of non-null children
536 * in the current node. tn->full_children is the number of "full"
537 * children, that is non-null tnodes with a skip value of 0.
538 * All of those will be doubled in the resulting inflated tnode, so
539 * we just count them one extra time here.
541 * A clearer way to write this would be:
543 * to_be_doubled = tn->full_children;
544 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
547 * new_child_length = tnode_child_length(tn) * 2;
549 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
551 * if (new_fill_factor >= inflate_threshold)
553 * ...and so on, tho it would mess up the while() loop.
556 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
560 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
561 * inflate_threshold * new_child_length
563 * expand not_to_be_doubled and to_be_doubled, and shorten:
564 * 100 * (tnode_child_length(tn) - tn->empty_children +
565 * tn->full_children ) >= inflate_threshold * new_child_length
567 * expand new_child_length:
568 * 100 * (tnode_child_length(tn) - tn->empty_children +
569 * tn->full_children ) >=
570 * inflate_threshold * tnode_child_length(tn) * 2
573 * 50 * (tn->full_children + tnode_child_length(tn) -
574 * tn->empty_children ) >= inflate_threshold *
575 * tnode_child_length(tn)
581 while ((tn->full_children > 0 &&
582 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
583 inflate_threshold * tnode_child_length(tn))) {
591 * Halve as long as the number of empty children in this
592 * node is above threshold.
594 while (tn->bits > 1 &&
595 100 * (tnode_child_length(tn) - tn->empty_children) <
596 halve_threshold * tnode_child_length(tn))
600 /* Only one child remains */
602 if (tn->empty_children == tnode_child_length(tn) - 1)
603 for (i = 0; i < tnode_child_length(tn); i++) {
605 write_lock_bh(&fib_lock);
606 if (tn->child[i] != NULL) {
607 /* compress one level */
608 struct node *n = tn->child[i];
611 NODE_INIT_PARENT(n, NODE_TYPE(n));
613 write_unlock_bh(&fib_lock);
617 write_unlock_bh(&fib_lock);
620 return (struct node *) tn;
623 static struct tnode *inflate(struct trie *t, struct tnode *tn)
626 struct tnode *oldtnode = tn;
627 int olen = tnode_child_length(tn);
631 printk("In inflate\n");
633 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
636 trie_bug("tnode_new failed");
638 for(i = 0; i < olen; i++) {
639 struct node *node = tnode_get_child(oldtnode, i);
645 /* A leaf or an internal node with skipped bits */
647 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
648 tn->pos + tn->bits - 1) {
649 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
651 put_child(t, tn, 2*i, node);
653 put_child(t, tn, 2*i+1, node);
657 /* An internal node with two children */
658 inode = (struct tnode *) node;
660 if (inode->bits == 1) {
661 put_child(t, tn, 2*i, inode->child[0]);
662 put_child(t, tn, 2*i+1, inode->child[1]);
667 /* An internal node with more than two children */
669 struct tnode *left, *right;
672 /* We will replace this node 'inode' with two new
673 * ones, 'left' and 'right', each with half of the
674 * original children. The two new nodes will have
675 * a position one bit further down the key and this
676 * means that the "significant" part of their keys
677 * (see the discussion near the top of this file)
678 * will differ by one bit, which will be "0" in
679 * left's key and "1" in right's key. Since we are
680 * moving the key position by one step, the bit that
681 * we are moving away from - the bit at position
682 * (inode->pos) - is the one that will differ between
683 * left and right. So... we synthesize that bit in the
685 * The mask 'm' below will be a single "one" bit at
686 * the position (inode->pos)
689 t_key m = TKEY_GET_MASK(inode->pos, 1);
691 /* Use the old key, but set the new significant
694 left = tnode_new(inode->key&(~m), inode->pos + 1,
698 trie_bug("tnode_new failed");
701 /* Use the old key, but set the new significant
704 right = tnode_new(inode->key|m, inode->pos + 1,
708 trie_bug("tnode_new failed");
710 size = tnode_child_length(left);
711 for(j = 0; j < size; j++) {
712 put_child(t, left, j, inode->child[j]);
713 put_child(t, right, j, inode->child[j + size]);
715 put_child(t, tn, 2*i, resize(t, left));
716 put_child(t, tn, 2*i+1, resize(t, right));
721 tnode_free(oldtnode);
725 static struct tnode *halve(struct trie *t, struct tnode *tn)
727 struct tnode *oldtnode = tn;
728 struct node *left, *right;
730 int olen = tnode_child_length(tn);
732 if(trie_debug) printk("In halve\n");
734 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
737 trie_bug("tnode_new failed");
739 for(i = 0; i < olen; i += 2) {
740 left = tnode_get_child(oldtnode, i);
741 right = tnode_get_child(oldtnode, i+1);
743 /* At least one of the children is empty */
745 if (right == NULL) /* Both are empty */
747 put_child(t, tn, i/2, right);
748 } else if (right == NULL)
749 put_child(t, tn, i/2, left);
751 /* Two nonempty children */
753 struct tnode *newBinNode =
754 tnode_new(left->key, tn->pos + tn->bits, 1);
757 trie_bug("tnode_new failed");
759 put_child(t, newBinNode, 0, left);
760 put_child(t, newBinNode, 1, right);
761 put_child(t, tn, i/2, resize(t, newBinNode));
764 tnode_free(oldtnode);
768 static void *trie_init(struct trie *t)
774 #ifdef CONFIG_IP_FIB_TRIE_STATS
775 memset(&t->stats, 0, sizeof(struct trie_use_stats));
781 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
783 struct hlist_node *node;
784 struct leaf_info *li;
786 hlist_for_each_entry(li, node, head, hlist) {
788 if ( li->plen == plen )
794 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
796 struct list_head *fa_head=NULL;
797 struct leaf_info *li = find_leaf_info(&l->list, plen);
805 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
807 struct leaf_info *li=NULL, *last=NULL;
808 struct hlist_node *node, *tmp;
810 write_lock_bh(&fib_lock);
812 if(hlist_empty(head))
813 hlist_add_head(&new->hlist, head);
815 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
817 if (new->plen > li->plen)
823 hlist_add_after(&last->hlist, &new->hlist);
825 hlist_add_before(&new->hlist, &li->hlist);
827 write_unlock_bh(&fib_lock);
831 fib_find_node(struct trie *t, u32 key)
840 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
841 tn = (struct tnode *) n;
845 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
846 pos=tn->pos + tn->bits;
847 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
852 /* Case we have found a leaf. Compare prefixes */
854 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
855 struct leaf *l = (struct leaf *) n;
861 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
866 struct tnode *tp = NULL;
874 while (tn != NULL && NODE_PARENT(tn) != NULL) {
877 printk("Rebalance tn=%p \n", tn);
878 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
880 printk("Rebalance tp=%p \n", tp);
881 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
887 tp = NODE_PARENT(tn);
888 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
889 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
890 tn = (struct tnode *) resize (t, (struct tnode *)tn);
891 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
896 tn = NODE_PARENT(tn);
898 /* Handle last (top) tnode */
900 tn = (struct tnode*) resize(t, (struct tnode *)tn);
902 return (struct node*) tn;
905 static struct list_head *
906 fib_insert_node(struct trie *t, int *err, u32 key, int plen)
909 struct tnode *tp = NULL, *tn = NULL;
913 struct list_head *fa_head=NULL;
914 struct leaf_info *li;
920 /* If we point to NULL, stop. Either the tree is empty and we should
921 * just put a new leaf in if, or we have reached an empty child slot,
922 * and we should just put our new leaf in that.
923 * If we point to a T_TNODE, check if it matches our key. Note that
924 * a T_TNODE might be skipping any number of bits - its 'pos' need
925 * not be the parent's 'pos'+'bits'!
927 * If it does match the current key, get pos/bits from it, extract
928 * the index from our key, push the T_TNODE and walk the tree.
930 * If it doesn't, we have to replace it with a new T_TNODE.
932 * If we point to a T_LEAF, it might or might not have the same key
933 * as we do. If it does, just change the value, update the T_LEAF's
934 * value, and return it.
935 * If it doesn't, we need to replace it with a T_TNODE.
938 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
939 tn = (struct tnode *) n;
943 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
945 pos=tn->pos + tn->bits;
946 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
948 if(n && NODE_PARENT(n) != tn) {
949 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
958 * n ----> NULL, LEAF or TNODE
960 * tp is n's (parent) ----> NULL or TNODE
963 if(tp && IS_LEAF(tp))
967 /* Case 1: n is a leaf. Compare prefixes */
969 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
970 struct leaf *l = ( struct leaf *) n;
972 li = leaf_info_new(plen);
980 insert_leaf_info(&l->list, li);
992 li = leaf_info_new(plen);
995 tnode_free((struct tnode *) l);
1000 fa_head = &li->falh;
1001 insert_leaf_info(&l->list, li);
1003 /* Case 2: n is NULL, and will just insert a new leaf */
1004 if (t->trie && n == NULL) {
1006 NODE_SET_PARENT(l, tp);
1012 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1013 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1016 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1019 * Add a new tnode here
1020 * first tnode need some special handling
1024 pos=tp->pos+tp->bits;
1028 newpos = tkey_mismatch(key, pos, n->key);
1029 tn = tnode_new(n->key, newpos, 1);
1033 tn = tnode_new(key, newpos, 1); /* First tnode */
1038 tnode_free((struct tnode *) l);
1043 NODE_SET_PARENT(tn, tp);
1045 missbit=tkey_extract_bits(key, newpos, 1);
1046 put_child(t, tn, missbit, (struct node *)l);
1047 put_child(t, tn, 1-missbit, n);
1050 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1051 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1054 t->trie = (struct node*) tn; /* First tnode */
1058 if(tp && tp->pos+tp->bits > 32) {
1059 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1060 tp, tp->pos, tp->bits, key, plen);
1062 /* Rebalance the trie */
1063 t->trie = trie_rebalance(t, tp);
1071 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1072 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1074 struct trie *t = (struct trie *) tb->tb_data;
1075 struct fib_alias *fa, *new_fa;
1076 struct list_head *fa_head=NULL;
1077 struct fib_info *fi;
1078 int plen = r->rtm_dst_len;
1079 int type = r->rtm_type;
1080 u8 tos = r->rtm_tos;
1090 memcpy(&key, rta->rta_dst, 4);
1095 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1097 mask = ntohl( inet_make_mask(plen) );
1104 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1107 l = fib_find_node(t, key);
1111 fa_head = get_fa_head(l, plen);
1112 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1115 /* Now fa, if non-NULL, points to the first fib alias
1116 * with the same keys [prefix,tos,priority], if such key already
1117 * exists or to the node before which we will insert new one.
1119 * If fa is NULL, we will need to allocate a new one and
1120 * insert to the head of f.
1122 * If f is NULL, no fib node matched the destination key
1123 * and we need to allocate a new one of those as well.
1127 fa->fa_info->fib_priority == fi->fib_priority) {
1128 struct fib_alias *fa_orig;
1131 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1134 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1135 struct fib_info *fi_drop;
1138 write_lock_bh(&fib_lock);
1140 fi_drop = fa->fa_info;
1143 fa->fa_scope = r->rtm_scope;
1144 state = fa->fa_state;
1145 fa->fa_state &= ~FA_S_ACCESSED;
1147 write_unlock_bh(&fib_lock);
1149 fib_release_info(fi_drop);
1150 if (state & FA_S_ACCESSED)
1155 /* Error if we find a perfect match which
1156 * uses the same scope, type, and nexthop
1160 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1161 if (fa->fa_tos != tos)
1163 if (fa->fa_info->fib_priority != fi->fib_priority)
1165 if (fa->fa_type == type &&
1166 fa->fa_scope == r->rtm_scope &&
1167 fa->fa_info == fi) {
1171 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1175 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1179 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1183 new_fa->fa_info = fi;
1184 new_fa->fa_tos = tos;
1185 new_fa->fa_type = type;
1186 new_fa->fa_scope = r->rtm_scope;
1187 new_fa->fa_state = 0;
1192 * Insert new entry to the list.
1196 fa_head = fib_insert_node(t, &err, key, plen);
1199 goto out_free_new_fa;
1202 write_lock_bh(&fib_lock);
1204 list_add_tail(&new_fa->fa_list,
1205 (fa ? &fa->fa_list : fa_head));
1207 write_unlock_bh(&fib_lock);
1210 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1215 kmem_cache_free(fn_alias_kmem, new_fa);
1217 fib_release_info(fi);
1222 static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1223 struct fib_result *res, int *err)
1227 struct leaf_info *li;
1228 struct hlist_head *hhead = &l->list;
1229 struct hlist_node *node;
1231 hlist_for_each_entry(li, node, hhead, hlist) {
1234 mask = ntohl(inet_make_mask(i));
1235 if (l->key != (key & mask))
1238 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1240 #ifdef CONFIG_IP_FIB_TRIE_STATS
1241 t->stats.semantic_match_passed++;
1245 #ifdef CONFIG_IP_FIB_TRIE_STATS
1246 t->stats.semantic_match_miss++;
1253 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1255 struct trie *t = (struct trie *) tb->tb_data;
1260 t_key key=ntohl(flp->fl4_dst);
1263 int current_prefix_length = KEYLENGTH;
1266 read_lock(&fib_lock);
1270 #ifdef CONFIG_IP_FIB_TRIE_STATS
1276 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1280 pn = (struct tnode *) n;
1289 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1291 n = tnode_get_child(pn, cindex);
1294 #ifdef CONFIG_IP_FIB_TRIE_STATS
1295 t->stats.null_node_hit++;
1303 struct tnode *cn = (struct tnode *)n;
1304 t_key node_prefix, key_prefix, pref_mismatch;
1308 * It's a tnode, and we can do some extra checks here if we
1309 * like, to avoid descending into a dead-end branch.
1310 * This tnode is in the parent's child array at index
1311 * key[p_pos..p_pos+p_bits] but potentially with some bits
1312 * chopped off, so in reality the index may be just a
1313 * subprefix, padded with zero at the end.
1314 * We can also take a look at any skipped bits in this
1315 * tnode - everything up to p_pos is supposed to be ok,
1316 * and the non-chopped bits of the index (se previous
1317 * paragraph) are also guaranteed ok, but the rest is
1318 * considered unknown.
1320 * The skipped bits are key[pos+bits..cn->pos].
1323 /* If current_prefix_length < pos+bits, we are already doing
1324 * actual prefix matching, which means everything from
1325 * pos+(bits-chopped_off) onward must be zero along some
1326 * branch of this subtree - otherwise there is *no* valid
1327 * prefix present. Here we can only check the skipped
1328 * bits. Remember, since we have already indexed into the
1329 * parent's child array, we know that the bits we chopped of
1333 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1335 if (current_prefix_length < pos+bits) {
1336 if (tkey_extract_bits(cn->key, current_prefix_length,
1337 cn->pos - current_prefix_length) != 0 ||
1343 * If chopped_off=0, the index is fully validated and we
1344 * only need to look at the skipped bits for this, the new,
1345 * tnode. What we actually want to do is to find out if
1346 * these skipped bits match our key perfectly, or if we will
1347 * have to count on finding a matching prefix further down,
1348 * because if we do, we would like to have some way of
1349 * verifying the existence of such a prefix at this point.
1352 /* The only thing we can do at this point is to verify that
1353 * any such matching prefix can indeed be a prefix to our
1354 * key, and if the bits in the node we are inspecting that
1355 * do not match our key are not ZERO, this cannot be true.
1356 * Thus, find out where there is a mismatch (before cn->pos)
1357 * and verify that all the mismatching bits are zero in the
1361 /* Note: We aren't very concerned about the piece of the key
1362 * that precede pn->pos+pn->bits, since these have already been
1363 * checked. The bits after cn->pos aren't checked since these are
1364 * by definition "unknown" at this point. Thus, what we want to
1365 * see is if we are about to enter the "prefix matching" state,
1366 * and in that case verify that the skipped bits that will prevail
1367 * throughout this subtree are zero, as they have to be if we are
1368 * to find a matching prefix.
1371 node_prefix = MASK_PFX(cn->key, cn->pos);
1372 key_prefix = MASK_PFX(key, cn->pos);
1373 pref_mismatch = key_prefix^node_prefix;
1376 /* In short: If skipped bits in this node do not match the search
1377 * key, enter the "prefix matching" state.directly.
1379 if (pref_mismatch) {
1380 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1382 pref_mismatch = pref_mismatch <<1;
1384 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1386 if (key_prefix != 0)
1389 if (current_prefix_length >= cn->pos)
1390 current_prefix_length=mp;
1393 pn = (struct tnode *)n; /* Descend */
1398 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1404 /* As zero don't change the child key (cindex) */
1405 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1409 /* Decrease current_... with bits chopped off */
1410 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1411 current_prefix_length = pn->pos + pn->bits - chopped_off;
1414 * Either we do the actual chop off according or if we have
1415 * chopped off all bits in this tnode walk up to our parent.
1418 if(chopped_off <= pn->bits)
1419 cindex &= ~(1 << (chopped_off-1));
1421 if( NODE_PARENT(pn) == NULL)
1424 /* Get Child's index */
1425 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1426 pn = NODE_PARENT(pn);
1429 #ifdef CONFIG_IP_FIB_TRIE_STATS
1430 t->stats.backtrack++;
1438 read_unlock(&fib_lock);
1442 static int trie_leaf_remove(struct trie *t, t_key key)
1445 struct tnode *tp = NULL;
1446 struct node *n = t->trie;
1450 printk("entering trie_leaf_remove(%p)\n", n);
1452 /* Note that in the case skipped bits, those bits are *not* checked!
1453 * When we finish this, we will have NULL or a T_LEAF, and the
1454 * T_LEAF may or may not match our key.
1457 while (n != NULL && IS_TNODE(n)) {
1458 struct tnode *tn = (struct tnode *) n;
1460 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1462 if(n && NODE_PARENT(n) != tn) {
1463 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1467 l = (struct leaf *) n;
1469 if(!n || !tkey_equals(l->key, key))
1474 * Remove the leaf and rebalance the tree
1480 tp = NODE_PARENT(n);
1481 tnode_free((struct tnode *) n);
1484 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1485 put_child(t, (struct tnode *)tp, cindex, NULL);
1486 t->trie = trie_rebalance(t, tp);
1495 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1496 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1498 struct trie *t = (struct trie *) tb->tb_data;
1500 int plen = r->rtm_dst_len;
1501 u8 tos = r->rtm_tos;
1502 struct fib_alias *fa, *fa_to_delete;
1503 struct list_head *fa_head;
1511 memcpy(&key, rta->rta_dst, 4);
1514 mask = ntohl( inet_make_mask(plen) );
1520 l = fib_find_node(t, key);
1525 fa_head = get_fa_head(l, plen);
1526 fa = fib_find_alias(fa_head, tos, 0);
1532 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1534 fa_to_delete = NULL;
1535 fa_head = fa->fa_list.prev;
1536 list_for_each_entry(fa, fa_head, fa_list) {
1537 struct fib_info *fi = fa->fa_info;
1539 if (fa->fa_tos != tos)
1542 if ((!r->rtm_type ||
1543 fa->fa_type == r->rtm_type) &&
1544 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1545 fa->fa_scope == r->rtm_scope) &&
1546 (!r->rtm_protocol ||
1547 fi->fib_protocol == r->rtm_protocol) &&
1548 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1556 struct leaf_info *li;
1559 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1561 l = fib_find_node(t, key);
1562 li = find_leaf_info(&l->list, plen);
1564 write_lock_bh(&fib_lock);
1566 list_del(&fa->fa_list);
1568 if(list_empty(fa_head)) {
1569 hlist_del(&li->hlist);
1572 write_unlock_bh(&fib_lock);
1577 if(hlist_empty(&l->list))
1578 trie_leaf_remove(t, key);
1580 if (fa->fa_state & FA_S_ACCESSED)
1589 static int trie_flush_list(struct trie *t, struct list_head *head)
1591 struct fib_alias *fa, *fa_node;
1594 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1595 struct fib_info *fi = fa->fa_info;
1597 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1599 write_lock_bh(&fib_lock);
1600 list_del(&fa->fa_list);
1601 write_unlock_bh(&fib_lock);
1610 static int trie_flush_leaf(struct trie *t, struct leaf *l)
1613 struct hlist_head *lih = &l->list;
1614 struct hlist_node *node, *tmp;
1615 struct leaf_info *li = NULL;
1617 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1619 found += trie_flush_list(t, &li->falh);
1621 if (list_empty(&li->falh)) {
1623 write_lock_bh(&fib_lock);
1624 hlist_del(&li->hlist);
1625 write_unlock_bh(&fib_lock);
1633 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1635 struct node *c = (struct node *) thisleaf;
1643 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1644 return (struct leaf *) t->trie;
1646 p = (struct tnode*) t->trie; /* Start */
1649 p = (struct tnode *) NODE_PARENT(c);
1653 /* Find the next child of the parent */
1655 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1659 last = 1 << p->bits;
1660 for(idx = pos; idx < last ; idx++) {
1661 if( p->child[idx]) {
1663 /* Decend if tnode */
1665 while (IS_TNODE(p->child[idx])) {
1666 p = (struct tnode*) p->child[idx];
1669 /* Rightmost non-NULL branch */
1670 if( p && IS_TNODE(p) )
1671 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++;
1673 /* Done with this tnode? */
1674 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1677 return (struct leaf*) p->child[idx];
1681 /* No more children go up one step */
1682 c = (struct node*) p;
1683 p = (struct tnode *) NODE_PARENT(p);
1685 return NULL; /* Ready. Root of trie */
1688 static int fn_trie_flush(struct fib_table *tb)
1690 struct trie *t = (struct trie *) tb->tb_data;
1691 struct leaf *ll = NULL, *l = NULL;
1696 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1697 found += trie_flush_leaf(t, l);
1699 if (ll && hlist_empty(&ll->list))
1700 trie_leaf_remove(t, ll->key);
1704 if (ll && hlist_empty(&ll->list))
1705 trie_leaf_remove(t, ll->key);
1708 printk("trie_flush found=%d\n", found);
1712 static int trie_last_dflt=-1;
1715 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1717 struct trie *t = (struct trie *) tb->tb_data;
1718 int order, last_idx;
1719 struct fib_info *fi = NULL;
1720 struct fib_info *last_resort;
1721 struct fib_alias *fa = NULL;
1722 struct list_head *fa_head;
1729 read_lock(&fib_lock);
1731 l = fib_find_node(t, 0);
1735 fa_head = get_fa_head(l, 0);
1739 if (list_empty(fa_head))
1742 list_for_each_entry(fa, fa_head, fa_list) {
1743 struct fib_info *next_fi = fa->fa_info;
1745 if (fa->fa_scope != res->scope ||
1746 fa->fa_type != RTN_UNICAST)
1749 if (next_fi->fib_priority > res->fi->fib_priority)
1751 if (!next_fi->fib_nh[0].nh_gw ||
1752 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1754 fa->fa_state |= FA_S_ACCESSED;
1757 if (next_fi != res->fi)
1759 } else if (!fib_detect_death(fi, order, &last_resort,
1760 &last_idx, &trie_last_dflt)) {
1762 fib_info_put(res->fi);
1764 atomic_inc(&fi->fib_clntref);
1765 trie_last_dflt = order;
1771 if (order <= 0 || fi == NULL) {
1772 trie_last_dflt = -1;
1776 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1778 fib_info_put(res->fi);
1780 atomic_inc(&fi->fib_clntref);
1781 trie_last_dflt = order;
1784 if (last_idx >= 0) {
1786 fib_info_put(res->fi);
1787 res->fi = last_resort;
1789 atomic_inc(&last_resort->fib_clntref);
1791 trie_last_dflt = last_idx;
1793 read_unlock(&fib_lock);
1796 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1797 struct sk_buff *skb, struct netlink_callback *cb)
1800 struct fib_alias *fa;
1802 u32 xkey=htonl(key);
1807 list_for_each_entry(fa, fah, fa_list) {
1812 if (fa->fa_info->fib_nh == NULL) {
1813 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1817 if (fa->fa_info == NULL) {
1818 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1823 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1832 fa->fa_info, 0) < 0) {
1842 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1843 struct netlink_callback *cb)
1846 struct list_head *fa_head;
1847 struct leaf *l = NULL;
1850 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1855 memset(&cb->args[3], 0,
1856 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1858 fa_head = get_fa_head(l, plen);
1863 if(list_empty(fa_head))
1866 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1875 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1878 struct trie *t = (struct trie *) tb->tb_data;
1882 read_lock(&fib_lock);
1883 for (m=0; m<=32; m++) {
1888 memset(&cb->args[2], 0,
1889 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1891 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1896 read_unlock(&fib_lock);
1900 read_unlock(&fib_lock);
1904 /* Fix more generic FIB names for init later */
1906 #ifdef CONFIG_IP_MULTIPLE_TABLES
1907 struct fib_table * fib_hash_init(int id)
1909 struct fib_table * __init fib_hash_init(int id)
1912 struct fib_table *tb;
1915 if (fn_alias_kmem == NULL)
1916 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1917 sizeof(struct fib_alias),
1918 0, SLAB_HWCACHE_ALIGN,
1921 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1927 tb->tb_lookup = fn_trie_lookup;
1928 tb->tb_insert = fn_trie_insert;
1929 tb->tb_delete = fn_trie_delete;
1930 tb->tb_flush = fn_trie_flush;
1931 tb->tb_select_default = fn_trie_select_default;
1932 tb->tb_dump = fn_trie_dump;
1933 memset(tb->tb_data, 0, sizeof(struct trie));
1935 t = (struct trie *) tb->tb_data;
1939 if (id == RT_TABLE_LOCAL)
1941 else if (id == RT_TABLE_MAIN)
1944 if (id == RT_TABLE_LOCAL)
1945 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1950 /* Trie dump functions */
1952 static void putspace_seq(struct seq_file *seq, int n)
1954 while (n--) seq_printf(seq, " ");
1957 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1960 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1963 static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1964 int pend, int cindex, int bits)
1966 putspace_seq(seq, indent);
1968 seq_printf(seq, "|");
1970 seq_printf(seq, "+");
1972 seq_printf(seq, "%d/", cindex);
1973 printbin_seq(seq, cindex, bits);
1974 seq_printf(seq, ": ");
1977 seq_printf(seq, "<root>: ");
1978 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1981 seq_printf(seq, "key=%d.%d.%d.%d\n",
1982 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
1984 int plen=((struct tnode *)n)->pos;
1985 t_key prf=MASK_PFX(n->key, plen);
1986 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
1987 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
1990 struct leaf *l=(struct leaf *)n;
1991 struct fib_alias *fa;
1993 for (i=32; i>=0; i--)
1994 if(find_leaf_info(&l->list, i)) {
1996 struct list_head *fa_head = get_fa_head(l, i);
2001 if(list_empty(fa_head))
2004 putspace_seq(seq, indent+2);
2005 seq_printf(seq, "{/%d...dumping}\n", i);
2008 list_for_each_entry(fa, fa_head, fa_list) {
2009 putspace_seq(seq, indent+2);
2010 if (fa->fa_info->fib_nh == NULL) {
2011 seq_printf(seq, "Error _fib_nh=NULL\n");
2014 if (fa->fa_info == NULL) {
2015 seq_printf(seq, "Error fa_info=NULL\n");
2019 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2026 else if (IS_TNODE(n)) {
2027 struct tnode *tn=(struct tnode *)n;
2028 putspace_seq(seq, indent); seq_printf(seq, "| ");
2029 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2030 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2031 seq_printf(seq, "}\n");
2032 putspace_seq(seq, indent); seq_printf(seq, "| ");
2033 seq_printf(seq, "{pos=%d", tn->pos);
2034 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2035 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2036 putspace_seq(seq, indent); seq_printf(seq, "| ");
2037 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2041 static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2043 struct node *n=t->trie;
2049 read_lock(&fib_lock);
2051 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2053 printnode_seq(seq, indent, n, pend, cindex, 0);
2055 struct tnode *tn=(struct tnode *)n;
2056 pend = tn->pos+tn->bits;
2057 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2061 while (tn && cindex < (1 << tn->bits)) {
2062 if (tn->child[cindex]) {
2066 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2067 if (IS_LEAF(tn->child[cindex])) {
2073 * New tnode. Decend one level
2077 n=tn->child[cindex];
2078 tn=(struct tnode *)n;
2079 pend=tn->pos+tn->bits;
2080 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2089 * Test if we are done
2092 while (cindex >= (1 << tn->bits)) {
2095 * Move upwards and test for root
2096 * pop off all traversed nodes
2099 if (NODE_PARENT(tn) == NULL) {
2105 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2106 tn = NODE_PARENT(tn);
2108 n=(struct node *)tn;
2109 pend=tn->pos+tn->bits;
2118 else seq_printf(seq, "------ trie is empty\n");
2120 read_unlock(&fib_lock);
2123 static struct trie_stat *trie_stat_new(void)
2125 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2133 s->nullpointers = 0;
2135 for(i=0; i< MAX_CHILDS; i++)
2136 s->nodesizes[i] = 0;
2141 static struct trie_stat *trie_collect_stats(struct trie *t)
2143 struct node *n=t->trie;
2144 struct trie_stat *s = trie_stat_new();
2150 read_lock(&fib_lock);
2155 struct tnode *tn = (struct tnode *)n;
2156 pend=tn->pos+tn->bits;
2158 s->nodesizes[tn->bits]++;
2161 while (tn && cindex < (1 << tn->bits)) {
2162 if (tn->child[cindex]) {
2165 if (IS_LEAF(tn->child[cindex])) {
2169 if (depth > s->maxdepth)
2170 s->maxdepth = depth;
2171 s->totdepth += depth;
2177 * New tnode. Decend one level
2181 s->nodesizes[tn->bits]++;
2184 n = tn->child[cindex];
2185 tn = (struct tnode *)n;
2186 pend = tn->pos+tn->bits;
2198 * Test if we are done
2201 while (cindex >= (1 << tn->bits)) {
2204 * Move upwards and test for root
2205 * pop off all traversed nodes
2209 if (NODE_PARENT(tn) == NULL) {
2215 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2216 tn = NODE_PARENT(tn);
2218 n = (struct node *)tn;
2219 pend=tn->pos+tn->bits;
2230 read_unlock(&fib_lock);
2234 #ifdef CONFIG_PROC_FS
2236 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2241 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2246 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2250 if (ip_fib_main_table)
2251 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2255 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2258 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2261 static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2267 * This outputs /proc/net/fib_triestats
2269 * It always works in backward compatibility mode.
2270 * The format of the file is not supposed to be changed.
2273 static void collect_and_show(struct trie *t, struct seq_file *seq)
2275 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2276 int i, max, pointers;
2277 struct trie_stat *stat;
2280 stat = trie_collect_stats(t);
2283 seq_printf(seq, "trie=%p\n", t);
2287 avdepth=stat->totdepth*100 / stat->leaves;
2290 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2291 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2293 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2294 bytes += sizeof(struct leaf) * stat->leaves;
2295 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2296 bytes += sizeof(struct tnode) * stat->tnodes;
2300 while (max >= 0 && stat->nodesizes[max] == 0)
2304 for (i = 1; i <= max; i++)
2305 if (stat->nodesizes[i] != 0) {
2306 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2307 pointers += (1<<i) * stat->nodesizes[i];
2309 seq_printf(seq, "\n");
2310 seq_printf(seq, "Pointers: %d\n", pointers);
2311 bytes += sizeof(struct node *) * pointers;
2312 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2313 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2318 #ifdef CONFIG_IP_FIB_TRIE_STATS
2319 seq_printf(seq, "Counters:\n---------\n");
2320 seq_printf(seq,"gets = %d\n", t->stats.gets);
2321 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2322 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2323 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2324 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2326 memset(&(t->stats), 0, sizeof(t->stats));
2328 #endif /* CONFIG_IP_FIB_TRIE_STATS */
2331 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2335 if (v == SEQ_START_TOKEN) {
2336 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2337 sizeof(struct leaf), sizeof(struct tnode));
2339 collect_and_show(trie_local, seq);
2342 collect_and_show(trie_main, seq);
2345 snprintf(bf, sizeof(bf),
2346 "*\t%08X\t%08X", 200, 400);
2348 seq_printf(seq, "%-127s\n", bf);
2353 static struct seq_operations fib_triestat_seq_ops = {
2354 .start = fib_triestat_seq_start,
2355 .next = fib_triestat_seq_next,
2356 .stop = fib_triestat_seq_stop,
2357 .show = fib_triestat_seq_show,
2360 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2362 struct seq_file *seq;
2365 rc = seq_open(file, &fib_triestat_seq_ops);
2369 seq = file->private_data;
2376 static struct file_operations fib_triestat_seq_fops = {
2377 .owner = THIS_MODULE,
2378 .open = fib_triestat_seq_open,
2380 .llseek = seq_lseek,
2381 .release = seq_release_private,
2384 int __init fib_stat_proc_init(void)
2386 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2391 void __init fib_stat_proc_exit(void)
2393 proc_net_remove("fib_triestat");
2396 static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2401 static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2406 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2410 if (ip_fib_main_table)
2411 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2415 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2418 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2421 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2427 * This outputs /proc/net/fib_trie.
2429 * It always works in backward compatibility mode.
2430 * The format of the file is not supposed to be changed.
2433 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2437 if (v == SEQ_START_TOKEN) {
2439 trie_dump_seq(seq, trie_local);
2442 trie_dump_seq(seq, trie_main);
2446 snprintf(bf, sizeof(bf),
2447 "*\t%08X\t%08X", 200, 400);
2448 seq_printf(seq, "%-127s\n", bf);
2454 static struct seq_operations fib_trie_seq_ops = {
2455 .start = fib_trie_seq_start,
2456 .next = fib_trie_seq_next,
2457 .stop = fib_trie_seq_stop,
2458 .show = fib_trie_seq_show,
2461 static int fib_trie_seq_open(struct inode *inode, struct file *file)
2463 struct seq_file *seq;
2466 rc = seq_open(file, &fib_trie_seq_ops);
2470 seq = file->private_data;
2477 static struct file_operations fib_trie_seq_fops = {
2478 .owner = THIS_MODULE,
2479 .open = fib_trie_seq_open,
2481 .llseek = seq_lseek,
2482 .release = seq_release_private,
2485 int __init fib_proc_init(void)
2487 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2492 void __init fib_proc_exit(void)
2494 proc_net_remove("fib_trie");
2497 #endif /* CONFIG_PROC_FS */