1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/netlink.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         The algorithm skeleton was taken from the NS simulator's cbq.cc.
57         If someone wants to check this code against the LBL version,
58         they should take into account that ONLY the skeleton was borrowed;
59         the implementation is different. In particular:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than MTU, which is always the case when real time classes
64         have small rates. Note that the statement of [3] is
65         incomplete: delay may actually be estimated even if the class
66         per-round allotment is less than MTU. Namely, if the per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like wrong translations
77         from NS. Anyone is advised to find these differences
78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so we cannot estimate true class
81         idle time. The workaround is to treat the next dequeue event
82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it holds.
84         Moreover, combined with the clock integrator, this scheme looks
85         very close to an ideal solution.  */
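/*      A rough numeric illustration of the delay bound above (numbers are
        invented for illustration, and [x] is read as the ceiling): take
        MTU = 1500 bytes, k = 2 classes with rate fractions r_1 = r_2 = 0.25
        (so r = 0.5), and a per-round allotment factor W = 1000, i.e. each
        class may send 250 bytes per WRR round. Then

                delay_1 <= (ceil(1500/250)*500 + 500 + 2*1500)/B = 6500 bytes / B,

        which on a 1 Mbit/s link (B = 125000 bytes/s) is roughly 52 ms. The
        point is only that the bound stays finite even though the per-round
        allotment (250 bytes) is far below the MTU.  */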
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         psched_tdiff_t          penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         psched_time_t           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct hrtimer          delay_timer;
185         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         psched_tdiff_t          wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
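/* L2T ("length to time") looks up the transmission time of a packet of the
   given length at the class rate, in scheduler ticks, using the rate table
   supplied by userspace. The length is quantized by cell_log: for example
   (an illustrative value, not taken from this file) with cell_log == 3 a
   1000-byte packet indexes R_tab->data[1000 >> 3] == R_tab->data[125]. */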
196
197
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
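/* The hash folds a 32-bit classid into one of the 16 buckets of q->classes[]
   by XOR-ing the value onto itself. A worked example (arbitrary classid
   chosen for illustration): h = 0x00010005; h ^= h>>8 gives 0x00010105;
   h ^= h>>4 gives 0x00011115; h & 0xF == 5, so the class lands in bucket 5. */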
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
237    so that it resolves to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
240  */
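/* A small illustration of the defmap lookup below (hypothetical values): if
   the filter attached to a split node returns only a minor number, say
   res.classid == 2, with no class pointer, then defmap[2 & TC_PRIO_MAX] of
   that split node is tried first and defmap[TC_PRIO_BESTEFFORT] second.
   If both are empty, the fallback path retries using the band taken from
   skb->priority and, failing that too, returns the split node itself. */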
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If classifier selected a link sharing class,
303                  *         apply an agency-specific classifier.
304                  *         Repeat this procedure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued into an empty class.
325    cbq_activate_class adds the class to the tail of the active class list
326    of its priority band.
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
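/* A note on the list layout (a summary of the code above, not new behaviour):
   the active classes of a band form a circular singly linked ring threaded
   through next_alive. q->active[prio] points at the tail, i.e. the most
   recently activated class, and the tail's next_alive is the head, so
   appending at the tail and starting the WRR scan at the head are both O(1). */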
346
347 /*
348    Unlink class from active chain.
349    Note that this same procedure is done directly in cbq_dequeue*
350    during the round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374                         return;
375                 }
376         } while ((cl_prev = cl) != q->active[prio]);
377 }
378
379 static void
380 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 {
382         int toplevel = q->toplevel;
383
384         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385                 psched_time_t now;
386                 psched_tdiff_t incr;
387
388                 now = psched_get_time();
389                 incr = now - q->now_rt;
390                 now = q->now + incr;
391
392                 do {
393                         if (cl->undertime < now) {
394                                 q->toplevel = cl->level;
395                                 return;
396                         }
397                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398         }
399 }
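/* A short summary of the top-level rule implemented above (cf. [1]), as I read
   the code: q->toplevel caches the level of the first underlimit ancestor on
   the borrow chain of a class that just received backlog. cbq_under_limit()
   later refuses to borrow from ancestors whose level exceeds q->toplevel,
   which turns the per-class undertime bookkeeping into an approximation of
   top-level link sharing. */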
400
401 static int
402 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 {
404         struct cbq_sched_data *q = qdisc_priv(sch);
405         int len = skb->len;
406         int ret;
407         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409 #ifdef CONFIG_NET_CLS_POLICE
410         q->rx_class = cl;
411 #endif
412         if (cl == NULL) {
413                 if (ret == NET_XMIT_BYPASS)
414                         sch->qstats.drops++;
415                 kfree_skb(skb);
416                 return ret;
417         }
418
419 #ifdef CONFIG_NET_CLS_POLICE
420         cl->q->__parent = sch;
421 #endif
422         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423                 sch->q.qlen++;
424                 sch->bstats.packets++;
425                 sch->bstats.bytes+=len;
426                 cbq_mark_toplevel(q, cl);
427                 if (!cl->next_alive)
428                         cbq_activate_class(cl);
429                 return ret;
430         }
431
432         sch->qstats.drops++;
433         cbq_mark_toplevel(q, cl);
434         cl->qstats.drops++;
435         return ret;
436 }
437
438 static int
439 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 {
441         struct cbq_sched_data *q = qdisc_priv(sch);
442         struct cbq_class *cl;
443         int ret;
444
445         if ((cl = q->tx_class) == NULL) {
446                 kfree_skb(skb);
447                 sch->qstats.drops++;
448                 return NET_XMIT_CN;
449         }
450         q->tx_class = NULL;
451
452         cbq_mark_toplevel(q, cl);
453
454 #ifdef CONFIG_NET_CLS_POLICE
455         q->rx_class = cl;
456         cl->q->__parent = sch;
457 #endif
458         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459                 sch->q.qlen++;
460                 sch->qstats.requeues++;
461                 if (!cl->next_alive)
462                         cbq_activate_class(cl);
463                 return 0;
464         }
465         sch->qstats.drops++;
466         cl->qstats.drops++;
467         return ret;
468 }
469
470 /* Overlimit actions */
471
472 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474 static void cbq_ovl_classic(struct cbq_class *cl)
475 {
476         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477         psched_tdiff_t delay = cl->undertime - q->now;
478
479         if (!cl->delayed) {
480                 delay += cl->offtime;
481
482                 /*
483                    Class goes to sleep, so that it will have no
484                    chance to work avgidle. Let's forgive it 8)
485
486                    BTW cbq-2.0 has a bug in this
487                    place; apparently they forgot to shift it by cl->ewma_log.
488                  */
489                 if (cl->avgidle < 0)
490                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491                 if (cl->avgidle < cl->minidle)
492                         cl->avgidle = cl->minidle;
493                 if (delay <= 0)
494                         delay = 1;
495                 cl->undertime = q->now + delay;
496
497                 cl->xstats.overactions++;
498                 cl->delayed = 1;
499         }
500         if (q->wd_expires == 0 || q->wd_expires > delay)
501                 q->wd_expires = delay;
502
503         /* Dirty work! We must schedule wakeups based on
504            real available rate, rather than leaf rate,
505            which may be tiny (even zero).
506          */
507         if (q->toplevel == TC_CBQ_MAXLEVEL) {
508                 struct cbq_class *b;
509                 psched_tdiff_t base_delay = q->wd_expires;
510
511                 for (b = cl->borrow; b; b = b->borrow) {
512                         delay = b->undertime - q->now;
513                         if (delay < base_delay) {
514                                 if (delay <= 0)
515                                         delay = 1;
516                                 base_delay = delay;
517                         }
518                 }
519
520                 q->wd_expires = base_delay;
521         }
522 }
523
524 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
525    when they go overlimit
526  */
527
528 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 {
530         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531         struct cbq_class *this = cl;
532
533         do {
534                 if (cl->level > q->toplevel) {
535                         cl = NULL;
536                         break;
537                 }
538         } while ((cl = cl->borrow) != NULL);
539
540         if (cl == NULL)
541                 cl = this;
542         cbq_ovl_classic(cl);
543 }
544
545 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
546
547 static void cbq_ovl_delay(struct cbq_class *cl)
548 {
549         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550         psched_tdiff_t delay = cl->undertime - q->now;
551
552         if (!cl->delayed) {
553                 psched_time_t sched = q->now;
554                 ktime_t expires;
555
556                 delay += cl->offtime;
557                 if (cl->avgidle < 0)
558                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559                 if (cl->avgidle < cl->minidle)
560                         cl->avgidle = cl->minidle;
561                 cl->undertime = q->now + delay;
562
563                 if (delay > 0) {
564                         sched += delay + cl->penalty;
565                         cl->penalized = sched;
566                         cl->cpriority = TC_CBQ_MAXPRIO;
567                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
568
569                         expires = ktime_set(0, 0);
570                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
572                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
573                                                   expires)) > 0)
574                                 q->delay_timer.expires = expires;
575                         hrtimer_restart(&q->delay_timer);
576                         cl->delayed = 1;
577                         cl->xstats.overactions++;
578                         return;
579                 }
580                 delay = 1;
581         }
582         if (q->wd_expires == 0 || q->wd_expires > delay)
583                 q->wd_expires = delay;
584 }
585
586 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588 static void cbq_ovl_lowprio(struct cbq_class *cl)
589 {
590         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
592         cl->penalized = q->now + cl->penalty;
593
594         if (cl->cpriority != cl->priority2) {
595                 cl->cpriority = cl->priority2;
596                 q->pmask |= (1<<cl->cpriority);
597                 cl->xstats.overactions++;
598         }
599         cbq_ovl_classic(cl);
600 }
601
602 /* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604 static void cbq_ovl_drop(struct cbq_class *cl)
605 {
606         if (cl->q->ops->drop)
607                 if (cl->q->ops->drop(cl->q))
608                         cl->qdisc->q.qlen--;
609         cl->xstats.overactions++;
610         cbq_ovl_classic(cl);
611 }
612
613 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614                                        psched_time_t now)
615 {
616         struct cbq_class *cl;
617         struct cbq_class *cl_prev = q->active[prio];
618         psched_time_t sched = now;
619
620         if (cl_prev == NULL)
621                 return 0;
622
623         do {
624                 cl = cl_prev->next_alive;
625                 if (now - cl->penalized > 0) {
626                         cl_prev->next_alive = cl->next_alive;
627                         cl->next_alive = NULL;
628                         cl->cpriority = cl->priority;
629                         cl->delayed = 0;
630                         cbq_activate_class(cl);
631
632                         if (cl == q->active[prio]) {
633                                 q->active[prio] = cl_prev;
634                                 if (cl == q->active[prio]) {
635                                         q->active[prio] = NULL;
636                                         return 0;
637                                 }
638                         }
639
640                         cl = cl_prev->next_alive;
641                 } else if (sched - cl->penalized > 0)
642                         sched = cl->penalized;
643         } while ((cl_prev = cl) != q->active[prio]);
644
645         return sched - now;
646 }
647
648 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649 {
650         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651                                                 delay_timer);
652         struct Qdisc *sch = q->watchdog.qdisc;
653         psched_time_t now;
654         psched_tdiff_t delay = 0;
655         unsigned pmask;
656
657         now = psched_get_time();
658
659         pmask = q->pmask;
660         q->pmask = 0;
661
662         while (pmask) {
663                 int prio = ffz(~pmask);
664                 psched_tdiff_t tmp;
665
666                 pmask &= ~(1<<prio);
667
668                 tmp = cbq_undelay_prio(q, prio, now);
669                 if (tmp > 0) {
670                         q->pmask |= 1<<prio;
671                         if (tmp < delay || delay == 0)
672                                 delay = tmp;
673                 }
674         }
675
676         if (delay) {
677                 ktime_t time;
678
679                 time = ktime_set(0, 0);
680                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
682         }
683
684         sch->flags &= ~TCQ_F_THROTTLED;
685         netif_schedule(sch->dev);
686         return HRTIMER_NORESTART;
687 }
688
689
690 #ifdef CONFIG_NET_CLS_POLICE
691
692 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693 {
694         int len = skb->len;
695         struct Qdisc *sch = child->__parent;
696         struct cbq_sched_data *q = qdisc_priv(sch);
697         struct cbq_class *cl = q->rx_class;
698
699         q->rx_class = NULL;
700
701         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703                 cbq_mark_toplevel(q, cl);
704
705                 q->rx_class = cl;
706                 cl->q->__parent = sch;
707
708                 if (cl->q->enqueue(skb, cl->q) == 0) {
709                         sch->q.qlen++;
710                         sch->bstats.packets++;
711                         sch->bstats.bytes+=len;
712                         if (!cl->next_alive)
713                                 cbq_activate_class(cl);
714                         return 0;
715                 }
716                 sch->qstats.drops++;
717                 return 0;
718         }
719
720         sch->qstats.drops++;
721         return -1;
722 }
723 #endif
724
725 /*
726    This is a mission-critical procedure.
727
728    We "regenerate" the toplevel cutoff if the transmitting class
729    has backlog and is not regulated. This is not part of the
730    original CBQ description, but it looks more reasonable.
731    It may well be wrong; this question needs further investigation.
732 */
733
734 static __inline__ void
735 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736                     struct cbq_class *borrowed)
737 {
738         if (cl && q->toplevel >= borrowed->level) {
739                 if (cl->q->q.qlen > 1) {
740                         do {
741                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
742                                         q->toplevel = borrowed->level;
743                                         return;
744                                 }
745                         } while ((borrowed=borrowed->borrow) != NULL);
746                 }
747 #if 0
748         /* It is not necessary now. Uncommenting it
749            will save CPU cycles, but decrease fairness.
750          */
751                 q->toplevel = TC_CBQ_MAXLEVEL;
752 #endif
753         }
754 }
755
756 static void
757 cbq_update(struct cbq_sched_data *q)
758 {
759         struct cbq_class *this = q->tx_class;
760         struct cbq_class *cl = this;
761         int len = q->tx_len;
762
763         q->tx_class = NULL;
764
765         for ( ; cl; cl = cl->share) {
766                 long avgidle = cl->avgidle;
767                 long idle;
768
769                 cl->bstats.packets++;
770                 cl->bstats.bytes += len;
771
772                 /*
773                    (now - last) is total time between packet right edges.
774                    (last_pktlen/rate) is "virtual" busy time, so that
775
776                          idle = (now - last) - last_pktlen/rate
777                  */
778
779                 idle = q->now - cl->last;
780                 if ((unsigned long)idle > 128*1024*1024) {
781                         avgidle = cl->maxidle;
782                 } else {
783                         idle -= L2T(cl, len);
784
785                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
787                    cl->avgidle == true_avgidle/W,
788                    hence:
789                  */
790                         avgidle += idle - (avgidle>>cl->ewma_log);
791                 }
792
793                 if (avgidle <= 0) {
794                         /* Overlimit or at-limit */
795
796                         if (avgidle < cl->minidle)
797                                 avgidle = cl->minidle;
798
799                         cl->avgidle = avgidle;
800
801                         /* Calculate the expected time when this class
802                            will be allowed to send.
803                            It will occur when:
804                            (1-W)*true_avgidle + W*delay = 0, i.e.
805                            idle = (1/W - 1)*(-true_avgidle)
806                            or
807                            idle = (1 - W)*(-cl->avgidle);
808                          */
809                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811                         /*
812                            That is not all.
813                            To maintain the rate allocated to the class,
814                            we add to undertime the virtual clock time
815                            needed to complete the transmitted packet.
816                            (len/phys_bandwidth has already elapsed by
817                            the moment of cbq_update.)
818                          */
819
820                         idle -= L2T(&q->link, len);
821                         idle += L2T(cl, len);
822
823                         cl->undertime = q->now + idle;
824                 } else {
825                         /* Underlimit */
826
827                         cl->undertime = PSCHED_PASTPERFECT;
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
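/* A worked instance of the scaled EWMA update above (numbers are illustrative
   only): with ewma_log == 3 (W == 1/8), a stored avgidle of 100 corresponds
   to a true average idle of 100*W == 12.5. A new sample idle == 20 gives

        true_avgidle' = (1 - 1/8)*12.5 + (1/8)*20 = 13.4375,

   i.e. a stored value of 13.4375/W == 107.5, while the integer update
   avgidle += idle - (avgidle >> ewma_log) yields 100 + 20 - 12 == 108;
   the small difference is just the truncation of the right shift. */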
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
849                 cl->delayed = 0;
850                 return cl;
851         }
852
853         do {
854                 /* This is a very suspicious place. Currently the overlimit
855                    action is generated for non-bounded classes
856                    only if the link is completely congested.
857                    Though this agrees with the ancestor-only paradigm,
858                    it looks very stupid. In particular,
859                    it means that this chunk of code will either
860                    never be called or will result in strong amplification
861                    of burstiness. Dangerous, silly, but
862                    no other solution exists.
863                  */
864                 if ((cl = cl->borrow) == NULL) {
865                         this_cl->qstats.overlimits++;
866                         this_cl->overlimit(this_cl);
867                         return NULL;
868                 }
869                 if (cl->level > q->toplevel)
870                         return NULL;
871         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
872
873         cl->delayed = 0;
874         return cl;
875 }
876
877 static __inline__ struct sk_buff *
878 cbq_dequeue_prio(struct Qdisc *sch, int prio)
879 {
880         struct cbq_sched_data *q = qdisc_priv(sch);
881         struct cbq_class *cl_tail, *cl_prev, *cl;
882         struct sk_buff *skb;
883         int deficit;
884
885         cl_tail = cl_prev = q->active[prio];
886         cl = cl_prev->next_alive;
887
888         do {
889                 deficit = 0;
890
891                 /* Start round */
892                 do {
893                         struct cbq_class *borrow = cl;
894
895                         if (cl->q->q.qlen &&
896                             (borrow = cbq_under_limit(cl)) == NULL)
897                                 goto skip_class;
898
899                         if (cl->deficit <= 0) {
900                                 /* Class exhausted its allotment per
901                                    this round. Switch to the next one.
902                                  */
903                                 deficit = 1;
904                                 cl->deficit += cl->quantum;
905                                 goto next_class;
906                         }
907
908                         skb = cl->q->dequeue(cl->q);
909
910                         /* Class did not give us any skb :-(
911                            It could occur even if cl->q->q.qlen != 0
912                            e.g. if cl->q == "tbf"
913                          */
914                         if (skb == NULL)
915                                 goto skip_class;
916
917                         cl->deficit -= skb->len;
918                         q->tx_class = cl;
919                         q->tx_borrowed = borrow;
920                         if (borrow != cl) {
921 #ifndef CBQ_XSTATS_BORROWS_BYTES
922                                 borrow->xstats.borrows++;
923                                 cl->xstats.borrows++;
924 #else
925                                 borrow->xstats.borrows += skb->len;
926                                 cl->xstats.borrows += skb->len;
927 #endif
928                         }
929                         q->tx_len = skb->len;
930
931                         if (cl->deficit <= 0) {
932                                 q->active[prio] = cl;
933                                 cl = cl->next_alive;
934                                 cl->deficit += cl->quantum;
935                         }
936                         return skb;
937
938 skip_class:
939                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
940                                 /* Class is empty or penalized.
941                                    Unlink it from active chain.
942                                  */
943                                 cl_prev->next_alive = cl->next_alive;
944                                 cl->next_alive = NULL;
945
946                                 /* Did cl_tail point to it? */
947                                 if (cl == cl_tail) {
948                                         /* Repair it! */
949                                         cl_tail = cl_prev;
950
951                                         /* Was it the last class in this band? */
952                                         if (cl == cl_tail) {
953                                                 /* Kill the band! */
954                                                 q->active[prio] = NULL;
955                                                 q->activemask &= ~(1<<prio);
956                                                 if (cl->q->q.qlen)
957                                                         cbq_activate_class(cl);
958                                                 return NULL;
959                                         }
960
961                                         q->active[prio] = cl_tail;
962                                 }
963                                 if (cl->q->q.qlen)
964                                         cbq_activate_class(cl);
965
966                                 cl = cl_prev;
967                         }
968
969 next_class:
970                         cl_prev = cl;
971                         cl = cl->next_alive;
972                 } while (cl_prev != cl_tail);
973         } while (deficit);
974
975         q->active[prio] = cl_prev;
976
977         return NULL;
978 }
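/* A rough picture of the WRR accounting above (illustrative numbers): with
   quantum == 1500 and a stream of 1000-byte packets, a class whose deficit
   starts at 1500 sends one packet (deficit 500), sends another (deficit -500)
   on a later dequeue, and is then skipped until its deficit is replenished by
   another quantum. Averaged over many rounds each backlogged class therefore
   gets bandwidth roughly proportional to its quantum, even when the quantum
   is smaller than the MTU. */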
979
980 static __inline__ struct sk_buff *
981 cbq_dequeue_1(struct Qdisc *sch)
982 {
983         struct cbq_sched_data *q = qdisc_priv(sch);
984         struct sk_buff *skb;
985         unsigned activemask;
986
987         activemask = q->activemask&0xFF;
988         while (activemask) {
989                 int prio = ffz(~activemask);
990                 activemask &= ~(1<<prio);
991                 skb = cbq_dequeue_prio(sch, prio);
992                 if (skb)
993                         return skb;
994         }
995         return NULL;
996 }
997
998 static struct sk_buff *
999 cbq_dequeue(struct Qdisc *sch)
1000 {
1001         struct sk_buff *skb;
1002         struct cbq_sched_data *q = qdisc_priv(sch);
1003         psched_time_t now;
1004         psched_tdiff_t incr;
1005
1006         now = psched_get_time();
1007         incr = now - q->now_rt;
1008
1009         if (q->tx_class) {
1010                 psched_tdiff_t incr2;
1011                 /* Time integrator. We calculate EOS time
1012                    by adding expected packet transmission time.
1013                    If real time is greater, we warp artificial clock,
1014                    so that:
1015
1016                    cbq_time = max(real_time, work);
1017                  */
1018                 incr2 = L2T(&q->link, q->tx_len);
1019                 q->now += incr2;
1020                 cbq_update(q);
1021                 if ((incr -= incr2) < 0)
1022                         incr = 0;
1023         }
1024         q->now += incr;
1025         q->now_rt = now;
1026
1027         for (;;) {
1028                 q->wd_expires = 0;
1029
1030                 skb = cbq_dequeue_1(sch);
1031                 if (skb) {
1032                         sch->q.qlen--;
1033                         sch->flags &= ~TCQ_F_THROTTLED;
1034                         return skb;
1035                 }
1036
1037                 /* All the classes are overlimit.
1038
1039                    This is possible if:
1040
1041                    1. The scheduler is empty.
1042                    2. The toplevel cutoff inhibited borrowing.
1043                    3. The root class is overlimit.
1044
1045                    Reset the 2nd and 3rd conditions and retry.
1046
1047                    Note that NS and cbq-2.0 are buggy here: picking
1048                    an arbitrary class is appropriate for ancestor-only
1049                    sharing, but not for the top-level algorithm.
1050
1051                    Our version is better but slower, because it requires
1052                    two passes; that is unavoidable with top-level sharing.
1053                 */
1054
1055                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1056                     q->link.undertime == PSCHED_PASTPERFECT)
1057                         break;
1058
1059                 q->toplevel = TC_CBQ_MAXLEVEL;
1060                 q->link.undertime = PSCHED_PASTPERFECT;
1061         }
1062
1063         /* No packets in the scheduler, or nobody wants to give them to us :-(
1064            Sigh... start the watchdog timer in the latter case. */
1065
1066         if (sch->q.qlen) {
1067                 sch->qstats.overlimits++;
1068                 if (q->wd_expires)
1069                         qdisc_watchdog_schedule(&q->watchdog,
1070                                                 now + q->wd_expires);
1071         }
1072         return NULL;
1073 }
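/* A numeric sketch of the time integrator above (values invented for
   illustration): suppose 10 us of real time elapsed since the previous
   dequeue (incr) but the last packet needed 40 us on the wire according to
   L2T (incr2). Then q->now advances by incr2 == 40 us and the leftover
   incr - incr2 is clamped to 0, so the artificial clock runs ahead of real
   time; had 100 us elapsed instead, q->now would advance by the full 100 us.
   Either way q->now tracks max(real_time, accumulated work), as the comment
   in the function states. */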
1074
1075 /* CBQ class maintenance routines */
1076
1077 static void cbq_adjust_levels(struct cbq_class *this)
1078 {
1079         if (this == NULL)
1080                 return;
1081
1082         do {
1083                 int level = 0;
1084                 struct cbq_class *cl;
1085
1086                 if ((cl = this->children) != NULL) {
1087                         do {
1088                                 if (cl->level > level)
1089                                         level = cl->level;
1090                         } while ((cl = cl->sibling) != this->children);
1091                 }
1092                 this->level = level+1;
1093         } while ((this = this->tparent) != NULL);
1094 }
1095
1096 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1097 {
1098         struct cbq_class *cl;
1099         unsigned h;
1100
1101         if (q->quanta[prio] == 0)
1102                 return;
1103
1104         for (h=0; h<16; h++) {
1105                 for (cl = q->classes[h]; cl; cl = cl->next) {
1106                         /* BUGGGG... Beware! This expression suffers from
1107                            arithmetic overflows!
1108                          */
1109                         if (cl->priority == prio) {
1110                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1111                                         q->quanta[prio];
1112                         }
1113                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1114                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1115                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1116                         }
1117                 }
1118         }
1119 }
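/* A worked example of the quantum formula above (weights chosen for
   illustration): three classes in one priority band with allot == 1514 and
   weights 1, 2 and 1 give q->quanta[prio] == 4 and q->nclasses[prio] == 3,
   so the quanta become 1*1514*3/4 == 1135, 2*1514*3/4 == 2271 and 1135.
   The "BUGGGG" warning applies because weight*allot*nclasses is computed in
   a long and can overflow for large weights or allotments. */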
1120
1121 static void cbq_sync_defmap(struct cbq_class *cl)
1122 {
1123         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1124         struct cbq_class *split = cl->split;
1125         unsigned h;
1126         int i;
1127
1128         if (split == NULL)
1129                 return;
1130
1131         for (i=0; i<=TC_PRIO_MAX; i++) {
1132                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1133                         split->defaults[i] = NULL;
1134         }
1135
1136         for (i=0; i<=TC_PRIO_MAX; i++) {
1137                 int level = split->level;
1138
1139                 if (split->defaults[i])
1140                         continue;
1141
1142                 for (h=0; h<16; h++) {
1143                         struct cbq_class *c;
1144
1145                         for (c = q->classes[h]; c; c = c->next) {
1146                                 if (c->split == split && c->level < level &&
1147                                     c->defmap&(1<<i)) {
1148                                         split->defaults[i] = c;
1149                                         level = c->level;
1150                                 }
1151                         }
1152                 }
1153         }
1154 }
1155
1156 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1157 {
1158         struct cbq_class *split = NULL;
1159
1160         if (splitid == 0) {
1161                 if ((split = cl->split) == NULL)
1162                         return;
1163                 splitid = split->classid;
1164         }
1165
1166         if (split == NULL || split->classid != splitid) {
1167                 for (split = cl->tparent; split; split = split->tparent)
1168                         if (split->classid == splitid)
1169                                 break;
1170         }
1171
1172         if (split == NULL)
1173                 return;
1174
1175         if (cl->split != split) {
1176                 cl->defmap = 0;
1177                 cbq_sync_defmap(cl);
1178                 cl->split = split;
1179                 cl->defmap = def&mask;
1180         } else
1181                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1182
1183         cbq_sync_defmap(cl);
1184 }
1185
1186 static void cbq_unlink_class(struct cbq_class *this)
1187 {
1188         struct cbq_class *cl, **clp;
1189         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1190
1191         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1192                 if (cl == this) {
1193                         *clp = cl->next;
1194                         cl->next = NULL;
1195                         break;
1196                 }
1197         }
1198
1199         if (this->tparent) {
1200                 clp=&this->sibling;
1201                 cl = *clp;
1202                 do {
1203                         if (cl == this) {
1204                                 *clp = cl->sibling;
1205                                 break;
1206                         }
1207                         clp = &cl->sibling;
1208                 } while ((cl = *clp) != this->sibling);
1209
1210                 if (this->tparent->children == this) {
1211                         this->tparent->children = this->sibling;
1212                         if (this->sibling == this)
1213                                 this->tparent->children = NULL;
1214                 }
1215         } else {
1216                 BUG_TRAP(this->sibling == this);
1217         }
1218 }
1219
1220 static void cbq_link_class(struct cbq_class *this)
1221 {
1222         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1223         unsigned h = cbq_hash(this->classid);
1224         struct cbq_class *parent = this->tparent;
1225
1226         this->sibling = this;
1227         this->next = q->classes[h];
1228         q->classes[h] = this;
1229
1230         if (parent == NULL)
1231                 return;
1232
1233         if (parent->children == NULL) {
1234                 parent->children = this;
1235         } else {
1236                 this->sibling = parent->children->sibling;
1237                 parent->children->sibling = this;
1238         }
1239 }
1240
1241 static unsigned int cbq_drop(struct Qdisc* sch)
1242 {
1243         struct cbq_sched_data *q = qdisc_priv(sch);
1244         struct cbq_class *cl, *cl_head;
1245         int prio;
1246         unsigned int len;
1247
1248         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1249                 if ((cl_head = q->active[prio]) == NULL)
1250                         continue;
1251
1252                 cl = cl_head;
1253                 do {
1254                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1255                                 sch->q.qlen--;
1256                                 if (!cl->q->q.qlen)
1257                                         cbq_deactivate_class(cl);
1258                                 return len;
1259                         }
1260                 } while ((cl = cl->next_alive) != cl_head);
1261         }
1262         return 0;
1263 }
1264
1265 static void
1266 cbq_reset(struct Qdisc* sch)
1267 {
1268         struct cbq_sched_data *q = qdisc_priv(sch);
1269         struct cbq_class *cl;
1270         int prio;
1271         unsigned h;
1272
1273         q->activemask = 0;
1274         q->pmask = 0;
1275         q->tx_class = NULL;
1276         q->tx_borrowed = NULL;
1277         qdisc_watchdog_cancel(&q->watchdog);
1278         hrtimer_cancel(&q->delay_timer);
1279         q->toplevel = TC_CBQ_MAXLEVEL;
1280         q->now = psched_get_time();
1281         q->now_rt = q->now;
1282
1283         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1284                 q->active[prio] = NULL;
1285
1286         for (h = 0; h < 16; h++) {
1287                 for (cl = q->classes[h]; cl; cl = cl->next) {
1288                         qdisc_reset(cl->q);
1289
1290                         cl->next_alive = NULL;
1291                         cl->undertime = PSCHED_PASTPERFECT;
1292                         cl->avgidle = cl->maxidle;
1293                         cl->deficit = cl->quantum;
1294                         cl->cpriority = cl->priority;
1295                 }
1296         }
1297         sch->q.qlen = 0;
1298 }
1299
1300
1301 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1302 {
1303         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1304                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1305                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1306         }
1307         if (lss->change&TCF_CBQ_LSS_EWMA)
1308                 cl->ewma_log = lss->ewma_log;
1309         if (lss->change&TCF_CBQ_LSS_AVPKT)
1310                 cl->avpkt = lss->avpkt;
1311         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1312                 cl->minidle = -(long)lss->minidle;
1313         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1314                 cl->maxidle = lss->maxidle;
1315                 cl->avgidle = lss->maxidle;
1316         }
1317         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1318                 cl->offtime = lss->offtime;
1319         return 0;
1320 }
1321
1322 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1323 {
1324         q->nclasses[cl->priority]--;
1325         q->quanta[cl->priority] -= cl->weight;
1326         cbq_normalize_quanta(q, cl->priority);
1327 }
1328
1329 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1330 {
1331         q->nclasses[cl->priority]++;
1332         q->quanta[cl->priority] += cl->weight;
1333         cbq_normalize_quanta(q, cl->priority);
1334 }
1335
1336 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1337 {
1338         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1339
1340         if (wrr->allot)
1341                 cl->allot = wrr->allot;
1342         if (wrr->weight)
1343                 cl->weight = wrr->weight;
1344         if (wrr->priority) {
1345                 cl->priority = wrr->priority-1;
1346                 cl->cpriority = cl->priority;
1347                 if (cl->priority >= cl->priority2)
1348                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1349         }
1350
1351         cbq_addprio(q, cl);
1352         return 0;
1353 }
1354
1355 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1356 {
1357         switch (ovl->strategy) {
1358         case TC_CBQ_OVL_CLASSIC:
1359                 cl->overlimit = cbq_ovl_classic;
1360                 break;
1361         case TC_CBQ_OVL_DELAY:
1362                 cl->overlimit = cbq_ovl_delay;
1363                 break;
1364         case TC_CBQ_OVL_LOWPRIO:
1365                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1366                     ovl->priority2-1 <= cl->priority)
1367                         return -EINVAL;
1368                 cl->priority2 = ovl->priority2-1;
1369                 cl->overlimit = cbq_ovl_lowprio;
1370                 break;
1371         case TC_CBQ_OVL_DROP:
1372                 cl->overlimit = cbq_ovl_drop;
1373                 break;
1374         case TC_CBQ_OVL_RCLASSIC:
1375                 cl->overlimit = cbq_ovl_rclassic;
1376                 break;
1377         default:
1378                 return -EINVAL;
1379         }
1380         cl->penalty = ovl->penalty;
1381         return 0;
1382 }
1383
1384 #ifdef CONFIG_NET_CLS_POLICE
1385 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1386 {
1387         cl->police = p->police;
1388
1389         if (cl->q->handle) {
1390                 if (p->police == TC_POLICE_RECLASSIFY)
1391                         cl->q->reshape_fail = cbq_reshape_fail;
1392                 else
1393                         cl->q->reshape_fail = NULL;
1394         }
1395         return 0;
1396 }
1397 #endif
1398
1399 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1400 {
1401         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1402         return 0;
1403 }
1404
1405 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1406 {
1407         struct cbq_sched_data *q = qdisc_priv(sch);
1408         struct rtattr *tb[TCA_CBQ_MAX];
1409         struct tc_ratespec *r;
1410
1411         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1412             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1413             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1414                 return -EINVAL;
1415
1416         if (tb[TCA_CBQ_LSSOPT-1] &&
1417             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1418                 return -EINVAL;
1419
1420         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1421
1422         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1423                 return -EINVAL;
1424
1425         q->link.refcnt = 1;
1426         q->link.sibling = &q->link;
1427         q->link.classid = sch->handle;
1428         q->link.qdisc = sch;
1429         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1430                                             sch->handle)))
1431                 q->link.q = &noop_qdisc;
1432
1433         q->link.priority = TC_CBQ_MAXPRIO-1;
1434         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1435         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1436         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1437         q->link.overlimit = cbq_ovl_classic;
1438         q->link.allot = psched_mtu(sch->dev);
1439         q->link.quantum = q->link.allot;
1440         q->link.weight = q->link.R_tab->rate.rate;
1441
1442         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1443         q->link.avpkt = q->link.allot/2;
1444         q->link.minidle = -0x7FFFFFFF;
1445         q->link.stats_lock = &sch->dev->queue_lock;
1446
1447         qdisc_watchdog_init(&q->watchdog, sch);
1448         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1449         q->delay_timer.function = cbq_undelay;
1450         q->toplevel = TC_CBQ_MAXLEVEL;
1451         q->now = psched_get_time();
1452         q->now_rt = q->now;
1453
1454         cbq_link_class(&q->link);
1455
1456         if (tb[TCA_CBQ_LSSOPT-1])
1457                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1458
1459         cbq_addprio(q, &q->link);
1460         return 0;
1461 }
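/* For reference, a minimal iproute2 configuration exercising this qdisc might
   look like the following (an illustrative sketch assuming the classic
   "tc ... cbq" syntax; device name and rates are made up):

        tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
        tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
                rate 6Mbit allot 1514 prio 5 avpkt 1000 bounded

   cbq_init() above handles the qdisc line: it builds the root "link" class
   from the TCA_CBQ_RATE and TCA_CBQ_RTAB attributes that the tc utility
   derives from its command-line parameters. */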
1462
1463 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1464 {
1465         unsigned char *b = skb_tail_pointer(skb);
1466
1467         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1468         return skb->len;
1469
1470 rtattr_failure:
1471         nlmsg_trim(skb, b);
1472         return -1;
1473 }
1474
1475 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1476 {
1477         unsigned char *b = skb_tail_pointer(skb);
1478         struct tc_cbq_lssopt opt;
1479
1480         opt.flags = 0;
1481         if (cl->borrow == NULL)
1482                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1483         if (cl->share == NULL)
1484                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1485         opt.ewma_log = cl->ewma_log;
1486         opt.level = cl->level;
1487         opt.avpkt = cl->avpkt;
1488         opt.maxidle = cl->maxidle;
1489         opt.minidle = (u32)(-cl->minidle);
1490         opt.offtime = cl->offtime;
1491         opt.change = ~0;
1492         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1493         return skb->len;
1494
1495 rtattr_failure:
1496         nlmsg_trim(skb, b);
1497         return -1;
1498 }
1499
1500 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1501 {
1502         unsigned char *b = skb_tail_pointer(skb);
1503         struct tc_cbq_wrropt opt;
1504
1505         opt.flags = 0;
1506         opt.allot = cl->allot;
1507         opt.priority = cl->priority+1;
1508         opt.cpriority = cl->cpriority+1;
1509         opt.weight = cl->weight;
1510         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1511         return skb->len;
1512
1513 rtattr_failure:
1514         nlmsg_trim(skb, b);
1515         return -1;
1516 }
1517
1518 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1519 {
1520         unsigned char *b = skb_tail_pointer(skb);
1521         struct tc_cbq_ovl opt;
1522
1523         opt.strategy = cl->ovl_strategy;
1524         opt.priority2 = cl->priority2+1;
1525         opt.pad = 0;
1526         opt.penalty = cl->penalty;
1527         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1528         return skb->len;
1529
1530 rtattr_failure:
1531         nlmsg_trim(skb, b);
1532         return -1;
1533 }
1534
1535 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1536 {
1537         unsigned char *b = skb_tail_pointer(skb);
1538         struct tc_cbq_fopt opt;
1539
1540         if (cl->split || cl->defmap) {
1541                 opt.split = cl->split ? cl->split->classid : 0;
1542                 opt.defmap = cl->defmap;
1543                 opt.defchange = ~0;
1544                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1545         }
1546         return skb->len;
1547
1548 rtattr_failure:
1549         nlmsg_trim(skb, b);
1550         return -1;
1551 }
1552
1553 #ifdef CONFIG_NET_CLS_POLICE
1554 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1555 {
1556         unsigned char *b = skb_tail_pointer(skb);
1557         struct tc_cbq_police opt;
1558
1559         if (cl->police) {
1560                 opt.police = cl->police;
1561                 opt.__res1 = 0;
1562                 opt.__res2 = 0;
1563                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1564         }
1565         return skb->len;
1566
1567 rtattr_failure:
1568         nlmsg_trim(skb, b);
1569         return -1;
1570 }
1571 #endif
1572
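/*
 * Append the complete per-class attribute set (link-sharing, rate, WRR,
 * overlimit strategy, policing and filter map).  Used by both the qdisc
 * dump and the class dump below.
 */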
1573 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1574 {
1575         if (cbq_dump_lss(skb, cl) < 0 ||
1576             cbq_dump_rate(skb, cl) < 0 ||
1577             cbq_dump_wrr(skb, cl) < 0 ||
1578             cbq_dump_ovl(skb, cl) < 0 ||
1579 #ifdef CONFIG_NET_CLS_POLICE
1580             cbq_dump_police(skb, cl) < 0 ||
1581 #endif
1582             cbq_dump_fopt(skb, cl) < 0)
1583                 return -1;
1584         return 0;
1585 }
1586
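/* Dump the root (link) class parameters as the qdisc's TCA_OPTIONS. */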
1587 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1588 {
1589         struct cbq_sched_data *q = qdisc_priv(sch);
1590         unsigned char *b = skb_tail_pointer(skb);
1591         struct rtattr *rta;
1592
1593         rta = (struct rtattr*)b;
1594         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1595         if (cbq_dump_attr(skb, &q->link) < 0)
1596                 goto rtattr_failure;
1597         rta->rta_len = skb_tail_pointer(skb) - b;
1598         return skb->len;
1599
1600 rtattr_failure:
1601         nlmsg_trim(skb, b);
1602         return -1;
1603 }
1604
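/* Copy the link class' CBQ-specific statistics (avgidle etc.) to userspace. */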
1605 static int
1606 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1607 {
1608         struct cbq_sched_data *q = qdisc_priv(sch);
1609
1610         q->link.xstats.avgidle = q->link.avgidle;
1611         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1612 }
1613
1614 static int
1615 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1616                struct sk_buff *skb, struct tcmsg *tcm)
1617 {
1618         struct cbq_class *cl = (struct cbq_class*)arg;
1619         unsigned char *b = skb_tail_pointer(skb);
1620         struct rtattr *rta;
1621
1622         if (cl->tparent)
1623                 tcm->tcm_parent = cl->tparent->classid;
1624         else
1625                 tcm->tcm_parent = TC_H_ROOT;
1626         tcm->tcm_handle = cl->classid;
1627         tcm->tcm_info = cl->q->handle;
1628
1629         rta = (struct rtattr*)b;
1630         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1631         if (cbq_dump_attr(skb, cl) < 0)
1632                 goto rtattr_failure;
1633         rta->rta_len = skb_tail_pointer(skb) - b;
1634         return skb->len;
1635
1636 rtattr_failure:
1637         nlmsg_trim(skb, b);
1638         return -1;
1639 }
1640
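/*
 * Per-class statistics dump: refresh queue length and idle-time figures,
 * then copy the basic, rate-estimator and queue stats followed by the
 * CBQ-specific xstats.
 */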
1641 static int
1642 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1643         struct gnet_dump *d)
1644 {
1645         struct cbq_sched_data *q = qdisc_priv(sch);
1646         struct cbq_class *cl = (struct cbq_class*)arg;
1647
1648         cl->qstats.qlen = cl->q->q.qlen;
1649         cl->xstats.avgidle = cl->avgidle;
1650         cl->xstats.undertime = 0;
1651
1652         if (cl->undertime != PSCHED_PASTPERFECT)
1653                 cl->xstats.undertime = cl->undertime - q->now;
1654
1655         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1656 #ifdef CONFIG_NET_ESTIMATOR
1657             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1658 #endif
1659             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1660                 return -1;
1661
1662         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1663 }
1664
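/*
 * Replace the child qdisc of a class, e.g. when userspace attaches a new
 * qdisc below a CBQ class (illustratively, "tc qdisc add dev eth0 parent 1:1 sfq").
 * A NULL replacement falls back to a default pfifo; the old qdisc is reset
 * and handed back through *old.
 */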
1665 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1666                      struct Qdisc **old)
1667 {
1668         struct cbq_class *cl = (struct cbq_class*)arg;
1669
1670         if (cl) {
1671                 if (new == NULL) {
1672                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1673                                                      cl->classid)) == NULL)
1674                                 return -ENOBUFS;
1675                 } else {
1676 #ifdef CONFIG_NET_CLS_POLICE
1677                         if (cl->police == TC_POLICE_RECLASSIFY)
1678                                 new->reshape_fail = cbq_reshape_fail;
1679 #endif
1680                 }
1681                 sch_tree_lock(sch);
1682                 *old = xchg(&cl->q, new);
1683                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1684                 qdisc_reset(*old);
1685                 sch_tree_unlock(sch);
1686
1687                 return 0;
1688         }
1689         return -ENOENT;
1690 }
1691
1692 static struct Qdisc *
1693 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1694 {
1695         struct cbq_class *cl = (struct cbq_class*)arg;
1696
1697         return cl ? cl->q : NULL;
1698 }
1699
1700 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1701 {
1702         struct cbq_class *cl = (struct cbq_class *)arg;
1703
1704         if (cl->q->q.qlen == 0)
1705                 cbq_deactivate_class(cl);
1706 }
1707
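/* Look up a class by classid and take a reference on it. */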
1708 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1709 {
1710         struct cbq_sched_data *q = qdisc_priv(sch);
1711         struct cbq_class *cl = cbq_class_lookup(q, classid);
1712
1713         if (cl) {
1714                 cl->refcnt++;
1715                 return (unsigned long)cl;
1716         }
1717         return 0;
1718 }
1719
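/*
 * Final teardown of a single class: filter chain, child qdisc, rate table
 * and rate estimator.  The embedded link class lives in the qdisc private
 * data and therefore is not kfree'd.
 */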
1720 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1721 {
1722         struct cbq_sched_data *q = qdisc_priv(sch);
1723
1724         BUG_TRAP(!cl->filters);
1725
1726         tcf_destroy_chain(cl->filter_list);
1727         qdisc_destroy(cl->q);
1728         qdisc_put_rtab(cl->R_tab);
1729 #ifdef CONFIG_NET_ESTIMATOR
1730         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1731 #endif
1732         if (cl != &q->link)
1733                 kfree(cl);
1734 }
1735
1736 static void
1737 cbq_destroy(struct Qdisc* sch)
1738 {
1739         struct cbq_sched_data *q = qdisc_priv(sch);
1740         struct cbq_class *cl;
1741         unsigned h;
1742
1743 #ifdef CONFIG_NET_CLS_POLICE
1744         q->rx_class = NULL;
1745 #endif
1746         /*
1747          * Filters must be destroyed first because the classes are not
1748          * destroyed from root to leaves, so a filter could still be
1749          * bound to a class that has already been destroyed. --TGR '04
1750          */
1751         for (h = 0; h < 16; h++)
1752                 for (cl = q->classes[h]; cl; cl = cl->next)
1753                         tcf_destroy_chain(cl->filter_list);
1754
1755         for (h = 0; h < 16; h++) {
1756                 struct cbq_class *next;
1757
1758                 for (cl = q->classes[h]; cl; cl = next) {
1759                         next = cl->next;
1760                         cbq_destroy_class(sch, cl);
1761                 }
1762         }
1763 }
1764
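/*
 * Drop a class reference; when the last one goes away the class is
 * destroyed, after clearing any cached rx_class pointer so the policing
 * path cannot see a stale class.
 */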
1765 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1766 {
1767         struct cbq_class *cl = (struct cbq_class*)arg;
1768
1769         if (--cl->refcnt == 0) {
1770 #ifdef CONFIG_NET_CLS_POLICE
1771                 struct cbq_sched_data *q = qdisc_priv(sch);
1772
1773                 spin_lock_bh(&sch->dev->queue_lock);
1774                 if (q->rx_class == cl)
1775                         q->rx_class = NULL;
1776                 spin_unlock_bh(&sch->dev->queue_lock);
1777 #endif
1778
1779                 cbq_destroy_class(sch, cl);
1780         }
1781 }
1782
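/*
 * Create a new class or modify an existing one.  Parameters arrive as
 * nested TCA_CBQ_* attributes; creating a class requires at least RATE,
 * WRROPT and LSSOPT, and a classid is generated if none was supplied.
 *
 * Illustrative iproute2 usage (tc syntax shown only as an example):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
 *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
 *      rate 6Mbit weight 0.6Mbit prio 5 allot 1514 maxburst 20 \
 *      avpkt 1000 bounded
 */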
1783 static int
1784 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1785                  unsigned long *arg)
1786 {
1787         int err;
1788         struct cbq_sched_data *q = qdisc_priv(sch);
1789         struct cbq_class *cl = (struct cbq_class*)*arg;
1790         struct rtattr *opt = tca[TCA_OPTIONS-1];
1791         struct rtattr *tb[TCA_CBQ_MAX];
1792         struct cbq_class *parent;
1793         struct qdisc_rate_table *rtab = NULL;
1794
1795         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1796                 return -EINVAL;
1797
1798         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1799             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1800                 return -EINVAL;
1801
1802         if (tb[TCA_CBQ_FOPT-1] &&
1803             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1804                 return -EINVAL;
1805
1806         if (tb[TCA_CBQ_RATE-1] &&
1807             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1808                 return -EINVAL;
1809
1810         if (tb[TCA_CBQ_LSSOPT-1] &&
1811             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1812                 return -EINVAL;
1813
1814         if (tb[TCA_CBQ_WRROPT-1] &&
1815             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1816                 return -EINVAL;
1817
1818 #ifdef CONFIG_NET_CLS_POLICE
1819         if (tb[TCA_CBQ_POLICE-1] &&
1820             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1821                 return -EINVAL;
1822 #endif
1823
1824         if (cl) {
1825                 /* Check parent */
1826                 if (parentid) {
1827                         if (cl->tparent && cl->tparent->classid != parentid)
1828                                 return -EINVAL;
1829                         if (!cl->tparent && parentid != TC_H_ROOT)
1830                                 return -EINVAL;
1831                 }
1832
1833                 if (tb[TCA_CBQ_RATE-1]) {
1834                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1835                         if (rtab == NULL)
1836                                 return -EINVAL;
1837                 }
1838
1839                 /* Change class parameters */
1840                 sch_tree_lock(sch);
1841
1842                 if (cl->next_alive != NULL)
1843                         cbq_deactivate_class(cl);
1844
1845                 if (rtab) {
1846                         rtab = xchg(&cl->R_tab, rtab);
1847                         qdisc_put_rtab(rtab);
1848                 }
1849
1850                 if (tb[TCA_CBQ_LSSOPT-1])
1851                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1852
1853                 if (tb[TCA_CBQ_WRROPT-1]) {
1854                         cbq_rmprio(q, cl);
1855                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1856                 }
1857
1858                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1859                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1860
1861 #ifdef CONFIG_NET_CLS_POLICE
1862                 if (tb[TCA_CBQ_POLICE-1])
1863                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1864 #endif
1865
1866                 if (tb[TCA_CBQ_FOPT-1])
1867                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1868
1869                 if (cl->q->q.qlen)
1870                         cbq_activate_class(cl);
1871
1872                 sch_tree_unlock(sch);
1873
1874 #ifdef CONFIG_NET_ESTIMATOR
1875                 if (tca[TCA_RATE-1])
1876                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1877                                 cl->stats_lock, tca[TCA_RATE-1]);
1878 #endif
1879                 return 0;
1880         }
1881
1882         if (parentid == TC_H_ROOT)
1883                 return -EINVAL;
1884
1885         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1886             tb[TCA_CBQ_LSSOPT-1] == NULL)
1887                 return -EINVAL;
1888
1889         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1890         if (rtab == NULL)
1891                 return -EINVAL;
1892
1893         if (classid) {
1894                 err = -EINVAL;
1895                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1896                         goto failure;
1897         } else {
1898                 int i;
1899                 classid = TC_H_MAKE(sch->handle,0x8000);
1900
1901                 for (i=0; i<0x8000; i++) {
1902                         if (++q->hgenerator >= 0x8000)
1903                                 q->hgenerator = 1;
1904                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1905                                 break;
1906                 }
1907                 err = -ENOSR;
1908                 if (i >= 0x8000)
1909                         goto failure;
1910                 classid = classid|q->hgenerator;
1911         }
1912
1913         parent = &q->link;
1914         if (parentid) {
1915                 parent = cbq_class_lookup(q, parentid);
1916                 err = -EINVAL;
1917                 if (parent == NULL)
1918                         goto failure;
1919         }
1920
1921         err = -ENOBUFS;
1922         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1923         if (cl == NULL)
1924                 goto failure;
1925         cl->R_tab = rtab;
1926         rtab = NULL;
1927         cl->refcnt = 1;
1928         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1929                 cl->q = &noop_qdisc;
1930         cl->classid = classid;
1931         cl->tparent = parent;
1932         cl->qdisc = sch;
1933         cl->allot = parent->allot;
1934         cl->quantum = cl->allot;
1935         cl->weight = cl->R_tab->rate.rate;
1936         cl->stats_lock = &sch->dev->queue_lock;
1937
1938         sch_tree_lock(sch);
1939         cbq_link_class(cl);
1940         cl->borrow = cl->tparent;
1941         if (cl->tparent != &q->link)
1942                 cl->share = cl->tparent;
1943         cbq_adjust_levels(parent);
1944         cl->minidle = -0x7FFFFFFF;
1945         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1946         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1947         if (cl->ewma_log==0)
1948                 cl->ewma_log = q->link.ewma_log;
1949         if (cl->maxidle==0)
1950                 cl->maxidle = q->link.maxidle;
1951         if (cl->avpkt==0)
1952                 cl->avpkt = q->link.avpkt;
1953         cl->overlimit = cbq_ovl_classic;
1954         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1955                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1956 #ifdef CONFIG_NET_CLS_POLICE
1957         if (tb[TCA_CBQ_POLICE-1])
1958                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1959 #endif
1960         if (tb[TCA_CBQ_FOPT-1])
1961                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1962         sch_tree_unlock(sch);
1963
1964 #ifdef CONFIG_NET_ESTIMATOR
1965         if (tca[TCA_RATE-1])
1966                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1967                         cl->stats_lock, tca[TCA_RATE-1]);
1968 #endif
1969
1970         *arg = (unsigned long)cl;
1971         return 0;
1972
1973 failure:
1974         qdisc_put_rtab(rtab);
1975         return err;
1976 }
1977
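/*
 * Delete a class.  Refused while filters are bound or child classes exist;
 * otherwise the class queue is purged, the class is unlinked from the hash,
 * the WRR lists and the defmaps, and the caller's reference is dropped.
 */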
1978 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1979 {
1980         struct cbq_sched_data *q = qdisc_priv(sch);
1981         struct cbq_class *cl = (struct cbq_class*)arg;
1982         unsigned int qlen;
1983
1984         if (cl->filters || cl->children || cl == &q->link)
1985                 return -EBUSY;
1986
1987         sch_tree_lock(sch);
1988
1989         qlen = cl->q->q.qlen;
1990         qdisc_reset(cl->q);
1991         qdisc_tree_decrease_qlen(cl->q, qlen);
1992
1993         if (cl->next_alive)
1994                 cbq_deactivate_class(cl);
1995
1996         if (q->tx_borrowed == cl)
1997                 q->tx_borrowed = q->tx_class;
1998         if (q->tx_class == cl) {
1999                 q->tx_class = NULL;
2000                 q->tx_borrowed = NULL;
2001         }
2002 #ifdef CONFIG_NET_CLS_POLICE
2003         if (q->rx_class == cl)
2004                 q->rx_class = NULL;
2005 #endif
2006
2007         cbq_unlink_class(cl);
2008         cbq_adjust_levels(cl->tparent);
2009         cl->defmap = 0;
2010         cbq_sync_defmap(cl);
2011
2012         cbq_rmprio(q, cl);
2013         sch_tree_unlock(sch);
2014
2015         if (--cl->refcnt == 0)
2016                 cbq_destroy_class(sch, cl);
2017
2018         return 0;
2019 }
2020
2021 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2022 {
2023         struct cbq_sched_data *q = qdisc_priv(sch);
2024         struct cbq_class *cl = (struct cbq_class *)arg;
2025
2026         if (cl == NULL)
2027                 cl = &q->link;
2028
2029         return &cl->filter_list;
2030 }
2031
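/*
 * Bind a filter result to a class.  The target must sit strictly below the
 * filter's parent class in the hierarchy (lower level), otherwise the bind
 * is refused.
 */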
2032 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2033                                      u32 classid)
2034 {
2035         struct cbq_sched_data *q = qdisc_priv(sch);
2036         struct cbq_class *p = (struct cbq_class*)parent;
2037         struct cbq_class *cl = cbq_class_lookup(q, classid);
2038
2039         if (cl) {
2040                 if (p && p->level <= cl->level)
2041                         return 0;
2042                 cl->filters++;
2043                 return (unsigned long)cl;
2044         }
2045         return 0;
2046 }
2047
2048 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2049 {
2050         struct cbq_class *cl = (struct cbq_class*)arg;
2051
2052         cl->filters--;
2053 }
2054
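/* Iterate over all classes in the hash table on behalf of a class walker,
 * honouring the walker's skip/count/stop bookkeeping.
 */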
2055 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2056 {
2057         struct cbq_sched_data *q = qdisc_priv(sch);
2058         unsigned h;
2059
2060         if (arg->stop)
2061                 return;
2062
2063         for (h = 0; h < 16; h++) {
2064                 struct cbq_class *cl;
2065
2066                 for (cl = q->classes[h]; cl; cl = cl->next) {
2067                         if (arg->count < arg->skip) {
2068                                 arg->count++;
2069                                 continue;
2070                         }
2071                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2072                                 arg->stop = 1;
2073                                 return;
2074                         }
2075                         arg->count++;
2076                 }
2077         }
2078 }
2079
2080 static struct Qdisc_class_ops cbq_class_ops = {
2081         .graft          =       cbq_graft,
2082         .leaf           =       cbq_leaf,
2083         .qlen_notify    =       cbq_qlen_notify,
2084         .get            =       cbq_get,
2085         .put            =       cbq_put,
2086         .change         =       cbq_change_class,
2087         .delete         =       cbq_delete,
2088         .walk           =       cbq_walk,
2089         .tcf_chain      =       cbq_find_tcf,
2090         .bind_tcf       =       cbq_bind_filter,
2091         .unbind_tcf     =       cbq_unbind_filter,
2092         .dump           =       cbq_dump_class,
2093         .dump_stats     =       cbq_dump_class_stats,
2094 };
2095
2096 static struct Qdisc_ops cbq_qdisc_ops = {
2097         .next           =       NULL,
2098         .cl_ops         =       &cbq_class_ops,
2099         .id             =       "cbq",
2100         .priv_size      =       sizeof(struct cbq_sched_data),
2101         .enqueue        =       cbq_enqueue,
2102         .dequeue        =       cbq_dequeue,
2103         .requeue        =       cbq_requeue,
2104         .drop           =       cbq_drop,
2105         .init           =       cbq_init,
2106         .reset          =       cbq_reset,
2107         .destroy        =       cbq_destroy,
2108         .change         =       NULL,
2109         .dump           =       cbq_dump,
2110         .dump_stats     =       cbq_dump_stats,
2111         .owner          =       THIS_MODULE,
2112 };
2113
2114 static int __init cbq_module_init(void)
2115 {
2116         return register_qdisc(&cbq_qdisc_ops);
2117 }
2118 static void __exit cbq_module_exit(void)
2119 {
2120         unregister_qdisc(&cbq_qdisc_ops);
2121 }
2122 module_init(cbq_module_init)
2123 module_exit(cbq_module_exit)
2124 MODULE_LICENSE("GPL");