/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet has finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */
struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */

/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	spinlock_t		*stats_lock;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
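/* A note on L2T ("length to time"): the qdisc rate table precomputes,
 * for each of its 256 size buckets, the scheduler-clock time needed to
 * transmit a packet of that size at the class rate; len>>cell_log
 * selects the bucket.  As an illustration (assumed numbers), with
 * cell_log=3 a 1000-byte packet indexes slot 125 of R_tab->data[].
 */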
static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}
#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route based) at the root
   of CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;
		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;
		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;
	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
/*
   A packet has just been enqueued on an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */
static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
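/* A note on the list discipline above: the backlogged classes of one
 * priority band form a circular singly linked list threaded through
 * next_alive, and q->active[prio] points at the current tail.  A newly
 * activated class is linked in behind the tail and becomes the new
 * tail, so the WRR scan in cbq_dequeue_prio() (which starts from
 * tail->next_alive) reaches it last in the current round.
 */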
/*
   Unlink class from the active chain.
   Note that this very procedure is also done directly in cbq_dequeue*
   during the round-robin procedure.
 */
static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}
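/* A note on the toplevel variable: q->toplevel caps how far up the
 * class tree borrowing may reach.  cbq_mark_toplevel() lowers it to the
 * level of an underlimit ancestor of the transmitting class, and
 * cbq_under_limit() refuses to borrow from any class whose level is
 * above q->toplevel; TC_CBQ_MAXLEVEL means "no cutoff in effect".
 */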
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int ret;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_POLICE
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes+=len;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   The class goes to sleep, so that it will have no
		   chance to work off avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here: apparently,
		   they forgot to shift idle by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
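/* A note on wd_expires: the overlimit actions do not sleep themselves;
 * they only record in q->wd_expires how long the qdisc should stay
 * quiet.  cbq_dequeue() picks this value up when no class may send and
 * arms the qdisc watchdog at now + q->wd_expires, which later clears
 * TCQ_F_THROTTLED and reschedules the device.
 */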
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			if (cl->q->q.qlen)
				cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	PSCHED_GET_TIME(now);

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
	return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		cbq_mark_toplevel(q, cl);
		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}

#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but it looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */
static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}
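		/* A worked example of the scaled EWMA above (illustrative
		 * numbers): with ewma_log=5, W=1/32, so the true update is
		 * true_avgidle' = (31/32)*true_avgidle + idle/32.
		 * Multiplying through by 1/W=32 gives exactly
		 * avgidle' = avgidle + idle - (avgidle>>5),
		 * i.e. the statement above, with no divisions at run time.
		 */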
		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or, in the scaled variable,
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed
			   by the moment cbq_update is called.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
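/* A note on the state left behind by cbq_update(): avgidle > 0 means
 * the class has been idle more than its fair share and is underlimit
 * (undertime is reset to "past perfect"); avgidle <= 0 means it is at
 * or over its limit, and cl->undertime then holds the absolute virtual
 * time at which cbq_under_limit() will let it send (or borrow) again.
 */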
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q is a "tbf" qdisc.
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
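/* A note on the WRR loop above: this is essentially deficit round
 * robin.  Each backlogged class spends down cl->deficit; when it is
 * exhausted the class receives another cl->quantum and the scan moves
 * on, so over many rounds a class's share of its band is proportional
 * to its quantum.  The outer do/while repeats the round while any
 * class got a quantum refill, which is why quanta smaller than one
 * MTU still work; see the WRR discussion at the top of this file.
 */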
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If the real time is greater, we warp the artificial
		   clock, so that

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
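	/* An illustrative trace of the integrator above (assumed
	 * numbers): if the previous packet was 1500 bytes on a
	 * 10 Mbit/s link rate, incr2 = L2T(&q->link, 1500) is roughly
	 * 1.2 ms of virtual time charged unconditionally; any real
	 * elapsed time beyond that (incr - incr2, clamped at 0) is
	 * then added, so q->now never falls behind the real clock
	 * while the link stays busy.
	 */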
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}
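/* A worked example of the normalization above (illustrative numbers):
 * two classes in priority band 1 with weights 1 and 3 and allot 1514
 * give q->nclasses[1] = 2 and q->quanta[1] = 1+3 = 4 (cbq_addprio()
 * accumulates weights there), so the first class gets quantum
 * 1*1514*2/4 = 757 and the second 3*1514*2/4 = 2271; quanta stay
 * proportional to weight, with their average near one allot.
 */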
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
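/* A note on defmaps: cl->defmap is a bitmask over the TC_PRIO_* values
 * (bit i set means "this class volunteers as the default for logical
 * priority i"), while split->defaults[i] caches, per split node, the
 * lowest-level volunteer that cbq_classify() falls back to when no
 * filter matches.  cbq_sync_defmap() rebuilds that cache after any
 * change to a class's defmap or split.
 */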
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->quantum)
		cl->quantum = wrr->quantum;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
		return -EINVAL;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats_lock = &sch->dev->queue_lock;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}
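/* For illustration only (assuming the usual iproute2 syntax of this
 * era), the init path above is what runs for a command such as:
 *
 *   tc qdisc add dev eth0 root handle 1: cbq \
 *      bandwidth 100Mbit avpkt 1000 cell 8
 *
 * iproute2 builds the rate table (TCA_CBQ_RTAB), the rate spec
 * (TCA_CBQ_RATE) and the LSS options from those arguments.
 */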
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct rtattr *rta;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb_tail_pointer(skb) - b;
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb_tail_pointer(skb);
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb_tail_pointer(skb) - b;
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
#endif
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
						     cl->classid)) == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tcf_destroy(tp);
	}
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
#endif
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < 16; h++)
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			next = cl->next;
			cbq_destroy_class(sch, cl);
		}
	}
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      cl->stats_lock, tca[TCA_RATE-1]);
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  cl->stats_lock, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};
static struct Qdisc_ops cbq_qdisc_ops = {
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");