2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement in [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have an IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is welcome to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate the true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is not exact because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
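/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the delay
 * bound quoted above,
 *
 *	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
 *
 * where W*r_i is the per-round allotment of class i, r = r_1+...+r_k,
 * B is the link bandwidth, and [x] is read here as a ceiling.
 */
#if 0
#include <math.h>
#include <stdio.h>

/* Sizes in bytes, bandwidth in bytes/sec, result in seconds. */
static double cbq_delay_bound(double mtu, double W, double r_i,
			      double r, int k, double B)
{
	double rounds = ceil(mtu / (W * r_i));	/* [MTU/(W*r_i)] */

	return (rounds * W * r + W * r + k * mtu) / B;
}

int main(void)
{
	/* Three classes with a 10% rate share each on a 10 Mbit/s link
	 * (1.25e6 bytes/s), W = 10000 bytes per round, MTU = 1500.
	 */
	printf("delay bound: %g s\n",
	       cbq_delay_bound(1500.0, 10000.0, 0.1, 0.3, 3, 1.25e6));
	return 0;
}
#endif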
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
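/* L2T ("length to time"): transmission time of a len-byte packet at the
 * class rate, looked up in the precomputed rate table.
 */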
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If the classifier selected a link sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
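/*
 * A minimal, standalone sketch (userspace) of the "Step 1" test above:
 * skb->priority is used directly as a classid when its major number
 * matches the qdisc handle.  TC_H_MAJ/TC_H_MIN/TC_H_MAKE are redefined
 * locally with the same values as in <linux/pkt_sched.h> so the sketch
 * compiles on its own.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define TC_H_MAJ(h)		((h) & 0xFFFF0000U)
#define TC_H_MIN(h)		((h) & 0x0000FFFFU)
#define TC_H_MAKE(maj, min)	(((maj) & 0xFFFF0000U) | ((min) & 0x0000FFFFU))

int main(void)
{
	uint32_t handle = 0x00010000U;		/* qdisc handle 1:0 */
	uint32_t prio = TC_H_MAKE(handle, 5);	/* skb->priority set to 1:5 */

	/* Same test as in cbq_classify(): the majors match, so prio is
	 * looked up directly as one of our class ids (class 1:5).
	 */
	printf("direct class lookup: %s\n",
	       TC_H_MAJ(prio ^ handle) == 0 ? "yes" : "no");
	return 0;
}
#endif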
292 A packet has just been enqueued on a previously empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
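/* q->active[prio] points at the tail of a circular, singly linked list
 * threaded through next_alive; the newly activated class becomes the new
 * tail, and tail->next_alive is the head that cbq_dequeue_prio() starts
 * scanning from.
 */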
316 Unlink the class from the active chain.
317 Note that the same unlinking is also done directly in cbq_dequeue*
318 during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
380 if (ret & __NET_XMIT_BYPASS)
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
415 if ((cl = q->tx_class) == NULL) {
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
426 cl->q->__parent = sch;
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
430 sch->qstats.requeues++;
432 cbq_activate_class(cl);
435 if (net_xmit_drop_count(ret)) {
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
452 delay += cl->offtime;
455 The class goes to sleep, so it will have no
456 chance to accumulate avgidle. Let's forgive it 8)
458 BTW cbq-2.0 has a bug in this
459 place: apparently they forgot to shift it by cl->ewma_log.
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on the
476 real available rate, rather than the leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
492 q->wd_expires = base_delay;
496 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
506 if (cl->level > q->toplevel) {
510 } while ((cl = cl->borrow) != NULL);
517 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
524 if (test_bit(__QDISC_STATE_DEACTIVATED,
525 &qdisc_root_sleeping(cl->qdisc)->state))
529 psched_time_t sched = q->now;
532 delay += cl->offtime;
534 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
535 if (cl->avgidle < cl->minidle)
536 cl->avgidle = cl->minidle;
537 cl->undertime = q->now + delay;
540 sched += delay + cl->penalty;
541 cl->penalized = sched;
542 cl->cpriority = TC_CBQ_MAXPRIO;
543 q->pmask |= (1<<TC_CBQ_MAXPRIO);
545 expires = ktime_set(0, 0);
546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
547 if (hrtimer_try_to_cancel(&q->delay_timer) &&
548 ktime_to_ns(ktime_sub(q->delay_timer.expires,
550 q->delay_timer.expires = expires;
551 hrtimer_restart(&q->delay_timer);
553 cl->xstats.overactions++;
558 if (q->wd_expires == 0 || q->wd_expires > delay)
559 q->wd_expires = delay;
562 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
564 static void cbq_ovl_lowprio(struct cbq_class *cl)
566 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
568 cl->penalized = q->now + cl->penalty;
570 if (cl->cpriority != cl->priority2) {
571 cl->cpriority = cl->priority2;
572 q->pmask |= (1<<cl->cpriority);
573 cl->xstats.overactions++;
578 /* TC_CBQ_OVL_DROP: penalize class by dropping */
580 static void cbq_ovl_drop(struct cbq_class *cl)
582 if (cl->q->ops->drop)
583 if (cl->q->ops->drop(cl->q))
585 cl->xstats.overactions++;
589 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
592 struct cbq_class *cl;
593 struct cbq_class *cl_prev = q->active[prio];
594 psched_time_t sched = now;
600 cl = cl_prev->next_alive;
601 if (now - cl->penalized > 0) {
602 cl_prev->next_alive = cl->next_alive;
603 cl->next_alive = NULL;
604 cl->cpriority = cl->priority;
606 cbq_activate_class(cl);
608 if (cl == q->active[prio]) {
609 q->active[prio] = cl_prev;
610 if (cl == q->active[prio]) {
611 q->active[prio] = NULL;
616 cl = cl_prev->next_alive;
617 } else if (sched - cl->penalized > 0)
618 sched = cl->penalized;
619 } while ((cl_prev = cl) != q->active[prio]);
624 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
626 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
628 struct Qdisc *sch = q->watchdog.qdisc;
630 psched_tdiff_t delay = 0;
633 now = psched_get_time();
639 int prio = ffz(~pmask);
644 tmp = cbq_undelay_prio(q, prio, now);
647 if (tmp < delay || delay == 0)
655 time = ktime_set(0, 0);
656 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
657 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
660 sch->flags &= ~TCQ_F_THROTTLED;
661 __netif_schedule(qdisc_root(sch));
662 return HRTIMER_NORESTART;
665 #ifdef CONFIG_NET_CLS_ACT
666 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
668 struct Qdisc *sch = child->__parent;
669 struct cbq_sched_data *q = qdisc_priv(sch);
670 struct cbq_class *cl = q->rx_class;
674 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
677 cbq_mark_toplevel(q, cl);
680 cl->q->__parent = sch;
682 ret = qdisc_enqueue(skb, cl->q);
683 if (ret == NET_XMIT_SUCCESS) {
685 sch->bstats.packets++;
686 sch->bstats.bytes += qdisc_pkt_len(skb);
688 cbq_activate_class(cl);
691 if (net_xmit_drop_count(ret))
702 This is a mission-critical procedure.
704 We "regenerate" the toplevel cutoff if the transmitting class
705 has backlog and is not regulated. This is not part of the
706 original CBQ description, but it looks more reasonable.
707 It may well be wrong; the question needs further investigation.
710 static __inline__ void
711 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
712 struct cbq_class *borrowed)
714 if (cl && q->toplevel >= borrowed->level) {
715 if (cl->q->q.qlen > 1) {
717 if (borrowed->undertime == PSCHED_PASTPERFECT) {
718 q->toplevel = borrowed->level;
721 } while ((borrowed=borrowed->borrow) != NULL);
724 /* This is not necessary now. Uncommenting it
725 would save CPU cycles but decrease fairness.
727 q->toplevel = TC_CBQ_MAXLEVEL;
733 cbq_update(struct cbq_sched_data *q)
735 struct cbq_class *this = q->tx_class;
736 struct cbq_class *cl = this;
741 for ( ; cl; cl = cl->share) {
742 long avgidle = cl->avgidle;
745 cl->bstats.packets++;
746 cl->bstats.bytes += len;
749 (now - last) is total time between packet right edges.
750 (last_pktlen/rate) is "virtual" busy time, so that
752 idle = (now - last) - last_pktlen/rate
755 idle = q->now - cl->last;
756 if ((unsigned long)idle > 128*1024*1024) {
757 avgidle = cl->maxidle;
759 idle -= L2T(cl, len);
761 /* true_avgidle := (1-W)*true_avgidle + W*idle,
762 where W=2^{-ewma_log}. But cl->avgidle is scaled:
763 cl->avgidle == true_avgidle/W,
766 avgidle += idle - (avgidle>>cl->ewma_log);
770 /* Overlimit or at-limit */
772 if (avgidle < cl->minidle)
773 avgidle = cl->minidle;
775 cl->avgidle = avgidle;
777 /* Calculate the expected time when this class
778 will be allowed to send.
780 (1-W)*true_avgidle + W*delay = 0, i.e.
781 idle = (1/W - 1)*(-true_avgidle)
783 idle = (1 - W)*(-cl->avgidle);
785 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
789 To maintain the rate allocated to the class,
790 we add to undertime the virtual clock time
791 needed to complete the transmitted packet.
792 (len/phys_bandwidth has already elapsed
793 by the time cbq_update is called.)
796 idle -= L2T(&q->link, len);
797 idle += L2T(cl, len);
799 cl->undertime = q->now + idle;
803 cl->undertime = PSCHED_PASTPERFECT;
804 if (avgidle > cl->maxidle)
805 cl->avgidle = cl->maxidle;
807 cl->avgidle = avgidle;
812 cbq_update_toplevel(q, this, q->tx_borrowed);
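/*
 * A minimal, standalone sketch (userspace, made-up samples) of the scaled
 * EWMA used above: with W = 2^-ewma_log and avgidle stored as
 * true_avgidle/W, the update
 *	true_avgidle = (1-W)*true_avgidle + W*idle
 * reduces to the shift-only form  avgidle += idle - (avgidle >> ewma_log).
 */
#if 0
#include <stdio.h>

int main(void)
{
	long avgidle = 0;	/* scaled: true_avgidle / W */
	int ewma_log = 5;	/* W = 1/32 */
	long samples[] = { 100, -40, 250, -10 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* relies on arithmetic right shift for negative values,
		 * just as the qdisc does
		 */
		avgidle += samples[i] - (avgidle >> ewma_log);
		printf("idle=%4ld  scaled avgidle=%ld  (true ~ %g)\n",
		       samples[i], avgidle,
		       (double)avgidle / (1 << ewma_log));
	}
	return 0;
}
#endif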
815 static __inline__ struct cbq_class *
816 cbq_under_limit(struct cbq_class *cl)
818 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
819 struct cbq_class *this_cl = cl;
821 if (cl->tparent == NULL)
824 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
830 /* This is a very suspicious place. Currently the overlimit
831 action is generated for non-bounded classes
832 only if the link is completely congested.
833 Though this agrees with the ancestor-only paradigm,
834 it looks very dubious. In particular,
835 it means that this chunk of code will either
836 never be called or result in strong amplification
837 of burstiness. Dangerous and silly, but
838 no other solution exists.
840 if ((cl = cl->borrow) == NULL) {
841 this_cl->qstats.overlimits++;
842 this_cl->overlimit(this_cl);
845 if (cl->level > q->toplevel)
847 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
853 static __inline__ struct sk_buff *
854 cbq_dequeue_prio(struct Qdisc *sch, int prio)
856 struct cbq_sched_data *q = qdisc_priv(sch);
857 struct cbq_class *cl_tail, *cl_prev, *cl;
861 cl_tail = cl_prev = q->active[prio];
862 cl = cl_prev->next_alive;
869 struct cbq_class *borrow = cl;
872 (borrow = cbq_under_limit(cl)) == NULL)
875 if (cl->deficit <= 0) {
876 /* The class exhausted its allotment for
877 this round. Switch to the next one.
880 cl->deficit += cl->quantum;
884 skb = cl->q->dequeue(cl->q);
886 /* The class did not give us any skb :-(
887 This can occur even if cl->q->q.qlen != 0,
888 e.g. if cl->q is a "tbf" qdisc.
893 cl->deficit -= qdisc_pkt_len(skb);
895 q->tx_borrowed = borrow;
897 #ifndef CBQ_XSTATS_BORROWS_BYTES
898 borrow->xstats.borrows++;
899 cl->xstats.borrows++;
901 borrow->xstats.borrows += qdisc_pkt_len(skb);
902 cl->xstats.borrows += qdisc_pkt_len(skb);
905 q->tx_len = qdisc_pkt_len(skb);
907 if (cl->deficit <= 0) {
908 q->active[prio] = cl;
910 cl->deficit += cl->quantum;
915 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
916 /* The class is empty or penalized.
917 Unlink it from the active chain.
919 cl_prev->next_alive = cl->next_alive;
920 cl->next_alive = NULL;
922 /* Did cl_tail point to it? */
927 /* Was it the last class in this band? */
930 q->active[prio] = NULL;
931 q->activemask &= ~(1<<prio);
933 cbq_activate_class(cl);
937 q->active[prio] = cl_tail;
940 cbq_activate_class(cl);
948 } while (cl_prev != cl_tail);
951 q->active[prio] = cl_prev;
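/*
 * A minimal, standalone sketch in the spirit of the WRR loop above
 * (userspace, made-up classes and packet sizes), stripped of borrowing
 * and penalties: each backlogged class earns 'quantum' bytes of deficit
 * per round and keeps sending while its deficit stays positive, so the
 * deficit may go negative and the shortfall carries into the next round.
 */
#if 0
#include <stdio.h>

struct toy_class {
	const char *name;
	long quantum, deficit;
	int backlog[4], head, tail;	/* tiny FIFO of packet lengths */
};

static void toy_round(struct toy_class *cls, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		struct toy_class *cl = &cls[i];

		if (cl->head == cl->tail)	/* empty: stays inactive */
			continue;
		cl->deficit += cl->quantum;
		while (cl->deficit > 0 && cl->head != cl->tail) {
			int len = cl->backlog[cl->head++];

			cl->deficit -= len;
			printf("%s sends %d bytes (deficit now %ld)\n",
			       cl->name, len, cl->deficit);
		}
	}
}

int main(void)
{
	struct toy_class cls[] = {
		{ "A", 1000, 0, { 600, 600, 600 }, 0, 3 },
		{ "B",  500, 0, { 1500 },          0, 1 },
	};

	toy_round(cls, 2);
	toy_round(cls, 2);
	return 0;
}
#endif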
956 static __inline__ struct sk_buff *
957 cbq_dequeue_1(struct Qdisc *sch)
959 struct cbq_sched_data *q = qdisc_priv(sch);
963 activemask = q->activemask&0xFF;
965 int prio = ffz(~activemask);
966 activemask &= ~(1<<prio);
967 skb = cbq_dequeue_prio(sch, prio);
974 static struct sk_buff *
975 cbq_dequeue(struct Qdisc *sch)
978 struct cbq_sched_data *q = qdisc_priv(sch);
982 now = psched_get_time();
983 incr = now - q->now_rt;
986 psched_tdiff_t incr2;
987 /* Time integrator. We calculate the EOS time
988 by adding the expected packet transmission time.
989 If real time is greater, we warp the artificial clock, so that
992 cbq_time = max(real_time, work);
994 incr2 = L2T(&q->link, q->tx_len);
997 if ((incr -= incr2) < 0)
1006 skb = cbq_dequeue_1(sch);
1009 sch->flags &= ~TCQ_F_THROTTLED;
1013 /* All the classes are overlimit.
1017 1. The scheduler is empty.
1018 2. The toplevel cutoff inhibited borrowing.
1019 3. The root class is overlimit.
1021 Reset the 2nd and 3rd conditions and retry.
1023 Note that NS and cbq-2.0 are buggy here: peeking
1024 an arbitrary class is appropriate for ancestor-only
1025 sharing, but not for the toplevel algorithm.
1027 Our version is better but slower, because it requires
1028 two passes; that is unavoidable with top-level sharing.
1031 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1032 q->link.undertime == PSCHED_PASTPERFECT)
1035 q->toplevel = TC_CBQ_MAXLEVEL;
1036 q->link.undertime = PSCHED_PASTPERFECT;
1039 /* No packets in the scheduler, or nobody wants to give them to us :-(
1040 Sigh... start the watchdog timer in the latter case. */
1043 sch->qstats.overlimits++;
1045 qdisc_watchdog_schedule(&q->watchdog,
1046 now + q->wd_expires);
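/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the time
 * integrator used in cbq_dequeue(): per dequeue, the artificial clock
 * advances by max(real elapsed time, expected tx time of the previous
 * packet), so it never falls behind the real clock yet keeps the link
 * "busy" for at least the expected transmission time.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long now_art = 0, now_rt = 0;
	/* { real gap since last dequeue, expected tx time of prev packet } */
	long ev[][2] = { { 10, 12 }, { 30, 12 }, { 5, 12 } };
	unsigned int i;

	for (i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
		long incr = ev[i][0], incr2 = ev[i][1];

		now_art += incr2;		/* virtual busy time */
		if ((incr -= incr2) < 0)	/* real time ran slower */
			incr = 0;
		now_art += incr;		/* warp if it ran faster */
		now_rt += ev[i][0];
		printf("real=%ld artificial=%ld\n", now_rt, now_art);
	}
	return 0;
}
#endif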
1051 /* CBQ class maintenance routines */
1053 static void cbq_adjust_levels(struct cbq_class *this)
1060 struct cbq_class *cl;
1062 if ((cl = this->children) != NULL) {
1064 if (cl->level > level)
1066 } while ((cl = cl->sibling) != this->children);
1068 this->level = level+1;
1069 } while ((this = this->tparent) != NULL);
1072 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1074 struct cbq_class *cl;
1075 struct hlist_node *n;
1078 if (q->quanta[prio] == 0)
1081 for (h = 0; h < q->clhash.hashsize; h++) {
1082 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1083 /* BUGGGG... Beware! This expression suffers from
1084 arithmetic overflows!
1086 if (cl->priority == prio) {
1087 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1090 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1091 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1092 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
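/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the
 * overflow warned about above: on targets where long is 32 bits the
 * intermediate product weight*allot*nclasses can exceed 2^31, so the
 * computed quantum is garbage.  Unsigned 32-bit arithmetic is used here
 * so the wraparound is well defined; widening to 64 bits avoids it.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t weight = 100000, allot = 10000, nclasses = 8;
	uint32_t quanta = 800000;	/* sum of weights for this prio */
	uint32_t narrow = weight * allot * nclasses / quanta;	/* wraps */
	uint64_t wide = (uint64_t)weight * allot * nclasses / quanta;

	printf("32-bit result: %u, 64-bit result: %llu\n",
	       narrow, (unsigned long long)wide);
	return 0;
}
#endif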
1098 static void cbq_sync_defmap(struct cbq_class *cl)
1100 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1101 struct cbq_class *split = cl->split;
1108 for (i=0; i<=TC_PRIO_MAX; i++) {
1109 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1110 split->defaults[i] = NULL;
1113 for (i=0; i<=TC_PRIO_MAX; i++) {
1114 int level = split->level;
1116 if (split->defaults[i])
1119 for (h = 0; h < q->clhash.hashsize; h++) {
1120 struct hlist_node *n;
1121 struct cbq_class *c;
1123 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1125 if (c->split == split && c->level < level &&
1127 split->defaults[i] = c;
1135 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1137 struct cbq_class *split = NULL;
1140 if ((split = cl->split) == NULL)
1142 splitid = split->common.classid;
1145 if (split == NULL || split->common.classid != splitid) {
1146 for (split = cl->tparent; split; split = split->tparent)
1147 if (split->common.classid == splitid)
1154 if (cl->split != split) {
1156 cbq_sync_defmap(cl);
1158 cl->defmap = def&mask;
1160 cl->defmap = (cl->defmap&~mask)|(def&mask);
1162 cbq_sync_defmap(cl);
1165 static void cbq_unlink_class(struct cbq_class *this)
1167 struct cbq_class *cl, **clp;
1168 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1170 qdisc_class_hash_remove(&q->clhash, &this->common);
1172 if (this->tparent) {
1181 } while ((cl = *clp) != this->sibling);
1183 if (this->tparent->children == this) {
1184 this->tparent->children = this->sibling;
1185 if (this->sibling == this)
1186 this->tparent->children = NULL;
1189 WARN_ON(this->sibling != this);
1193 static void cbq_link_class(struct cbq_class *this)
1195 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1196 struct cbq_class *parent = this->tparent;
1198 this->sibling = this;
1199 qdisc_class_hash_insert(&q->clhash, &this->common);
1204 if (parent->children == NULL) {
1205 parent->children = this;
1207 this->sibling = parent->children->sibling;
1208 parent->children->sibling = this;
1212 static unsigned int cbq_drop(struct Qdisc* sch)
1214 struct cbq_sched_data *q = qdisc_priv(sch);
1215 struct cbq_class *cl, *cl_head;
1219 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1220 if ((cl_head = q->active[prio]) == NULL)
1225 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1228 cbq_deactivate_class(cl);
1231 } while ((cl = cl->next_alive) != cl_head);
1237 cbq_reset(struct Qdisc* sch)
1239 struct cbq_sched_data *q = qdisc_priv(sch);
1240 struct cbq_class *cl;
1241 struct hlist_node *n;
1248 q->tx_borrowed = NULL;
1249 qdisc_watchdog_cancel(&q->watchdog);
1250 hrtimer_cancel(&q->delay_timer);
1251 q->toplevel = TC_CBQ_MAXLEVEL;
1252 q->now = psched_get_time();
1255 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1256 q->active[prio] = NULL;
1258 for (h = 0; h < q->clhash.hashsize; h++) {
1259 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1262 cl->next_alive = NULL;
1263 cl->undertime = PSCHED_PASTPERFECT;
1264 cl->avgidle = cl->maxidle;
1265 cl->deficit = cl->quantum;
1266 cl->cpriority = cl->priority;
1273 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1275 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1276 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1277 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1279 if (lss->change&TCF_CBQ_LSS_EWMA)
1280 cl->ewma_log = lss->ewma_log;
1281 if (lss->change&TCF_CBQ_LSS_AVPKT)
1282 cl->avpkt = lss->avpkt;
1283 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1284 cl->minidle = -(long)lss->minidle;
1285 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1286 cl->maxidle = lss->maxidle;
1287 cl->avgidle = lss->maxidle;
1289 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1290 cl->offtime = lss->offtime;
1294 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1296 q->nclasses[cl->priority]--;
1297 q->quanta[cl->priority] -= cl->weight;
1298 cbq_normalize_quanta(q, cl->priority);
1301 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1303 q->nclasses[cl->priority]++;
1304 q->quanta[cl->priority] += cl->weight;
1305 cbq_normalize_quanta(q, cl->priority);
1308 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1310 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1313 cl->allot = wrr->allot;
1315 cl->weight = wrr->weight;
1316 if (wrr->priority) {
1317 cl->priority = wrr->priority-1;
1318 cl->cpriority = cl->priority;
1319 if (cl->priority >= cl->priority2)
1320 cl->priority2 = TC_CBQ_MAXPRIO-1;
1327 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1329 switch (ovl->strategy) {
1330 case TC_CBQ_OVL_CLASSIC:
1331 cl->overlimit = cbq_ovl_classic;
1333 case TC_CBQ_OVL_DELAY:
1334 cl->overlimit = cbq_ovl_delay;
1336 case TC_CBQ_OVL_LOWPRIO:
1337 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1338 ovl->priority2-1 <= cl->priority)
1340 cl->priority2 = ovl->priority2-1;
1341 cl->overlimit = cbq_ovl_lowprio;
1343 case TC_CBQ_OVL_DROP:
1344 cl->overlimit = cbq_ovl_drop;
1346 case TC_CBQ_OVL_RCLASSIC:
1347 cl->overlimit = cbq_ovl_rclassic;
1352 cl->penalty = ovl->penalty;
1356 #ifdef CONFIG_NET_CLS_ACT
1357 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1359 cl->police = p->police;
1361 if (cl->q->handle) {
1362 if (p->police == TC_POLICE_RECLASSIFY)
1363 cl->q->reshape_fail = cbq_reshape_fail;
1365 cl->q->reshape_fail = NULL;
1371 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1373 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1378 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1379 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1380 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1381 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1382 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1383 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1384 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1387 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1389 struct cbq_sched_data *q = qdisc_priv(sch);
1390 struct nlattr *tb[TCA_CBQ_MAX + 1];
1391 struct tc_ratespec *r;
1394 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1398 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1401 r = nla_data(tb[TCA_CBQ_RATE]);
1403 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1406 err = qdisc_class_hash_init(&q->clhash);
1411 q->link.sibling = &q->link;
1412 q->link.common.classid = sch->handle;
1413 q->link.qdisc = sch;
1414 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1417 q->link.q = &noop_qdisc;
1419 q->link.priority = TC_CBQ_MAXPRIO-1;
1420 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1421 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1422 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1423 q->link.overlimit = cbq_ovl_classic;
1424 q->link.allot = psched_mtu(qdisc_dev(sch));
1425 q->link.quantum = q->link.allot;
1426 q->link.weight = q->link.R_tab->rate.rate;
1428 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1429 q->link.avpkt = q->link.allot/2;
1430 q->link.minidle = -0x7FFFFFFF;
1432 qdisc_watchdog_init(&q->watchdog, sch);
1433 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1434 q->delay_timer.function = cbq_undelay;
1435 q->toplevel = TC_CBQ_MAXLEVEL;
1436 q->now = psched_get_time();
1439 cbq_link_class(&q->link);
1441 if (tb[TCA_CBQ_LSSOPT])
1442 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1444 cbq_addprio(q, &q->link);
1448 qdisc_put_rtab(q->link.R_tab);
1452 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1454 unsigned char *b = skb_tail_pointer(skb);
1456 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1466 unsigned char *b = skb_tail_pointer(skb);
1467 struct tc_cbq_lssopt opt;
1470 if (cl->borrow == NULL)
1471 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1472 if (cl->share == NULL)
1473 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1474 opt.ewma_log = cl->ewma_log;
1475 opt.level = cl->level;
1476 opt.avpkt = cl->avpkt;
1477 opt.maxidle = cl->maxidle;
1478 opt.minidle = (u32)(-cl->minidle);
1479 opt.offtime = cl->offtime;
1481 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1491 unsigned char *b = skb_tail_pointer(skb);
1492 struct tc_cbq_wrropt opt;
1495 opt.allot = cl->allot;
1496 opt.priority = cl->priority+1;
1497 opt.cpriority = cl->cpriority+1;
1498 opt.weight = cl->weight;
1499 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1509 unsigned char *b = skb_tail_pointer(skb);
1510 struct tc_cbq_ovl opt;
1512 opt.strategy = cl->ovl_strategy;
1513 opt.priority2 = cl->priority2+1;
1515 opt.penalty = cl->penalty;
1516 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1524 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1526 unsigned char *b = skb_tail_pointer(skb);
1527 struct tc_cbq_fopt opt;
1529 if (cl->split || cl->defmap) {
1530 opt.split = cl->split ? cl->split->common.classid : 0;
1531 opt.defmap = cl->defmap;
1533 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1542 #ifdef CONFIG_NET_CLS_ACT
1543 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1545 unsigned char *b = skb_tail_pointer(skb);
1546 struct tc_cbq_police opt;
1549 opt.police = cl->police;
1552 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1564 if (cbq_dump_lss(skb, cl) < 0 ||
1565 cbq_dump_rate(skb, cl) < 0 ||
1566 cbq_dump_wrr(skb, cl) < 0 ||
1567 cbq_dump_ovl(skb, cl) < 0 ||
1568 #ifdef CONFIG_NET_CLS_ACT
1569 cbq_dump_police(skb, cl) < 0 ||
1571 cbq_dump_fopt(skb, cl) < 0)
1576 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1578 struct cbq_sched_data *q = qdisc_priv(sch);
1579 struct nlattr *nest;
1581 nest = nla_nest_start(skb, TCA_OPTIONS);
1583 goto nla_put_failure;
1584 if (cbq_dump_attr(skb, &q->link) < 0)
1585 goto nla_put_failure;
1586 nla_nest_end(skb, nest);
1590 nla_nest_cancel(skb, nest);
1595 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1597 struct cbq_sched_data *q = qdisc_priv(sch);
1599 q->link.xstats.avgidle = q->link.avgidle;
1600 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1604 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1605 struct sk_buff *skb, struct tcmsg *tcm)
1607 struct cbq_class *cl = (struct cbq_class*)arg;
1608 struct nlattr *nest;
1611 tcm->tcm_parent = cl->tparent->common.classid;
1613 tcm->tcm_parent = TC_H_ROOT;
1614 tcm->tcm_handle = cl->common.classid;
1615 tcm->tcm_info = cl->q->handle;
1617 nest = nla_nest_start(skb, TCA_OPTIONS);
1619 goto nla_put_failure;
1620 if (cbq_dump_attr(skb, cl) < 0)
1621 goto nla_put_failure;
1622 nla_nest_end(skb, nest);
1626 nla_nest_cancel(skb, nest);
1631 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1632 struct gnet_dump *d)
1634 struct cbq_sched_data *q = qdisc_priv(sch);
1635 struct cbq_class *cl = (struct cbq_class*)arg;
1637 cl->qstats.qlen = cl->q->q.qlen;
1638 cl->xstats.avgidle = cl->avgidle;
1639 cl->xstats.undertime = 0;
1641 if (cl->undertime != PSCHED_PASTPERFECT)
1642 cl->xstats.undertime = cl->undertime - q->now;
1644 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1645 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1646 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1649 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1652 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1655 struct cbq_class *cl = (struct cbq_class*)arg;
1659 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1661 cl->common.classid);
1665 #ifdef CONFIG_NET_CLS_ACT
1666 if (cl->police == TC_POLICE_RECLASSIFY)
1667 new->reshape_fail = cbq_reshape_fail;
1671 *old = xchg(&cl->q, new);
1672 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1674 sch_tree_unlock(sch);
1681 static struct Qdisc *
1682 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1684 struct cbq_class *cl = (struct cbq_class*)arg;
1686 return cl ? cl->q : NULL;
1689 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1691 struct cbq_class *cl = (struct cbq_class *)arg;
1693 if (cl->q->q.qlen == 0)
1694 cbq_deactivate_class(cl);
1697 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1699 struct cbq_sched_data *q = qdisc_priv(sch);
1700 struct cbq_class *cl = cbq_class_lookup(q, classid);
1704 return (unsigned long)cl;
1709 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1711 struct cbq_sched_data *q = qdisc_priv(sch);
1713 WARN_ON(cl->filters);
1715 tcf_destroy_chain(&cl->filter_list);
1716 qdisc_destroy(cl->q);
1717 qdisc_put_rtab(cl->R_tab);
1718 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1724 cbq_destroy(struct Qdisc* sch)
1726 struct cbq_sched_data *q = qdisc_priv(sch);
1727 struct hlist_node *n, *next;
1728 struct cbq_class *cl;
1731 #ifdef CONFIG_NET_CLS_ACT
1735 * Filters must be destroyed first because we don't destroy the
1736 * classes from root to leaves, which means that filters can still
1737 * be bound to classes which have already been destroyed. --TGR '04
1739 for (h = 0; h < q->clhash.hashsize; h++) {
1740 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1741 tcf_destroy_chain(&cl->filter_list);
1743 for (h = 0; h < q->clhash.hashsize; h++) {
1744 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1746 cbq_destroy_class(sch, cl);
1748 qdisc_class_hash_destroy(&q->clhash);
1751 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753 struct cbq_class *cl = (struct cbq_class*)arg;
1755 if (--cl->refcnt == 0) {
1756 #ifdef CONFIG_NET_CLS_ACT
1757 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1758 struct cbq_sched_data *q = qdisc_priv(sch);
1760 spin_lock_bh(root_lock);
1761 if (q->rx_class == cl)
1763 spin_unlock_bh(root_lock);
1766 cbq_destroy_class(sch, cl);
1771 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1775 struct cbq_sched_data *q = qdisc_priv(sch);
1776 struct cbq_class *cl = (struct cbq_class*)*arg;
1777 struct nlattr *opt = tca[TCA_OPTIONS];
1778 struct nlattr *tb[TCA_CBQ_MAX + 1];
1779 struct cbq_class *parent;
1780 struct qdisc_rate_table *rtab = NULL;
1785 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1793 cl->tparent->common.classid != parentid)
1795 if (!cl->tparent && parentid != TC_H_ROOT)
1799 if (tb[TCA_CBQ_RATE]) {
1800 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1805 /* Change class parameters */
1808 if (cl->next_alive != NULL)
1809 cbq_deactivate_class(cl);
1812 rtab = xchg(&cl->R_tab, rtab);
1813 qdisc_put_rtab(rtab);
1816 if (tb[TCA_CBQ_LSSOPT])
1817 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1819 if (tb[TCA_CBQ_WRROPT]) {
1821 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1824 if (tb[TCA_CBQ_OVL_STRATEGY])
1825 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1827 #ifdef CONFIG_NET_CLS_ACT
1828 if (tb[TCA_CBQ_POLICE])
1829 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1832 if (tb[TCA_CBQ_FOPT])
1833 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1836 cbq_activate_class(cl);
1838 sch_tree_unlock(sch);
1841 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1842 qdisc_root_sleeping_lock(sch),
1847 if (parentid == TC_H_ROOT)
1850 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1851 tb[TCA_CBQ_LSSOPT] == NULL)
1854 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1860 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1864 classid = TC_H_MAKE(sch->handle,0x8000);
1866 for (i=0; i<0x8000; i++) {
1867 if (++q->hgenerator >= 0x8000)
1869 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1875 classid = classid|q->hgenerator;
1880 parent = cbq_class_lookup(q, parentid);
1887 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1893 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1894 &pfifo_qdisc_ops, classid)))
1895 cl->q = &noop_qdisc;
1896 cl->common.classid = classid;
1897 cl->tparent = parent;
1899 cl->allot = parent->allot;
1900 cl->quantum = cl->allot;
1901 cl->weight = cl->R_tab->rate.rate;
1905 cl->borrow = cl->tparent;
1906 if (cl->tparent != &q->link)
1907 cl->share = cl->tparent;
1908 cbq_adjust_levels(parent);
1909 cl->minidle = -0x7FFFFFFF;
1910 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1911 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1912 if (cl->ewma_log==0)
1913 cl->ewma_log = q->link.ewma_log;
1915 cl->maxidle = q->link.maxidle;
1917 cl->avpkt = q->link.avpkt;
1918 cl->overlimit = cbq_ovl_classic;
1919 if (tb[TCA_CBQ_OVL_STRATEGY])
1920 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1921 #ifdef CONFIG_NET_CLS_ACT
1922 if (tb[TCA_CBQ_POLICE])
1923 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1925 if (tb[TCA_CBQ_FOPT])
1926 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1927 sch_tree_unlock(sch);
1929 qdisc_class_hash_grow(sch, &q->clhash);
1932 gen_new_estimator(&cl->bstats, &cl->rate_est,
1933 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1935 *arg = (unsigned long)cl;
1939 qdisc_put_rtab(rtab);
1943 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1945 struct cbq_sched_data *q = qdisc_priv(sch);
1946 struct cbq_class *cl = (struct cbq_class*)arg;
1949 if (cl->filters || cl->children || cl == &q->link)
1954 qlen = cl->q->q.qlen;
1956 qdisc_tree_decrease_qlen(cl->q, qlen);
1959 cbq_deactivate_class(cl);
1961 if (q->tx_borrowed == cl)
1962 q->tx_borrowed = q->tx_class;
1963 if (q->tx_class == cl) {
1965 q->tx_borrowed = NULL;
1967 #ifdef CONFIG_NET_CLS_ACT
1968 if (q->rx_class == cl)
1972 cbq_unlink_class(cl);
1973 cbq_adjust_levels(cl->tparent);
1975 cbq_sync_defmap(cl);
1978 sch_tree_unlock(sch);
1980 if (--cl->refcnt == 0)
1981 cbq_destroy_class(sch, cl);
1986 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1988 struct cbq_sched_data *q = qdisc_priv(sch);
1989 struct cbq_class *cl = (struct cbq_class *)arg;
1994 return &cl->filter_list;
1997 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2000 struct cbq_sched_data *q = qdisc_priv(sch);
2001 struct cbq_class *p = (struct cbq_class*)parent;
2002 struct cbq_class *cl = cbq_class_lookup(q, classid);
2005 if (p && p->level <= cl->level)
2008 return (unsigned long)cl;
2013 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2015 struct cbq_class *cl = (struct cbq_class*)arg;
2020 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2022 struct cbq_sched_data *q = qdisc_priv(sch);
2023 struct cbq_class *cl;
2024 struct hlist_node *n;
2030 for (h = 0; h < q->clhash.hashsize; h++) {
2031 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2032 if (arg->count < arg->skip) {
2036 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2045 static const struct Qdisc_class_ops cbq_class_ops = {
2048 .qlen_notify = cbq_qlen_notify,
2051 .change = cbq_change_class,
2052 .delete = cbq_delete,
2054 .tcf_chain = cbq_find_tcf,
2055 .bind_tcf = cbq_bind_filter,
2056 .unbind_tcf = cbq_unbind_filter,
2057 .dump = cbq_dump_class,
2058 .dump_stats = cbq_dump_class_stats,
2061 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2063 .cl_ops = &cbq_class_ops,
2065 .priv_size = sizeof(struct cbq_sched_data),
2066 .enqueue = cbq_enqueue,
2067 .dequeue = cbq_dequeue,
2068 .requeue = cbq_requeue,
2072 .destroy = cbq_destroy,
2075 .dump_stats = cbq_dump_stats,
2076 .owner = THIS_MODULE,
2079 static int __init cbq_module_init(void)
2081 return register_qdisc(&cbq_qdisc_ops);
2083 static void __exit cbq_module_exit(void)
2085 unregister_qdisc(&cbq_qdisc_ops);
2087 module_init(cbq_module_init)
2088 module_exit(cbq_module_exit)
2089 MODULE_LICENSE("GPL");