2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <net/netlink.h>
21 #include <net/pkt_sched.h>
24 /* Class-Based Queueing (CBQ) algorithm.
25 =======================================
27 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
28 Management Models for Packet Networks",
29 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
31 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
33 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
36 [4] Sally Floyd and Michael Speer, "Experimental Results
37 for Class-Based Queueing", 1998, not published.
39 -----------------------------------------------------------------------
41 Algorithm skeleton was taken from NS simulator cbq.cc.
42 If someone wants to check this code against the LBL version,
43 he should take into account that ONLY the skeleton was borrowed,
44 the implementation is different. Particularly:
46 --- The WRR algorithm is different. Our version looks more
47 reasonable (I hope) and works when quanta are allowed to be
48 less than MTU, which is always the case when real time classes
49 have small rates. Note, that the statement of [3] is
50 incomplete, delay may actually be estimated even if class
51 per-round allotment is less than MTU. Namely, if per-round
52 allotment is W*r_i, and r_1+...+r_k = r < 1
54 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
56 In the worst case we have IntServ estimate with D = W*r+k*MTU
57 and C = MTU*r. The proof (if correct at all) is trivial.
60 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
61 interpret some places, which look like wrong translations
62 from NS. Anyone is advised to find these differences
63 and explain to me, why I am wrong 8).
65 --- Linux has no EOI event, so that we cannot estimate true class
66 idle time. Workaround is to consider the next dequeue event
67 as sign that previous packet is finished. This is wrong because of
68 internal device queueing, but on a permanently loaded link it is true.
69 Moreover, combined with clock integrator, this scheme looks
70 very close to an ideal solution. */
72 struct cbq_sched_data;
77 struct Qdisc_class_common common;
78 struct cbq_class *next_alive; /* next class with backlog in this priority band */
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
85 #ifdef CONFIG_NET_CLS_ACT
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
96 struct qdisc_rate_table *R_tab;
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
100 psched_tdiff_t penalty;
102 /* General scheduler (WRR) parameters */
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
116 struct Qdisc *q; /* Elementary queueing discipline */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
130 long deficit; /* Saved deficit for WRR */
131 psched_time_t penalized;
132 struct gnet_stats_basic_packed bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
135 struct tc_cbq_xstats xstats;
137 struct tcf_proto *filter_list;
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
145 struct cbq_sched_data
147 struct Qdisc_class_hash clhash; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
151 struct cbq_class link;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
157 #ifdef CONFIG_NET_CLS_ACT
158 struct cbq_class *rx_class;
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
167 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer,
172 psched_tdiff_t wd_expires;
178 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
180 static __inline__ struct cbq_class *
181 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
183 struct Qdisc_class_common *clc;
185 clc = qdisc_class_find(&q->clhash, classid);
188 return container_of(clc, struct cbq_class, common);
191 #ifdef CONFIG_NET_CLS_ACT
193 static struct cbq_class *
194 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
196 struct cbq_class *cl, *new;
198 for (cl = this->tparent; cl; cl = cl->tparent)
199 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
207 /* Classify packet. The procedure is pretty complicated, but
208 it allows us to combine link sharing and priority scheduling
211 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
212 so that it resolves to split nodes. Then packets are classified
213 by logical priority, or a more specific classifier may be attached
217 static struct cbq_class *
218 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
220 struct cbq_sched_data *q = qdisc_priv(sch);
221 struct cbq_class *head = &q->link;
222 struct cbq_class **defmap;
223 struct cbq_class *cl = NULL;
224 u32 prio = skb->priority;
225 struct tcf_result res;
228 * Step 1. If skb->priority points to one of our classes, use it.
230 if (TC_H_MAJ(prio^sch->handle) == 0 &&
231 (cl = cbq_class_lookup(q, prio)) != NULL)
234 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
237 defmap = head->defaults;
240 * Step 2+n. Apply classifier.
242 if (!head->filter_list ||
243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
246 if ((cl = (void*)res.class) == NULL) {
247 if (TC_H_MAJ(res.classid))
248 cl = cbq_class_lookup(q, res.classid);
249 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
250 cl = defmap[TC_PRIO_BESTEFFORT];
252 if (cl == NULL || cl->level >= head->level)
256 #ifdef CONFIG_NET_CLS_ACT
260 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
263 case TC_ACT_RECLASSIFY:
264 return cbq_reclassify(skb, cl);
271 * Step 3+n. If classifier selected a link sharing class,
272 * apply agency specific classifier.
273 * Repeat this procdure until we hit a leaf node.
282 * Step 4. No success...
284 if (TC_H_MAJ(prio) == 0 &&
285 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
286 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
293 A packet has just been enqueued on the empty class.
294 cbq_activate_class adds it to the tail of active class list
295 of its priority band.
298 static __inline__ void cbq_activate_class(struct cbq_class *cl)
300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
301 int prio = cl->cpriority;
302 struct cbq_class *cl_tail;
304 cl_tail = q->active[prio];
305 q->active[prio] = cl;
307 if (cl_tail != NULL) {
308 cl->next_alive = cl_tail->next_alive;
309 cl_tail->next_alive = cl;
312 q->activemask |= (1<<prio);
317 Unlink class from active chain.
318 Note that this same procedure is done directly in cbq_dequeue*
319 during round-robin procedure.
322 static void cbq_deactivate_class(struct cbq_class *this)
324 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
325 int prio = this->cpriority;
326 struct cbq_class *cl;
327 struct cbq_class *cl_prev = q->active[prio];
330 cl = cl_prev->next_alive;
332 cl_prev->next_alive = cl->next_alive;
333 cl->next_alive = NULL;
335 if (cl == q->active[prio]) {
336 q->active[prio] = cl_prev;
337 if (cl == q->active[prio]) {
338 q->active[prio] = NULL;
339 q->activemask &= ~(1<<prio);
345 } while ((cl_prev = cl) != q->active[prio]);
349 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
351 int toplevel = q->toplevel;
353 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
357 now = psched_get_time();
358 incr = now - q->now_rt;
362 if (cl->undertime < now) {
363 q->toplevel = cl->level;
366 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
371 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
373 struct cbq_sched_data *q = qdisc_priv(sch);
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
377 #ifdef CONFIG_NET_CLS_ACT
381 if (ret & __NET_XMIT_BYPASS)
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
390 ret = qdisc_enqueue(skb, cl->q);
391 if (ret == NET_XMIT_SUCCESS) {
393 qdisc_bstats_update(sch, skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
408 /* Overlimit actions */
410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
412 static void cbq_ovl_classic(struct cbq_class *cl)
414 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
415 psched_tdiff_t delay = cl->undertime - q->now;
418 delay += cl->offtime;
421 Class goes to sleep, so that it will have no
422 chance to work avgidle. Let's forgive it 8)
424 BTW cbq-2.0 has a crap in this
425 place, apparently they forgot to shift it by cl->ewma_log.
428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
429 if (cl->avgidle < cl->minidle)
430 cl->avgidle = cl->minidle;
433 cl->undertime = q->now + delay;
435 cl->xstats.overactions++;
438 if (q->wd_expires == 0 || q->wd_expires > delay)
439 q->wd_expires = delay;
441 /* Dirty work! We must schedule wakeups based on
442 real available rate, rather than leaf rate,
443 which may be tiny (even zero).
445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
447 psched_tdiff_t base_delay = q->wd_expires;
449 for (b = cl->borrow; b; b = b->borrow) {
450 delay = b->undertime - q->now;
451 if (delay < base_delay) {
458 q->wd_expires = base_delay;
462 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
466 static void cbq_ovl_rclassic(struct cbq_class *cl)
468 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
469 struct cbq_class *this = cl;
472 if (cl->level > q->toplevel) {
476 } while ((cl = cl->borrow) != NULL);
483 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
485 static void cbq_ovl_delay(struct cbq_class *cl)
487 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
488 psched_tdiff_t delay = cl->undertime - q->now;
490 if (test_bit(__QDISC_STATE_DEACTIVATED,
491 &qdisc_root_sleeping(cl->qdisc)->state))
495 psched_time_t sched = q->now;
498 delay += cl->offtime;
500 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
501 if (cl->avgidle < cl->minidle)
502 cl->avgidle = cl->minidle;
503 cl->undertime = q->now + delay;
506 sched += delay + cl->penalty;
507 cl->penalized = sched;
508 cl->cpriority = TC_CBQ_MAXPRIO;
509 q->pmask |= (1<<TC_CBQ_MAXPRIO);
511 expires = ktime_set(0, 0);
512 expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
513 if (hrtimer_try_to_cancel(&q->delay_timer) &&
514 ktime_to_ns(ktime_sub(
515 hrtimer_get_expires(&q->delay_timer),
517 hrtimer_set_expires(&q->delay_timer, expires);
518 hrtimer_restart(&q->delay_timer);
520 cl->xstats.overactions++;
525 if (q->wd_expires == 0 || q->wd_expires > delay)
526 q->wd_expires = delay;
529 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
531 static void cbq_ovl_lowprio(struct cbq_class *cl)
533 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
535 cl->penalized = q->now + cl->penalty;
537 if (cl->cpriority != cl->priority2) {
538 cl->cpriority = cl->priority2;
539 q->pmask |= (1<<cl->cpriority);
540 cl->xstats.overactions++;
545 /* TC_CBQ_OVL_DROP: penalize class by dropping */
547 static void cbq_ovl_drop(struct cbq_class *cl)
549 if (cl->q->ops->drop)
550 if (cl->q->ops->drop(cl->q))
552 cl->xstats.overactions++;
556 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
559 struct cbq_class *cl;
560 struct cbq_class *cl_prev = q->active[prio];
561 psched_time_t sched = now;
567 cl = cl_prev->next_alive;
568 if (now - cl->penalized > 0) {
569 cl_prev->next_alive = cl->next_alive;
570 cl->next_alive = NULL;
571 cl->cpriority = cl->priority;
573 cbq_activate_class(cl);
575 if (cl == q->active[prio]) {
576 q->active[prio] = cl_prev;
577 if (cl == q->active[prio]) {
578 q->active[prio] = NULL;
583 cl = cl_prev->next_alive;
584 } else if (sched - cl->penalized > 0)
585 sched = cl->penalized;
586 } while ((cl_prev = cl) != q->active[prio]);
591 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
593 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
595 struct Qdisc *sch = q->watchdog.qdisc;
597 psched_tdiff_t delay = 0;
600 now = psched_get_time();
606 int prio = ffz(~pmask);
611 tmp = cbq_undelay_prio(q, prio, now);
614 if (tmp < delay || delay == 0)
622 time = ktime_set(0, 0);
623 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
627 sch->flags &= ~TCQ_F_THROTTLED;
628 __netif_schedule(qdisc_root(sch));
629 return HRTIMER_NORESTART;
632 #ifdef CONFIG_NET_CLS_ACT
633 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
635 struct Qdisc *sch = child->__parent;
636 struct cbq_sched_data *q = qdisc_priv(sch);
637 struct cbq_class *cl = q->rx_class;
641 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
644 cbq_mark_toplevel(q, cl);
647 cl->q->__parent = sch;
649 ret = qdisc_enqueue(skb, cl->q);
650 if (ret == NET_XMIT_SUCCESS) {
652 qdisc_bstats_update(sch, skb);
654 cbq_activate_class(cl);
657 if (net_xmit_drop_count(ret))
668 It is mission critical procedure.
670 We "regenerate" toplevel cutoff, if transmitting class
671 has backlog and it is not regulated. It is not part of
672 original CBQ description, but looks more reasonable.
673 Probably, it is wrong. This question needs further investigation.
676 static __inline__ void
677 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
678 struct cbq_class *borrowed)
680 if (cl && q->toplevel >= borrowed->level) {
681 if (cl->q->q.qlen > 1) {
683 if (borrowed->undertime == PSCHED_PASTPERFECT) {
684 q->toplevel = borrowed->level;
687 } while ((borrowed=borrowed->borrow) != NULL);
690 /* It is not necessary now. Uncommenting it
691 will save CPU cycles, but decrease fairness.
693 q->toplevel = TC_CBQ_MAXLEVEL;
699 cbq_update(struct cbq_sched_data *q)
701 struct cbq_class *this = q->tx_class;
702 struct cbq_class *cl = this;
707 for ( ; cl; cl = cl->share) {
708 long avgidle = cl->avgidle;
711 cl->bstats.packets++;
712 cl->bstats.bytes += len;
715 (now - last) is total time between packet right edges.
716 (last_pktlen/rate) is "virtual" busy time, so that
718 idle = (now - last) - last_pktlen/rate
721 idle = q->now - cl->last;
722 if ((unsigned long)idle > 128*1024*1024) {
723 avgidle = cl->maxidle;
725 idle -= L2T(cl, len);
727 /* true_avgidle := (1-W)*true_avgidle + W*idle,
728 where W=2^{-ewma_log}. But cl->avgidle is scaled:
729 cl->avgidle == true_avgidle/W,
732 avgidle += idle - (avgidle>>cl->ewma_log);
736 /* Overlimit or at-limit */
738 if (avgidle < cl->minidle)
739 avgidle = cl->minidle;
741 cl->avgidle = avgidle;
743 /* Calculate expected time, when this class
744 will be allowed to send.
746 (1-W)*true_avgidle + W*delay = 0, i.e.
747 idle = (1/W - 1)*(-true_avgidle)
749 idle = (1 - W)*(-cl->avgidle);
751 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
755 To maintain the rate allocated to the class,
756 we add to undertime virtual clock,
757 necessary to complete transmitted packet.
758 (len/phys_bandwidth has been already passed
759 to the moment of cbq_update)
762 idle -= L2T(&q->link, len);
763 idle += L2T(cl, len);
765 cl->undertime = q->now + idle;
769 cl->undertime = PSCHED_PASTPERFECT;
770 if (avgidle > cl->maxidle)
771 cl->avgidle = cl->maxidle;
773 cl->avgidle = avgidle;
778 cbq_update_toplevel(q, this, q->tx_borrowed);
781 static __inline__ struct cbq_class *
782 cbq_under_limit(struct cbq_class *cl)
784 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
785 struct cbq_class *this_cl = cl;
787 if (cl->tparent == NULL)
790 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
796 /* It is very suspicious place. Now overlimit
797 action is generated for not bounded classes
798 only if link is completely congested.
799 Though it is in agree with ancestor-only paradigm,
800 it looks very stupid. Particularly,
801 it means that this chunk of code will either
802 never be called or result in strong amplification
803 of burstiness. Dangerous, silly, and, however,
804 no another solution exists.
806 if ((cl = cl->borrow) == NULL) {
807 this_cl->qstats.overlimits++;
808 this_cl->overlimit(this_cl);
811 if (cl->level > q->toplevel)
813 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
819 static __inline__ struct sk_buff *
820 cbq_dequeue_prio(struct Qdisc *sch, int prio)
822 struct cbq_sched_data *q = qdisc_priv(sch);
823 struct cbq_class *cl_tail, *cl_prev, *cl;
827 cl_tail = cl_prev = q->active[prio];
828 cl = cl_prev->next_alive;
835 struct cbq_class *borrow = cl;
838 (borrow = cbq_under_limit(cl)) == NULL)
841 if (cl->deficit <= 0) {
842 /* Class exhausted its allotment per
843 this round. Switch to the next one.
846 cl->deficit += cl->quantum;
850 skb = cl->q->dequeue(cl->q);
852 /* Class did not give us any skb :-(
853 It could occur even if cl->q->q.qlen != 0
854 f.e. if cl->q == "tbf"
859 cl->deficit -= qdisc_pkt_len(skb);
861 q->tx_borrowed = borrow;
863 #ifndef CBQ_XSTATS_BORROWS_BYTES
864 borrow->xstats.borrows++;
865 cl->xstats.borrows++;
867 borrow->xstats.borrows += qdisc_pkt_len(skb);
868 cl->xstats.borrows += qdisc_pkt_len(skb);
871 q->tx_len = qdisc_pkt_len(skb);
873 if (cl->deficit <= 0) {
874 q->active[prio] = cl;
876 cl->deficit += cl->quantum;
881 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
882 /* Class is empty or penalized.
883 Unlink it from active chain.
885 cl_prev->next_alive = cl->next_alive;
886 cl->next_alive = NULL;
888 /* Did cl_tail point to it? */
893 /* Was it the last class in this band? */
896 q->active[prio] = NULL;
897 q->activemask &= ~(1<<prio);
899 cbq_activate_class(cl);
903 q->active[prio] = cl_tail;
906 cbq_activate_class(cl);
914 } while (cl_prev != cl_tail);
917 q->active[prio] = cl_prev;
922 static __inline__ struct sk_buff *
923 cbq_dequeue_1(struct Qdisc *sch)
925 struct cbq_sched_data *q = qdisc_priv(sch);
929 activemask = q->activemask&0xFF;
931 int prio = ffz(~activemask);
932 activemask &= ~(1<<prio);
933 skb = cbq_dequeue_prio(sch, prio);
940 static struct sk_buff *
941 cbq_dequeue(struct Qdisc *sch)
944 struct cbq_sched_data *q = qdisc_priv(sch);
948 now = psched_get_time();
949 incr = now - q->now_rt;
952 psched_tdiff_t incr2;
953 /* Time integrator. We calculate EOS time
954 by adding expected packet transmission time.
955 If real time is greater, we warp artificial clock,
958 cbq_time = max(real_time, work);
960 incr2 = L2T(&q->link, q->tx_len);
963 if ((incr -= incr2) < 0)
972 skb = cbq_dequeue_1(sch);
975 sch->flags &= ~TCQ_F_THROTTLED;
979 /* All the classes are overlimit.
983 1. Scheduler is empty.
984 2. Toplevel cutoff inhibited borrowing.
985 3. Root class is overlimit.
987 Reset 2d and 3d conditions and retry.
989 Note, that NS and cbq-2.0 are buggy, peeking
990 an arbitrary class is appropriate for ancestor-only
991 sharing, but not for toplevel algorithm.
993 Our version is better, but slower, because it requires
994 two passes, but it is unavoidable with top-level sharing.
997 if (q->toplevel == TC_CBQ_MAXLEVEL &&
998 q->link.undertime == PSCHED_PASTPERFECT)
1001 q->toplevel = TC_CBQ_MAXLEVEL;
1002 q->link.undertime = PSCHED_PASTPERFECT;
1005 /* No packets in scheduler or nobody wants to give them to us :-(
1006 Sigh... start watchdog timer in the last case. */
1009 sch->qstats.overlimits++;
1011 qdisc_watchdog_schedule(&q->watchdog,
1012 now + q->wd_expires);
1017 /* CBQ class maintanance routines */
1019 static void cbq_adjust_levels(struct cbq_class *this)
1026 struct cbq_class *cl;
1028 if ((cl = this->children) != NULL) {
1030 if (cl->level > level)
1032 } while ((cl = cl->sibling) != this->children);
1034 this->level = level+1;
1035 } while ((this = this->tparent) != NULL);
1038 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1040 struct cbq_class *cl;
1041 struct hlist_node *n;
1044 if (q->quanta[prio] == 0)
1047 for (h = 0; h < q->clhash.hashsize; h++) {
1048 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1049 /* BUGGGG... Beware! This expression suffer of
1050 arithmetic overflows!
1052 if (cl->priority == prio) {
1053 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1056 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1057 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1058 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1064 static void cbq_sync_defmap(struct cbq_class *cl)
1066 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1067 struct cbq_class *split = cl->split;
1074 for (i=0; i<=TC_PRIO_MAX; i++) {
1075 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1076 split->defaults[i] = NULL;
1079 for (i=0; i<=TC_PRIO_MAX; i++) {
1080 int level = split->level;
1082 if (split->defaults[i])
1085 for (h = 0; h < q->clhash.hashsize; h++) {
1086 struct hlist_node *n;
1087 struct cbq_class *c;
1089 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1091 if (c->split == split && c->level < level &&
1093 split->defaults[i] = c;
1101 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1103 struct cbq_class *split = NULL;
1106 if ((split = cl->split) == NULL)
1108 splitid = split->common.classid;
1111 if (split == NULL || split->common.classid != splitid) {
1112 for (split = cl->tparent; split; split = split->tparent)
1113 if (split->common.classid == splitid)
1120 if (cl->split != split) {
1122 cbq_sync_defmap(cl);
1124 cl->defmap = def&mask;
1126 cl->defmap = (cl->defmap&~mask)|(def&mask);
1128 cbq_sync_defmap(cl);
1131 static void cbq_unlink_class(struct cbq_class *this)
1133 struct cbq_class *cl, **clp;
1134 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1136 qdisc_class_hash_remove(&q->clhash, &this->common);
1138 if (this->tparent) {
1147 } while ((cl = *clp) != this->sibling);
1149 if (this->tparent->children == this) {
1150 this->tparent->children = this->sibling;
1151 if (this->sibling == this)
1152 this->tparent->children = NULL;
1155 WARN_ON(this->sibling != this);
1159 static void cbq_link_class(struct cbq_class *this)
1161 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1162 struct cbq_class *parent = this->tparent;
1164 this->sibling = this;
1165 qdisc_class_hash_insert(&q->clhash, &this->common);
1170 if (parent->children == NULL) {
1171 parent->children = this;
1173 this->sibling = parent->children->sibling;
1174 parent->children->sibling = this;
1178 static unsigned int cbq_drop(struct Qdisc* sch)
1180 struct cbq_sched_data *q = qdisc_priv(sch);
1181 struct cbq_class *cl, *cl_head;
1185 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1186 if ((cl_head = q->active[prio]) == NULL)
1191 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1194 cbq_deactivate_class(cl);
1197 } while ((cl = cl->next_alive) != cl_head);
1203 cbq_reset(struct Qdisc* sch)
1205 struct cbq_sched_data *q = qdisc_priv(sch);
1206 struct cbq_class *cl;
1207 struct hlist_node *n;
1214 q->tx_borrowed = NULL;
1215 qdisc_watchdog_cancel(&q->watchdog);
1216 hrtimer_cancel(&q->delay_timer);
1217 q->toplevel = TC_CBQ_MAXLEVEL;
1218 q->now = psched_get_time();
1221 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1222 q->active[prio] = NULL;
1224 for (h = 0; h < q->clhash.hashsize; h++) {
1225 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1228 cl->next_alive = NULL;
1229 cl->undertime = PSCHED_PASTPERFECT;
1230 cl->avgidle = cl->maxidle;
1231 cl->deficit = cl->quantum;
1232 cl->cpriority = cl->priority;
1239 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1241 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1242 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1243 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1245 if (lss->change&TCF_CBQ_LSS_EWMA)
1246 cl->ewma_log = lss->ewma_log;
1247 if (lss->change&TCF_CBQ_LSS_AVPKT)
1248 cl->avpkt = lss->avpkt;
1249 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1250 cl->minidle = -(long)lss->minidle;
1251 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1252 cl->maxidle = lss->maxidle;
1253 cl->avgidle = lss->maxidle;
1255 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1256 cl->offtime = lss->offtime;
1260 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1262 q->nclasses[cl->priority]--;
1263 q->quanta[cl->priority] -= cl->weight;
1264 cbq_normalize_quanta(q, cl->priority);
1267 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1269 q->nclasses[cl->priority]++;
1270 q->quanta[cl->priority] += cl->weight;
1271 cbq_normalize_quanta(q, cl->priority);
1274 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1276 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1279 cl->allot = wrr->allot;
1281 cl->weight = wrr->weight;
1282 if (wrr->priority) {
1283 cl->priority = wrr->priority-1;
1284 cl->cpriority = cl->priority;
1285 if (cl->priority >= cl->priority2)
1286 cl->priority2 = TC_CBQ_MAXPRIO-1;
1293 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1295 switch (ovl->strategy) {
1296 case TC_CBQ_OVL_CLASSIC:
1297 cl->overlimit = cbq_ovl_classic;
1299 case TC_CBQ_OVL_DELAY:
1300 cl->overlimit = cbq_ovl_delay;
1302 case TC_CBQ_OVL_LOWPRIO:
1303 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1304 ovl->priority2-1 <= cl->priority)
1306 cl->priority2 = ovl->priority2-1;
1307 cl->overlimit = cbq_ovl_lowprio;
1309 case TC_CBQ_OVL_DROP:
1310 cl->overlimit = cbq_ovl_drop;
1312 case TC_CBQ_OVL_RCLASSIC:
1313 cl->overlimit = cbq_ovl_rclassic;
1318 cl->penalty = ovl->penalty;
1322 #ifdef CONFIG_NET_CLS_ACT
1323 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1325 cl->police = p->police;
1327 if (cl->q->handle) {
1328 if (p->police == TC_POLICE_RECLASSIFY)
1329 cl->q->reshape_fail = cbq_reshape_fail;
1331 cl->q->reshape_fail = NULL;
1337 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1339 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1343 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1344 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1345 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1346 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1347 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1348 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1349 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1350 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1353 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1355 struct cbq_sched_data *q = qdisc_priv(sch);
1356 struct nlattr *tb[TCA_CBQ_MAX + 1];
1357 struct tc_ratespec *r;
1360 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1364 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1367 r = nla_data(tb[TCA_CBQ_RATE]);
1369 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1372 err = qdisc_class_hash_init(&q->clhash);
1377 q->link.sibling = &q->link;
1378 q->link.common.classid = sch->handle;
1379 q->link.qdisc = sch;
1380 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1383 q->link.q = &noop_qdisc;
1385 q->link.priority = TC_CBQ_MAXPRIO-1;
1386 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1387 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1388 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1389 q->link.overlimit = cbq_ovl_classic;
1390 q->link.allot = psched_mtu(qdisc_dev(sch));
1391 q->link.quantum = q->link.allot;
1392 q->link.weight = q->link.R_tab->rate.rate;
1394 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1395 q->link.avpkt = q->link.allot/2;
1396 q->link.minidle = -0x7FFFFFFF;
1398 qdisc_watchdog_init(&q->watchdog, sch);
1399 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1400 q->delay_timer.function = cbq_undelay;
1401 q->toplevel = TC_CBQ_MAXLEVEL;
1402 q->now = psched_get_time();
1405 cbq_link_class(&q->link);
1407 if (tb[TCA_CBQ_LSSOPT])
1408 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1410 cbq_addprio(q, &q->link);
1414 qdisc_put_rtab(q->link.R_tab);
1418 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1420 unsigned char *b = skb_tail_pointer(skb);
1422 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1430 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1432 unsigned char *b = skb_tail_pointer(skb);
1433 struct tc_cbq_lssopt opt;
1436 if (cl->borrow == NULL)
1437 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1438 if (cl->share == NULL)
1439 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1440 opt.ewma_log = cl->ewma_log;
1441 opt.level = cl->level;
1442 opt.avpkt = cl->avpkt;
1443 opt.maxidle = cl->maxidle;
1444 opt.minidle = (u32)(-cl->minidle);
1445 opt.offtime = cl->offtime;
1447 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1455 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1457 unsigned char *b = skb_tail_pointer(skb);
1458 struct tc_cbq_wrropt opt;
1461 opt.allot = cl->allot;
1462 opt.priority = cl->priority+1;
1463 opt.cpriority = cl->cpriority+1;
1464 opt.weight = cl->weight;
1465 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1473 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1475 unsigned char *b = skb_tail_pointer(skb);
1476 struct tc_cbq_ovl opt;
1478 opt.strategy = cl->ovl_strategy;
1479 opt.priority2 = cl->priority2+1;
1481 opt.penalty = cl->penalty;
1482 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1490 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1492 unsigned char *b = skb_tail_pointer(skb);
1493 struct tc_cbq_fopt opt;
1495 if (cl->split || cl->defmap) {
1496 opt.split = cl->split ? cl->split->common.classid : 0;
1497 opt.defmap = cl->defmap;
1499 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1508 #ifdef CONFIG_NET_CLS_ACT
1509 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1511 unsigned char *b = skb_tail_pointer(skb);
1512 struct tc_cbq_police opt;
1515 opt.police = cl->police;
1518 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1528 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1530 if (cbq_dump_lss(skb, cl) < 0 ||
1531 cbq_dump_rate(skb, cl) < 0 ||
1532 cbq_dump_wrr(skb, cl) < 0 ||
1533 cbq_dump_ovl(skb, cl) < 0 ||
1534 #ifdef CONFIG_NET_CLS_ACT
1535 cbq_dump_police(skb, cl) < 0 ||
1537 cbq_dump_fopt(skb, cl) < 0)
1542 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1544 struct cbq_sched_data *q = qdisc_priv(sch);
1545 struct nlattr *nest;
1547 nest = nla_nest_start(skb, TCA_OPTIONS);
1549 goto nla_put_failure;
1550 if (cbq_dump_attr(skb, &q->link) < 0)
1551 goto nla_put_failure;
1552 nla_nest_end(skb, nest);
1556 nla_nest_cancel(skb, nest);
/*
 * cbq_dump_stats - Qdisc_ops.dump_stats: copy the root class's CBQ
 * extended stats (xstats) to userspace, refreshing avgidle first so
 * the snapshot reflects the current value.
 * NOTE(review): the return-type line of this function precedes the view.
 */
1561 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1563 struct cbq_sched_data *q = qdisc_priv(sch);
1565 q->link.xstats.avgidle = q->link.avgidle; /* sync live scheduler state into the stats struct */
1566 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
/*
 * cbq_dump_class - Qdisc_class_ops.dump: fill the tcmsg header for one
 * class (parent/handle/leaf-qdisc info) and dump its CBQ attributes in
 * a nested TCA_OPTIONS attribute, cancelling the nest on failure.
 * NOTE(review): fragment -- the if/else around tcm_parent, the
 * nest-NULL check and the return statements are not visible here.
 */
1570 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1571 struct sk_buff *skb, struct tcmsg *tcm)
1573 struct cbq_class *cl = (struct cbq_class*)arg;
1574 struct nlattr *nest;
1577 tcm->tcm_parent = cl->tparent->common.classid; /* taken when class has a parent */
1579 tcm->tcm_parent = TC_H_ROOT; /* else branch: top-level class */
1580 tcm->tcm_handle = cl->common.classid;
1581 tcm->tcm_info = cl->q->handle; /* handle of the attached leaf qdisc */
1583 nest = nla_nest_start(skb, TCA_OPTIONS);
1585 goto nla_put_failure;
1586 if (cbq_dump_attr(skb, cl) < 0)
1587 goto nla_put_failure;
1588 nla_nest_end(skb, nest)
1592 nla_nest_cancel(skb, nest); /* nla_put_failure: undo partial attribute */
/*
 * cbq_dump_class_stats - Qdisc_class_ops.dump_stats for one class:
 * refresh qlen/avgidle/undertime snapshots, then copy basic stats,
 * rate estimator, queue stats and finally the CBQ app-specific xstats.
 * NOTE(review): return-type line and the error return between the
 * copies are not visible in this fragment.
 */
1597 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1598 struct gnet_dump *d)
1600 struct cbq_sched_data *q = qdisc_priv(sch);
1601 struct cbq_class *cl = (struct cbq_class*)arg;
1603 cl->qstats.qlen = cl->q->q.qlen; /* mirror leaf qdisc length into class qstats */
1604 cl->xstats.avgidle = cl->avgidle;
1605 cl->xstats.undertime = 0; /* 0 unless the class is currently overlimit */
1607 if (cl->undertime != PSCHED_PASTPERFECT)
1608 cl->xstats.undertime = cl->undertime - q->now; /* time remaining until class is eligible again */
1610 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1611 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1612 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1615 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
/*
 * cbq_graft - Qdisc_class_ops.graft: attach a new leaf qdisc to the
 * class, returning the old one through *old.  A NULL new qdisc is
 * replaced with a freshly created default pfifo.
 * NOTE(review): fragment -- the "if (new == NULL)" guard, the
 * noop_qdisc fallback, sch_tree_lock() and the qdisc swap itself are
 * missing from this view; only the surrounding lines remain.
 */
1618 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1621 struct cbq_class *cl = (struct cbq_class*)arg;
1624 new = qdisc_create_dflt(sch->dev_queue,
1625 &pfifo_qdisc_ops, cl->common.classid); /* default leaf when caller passed NULL */
1629 #ifdef CONFIG_NET_CLS_ACT
1630 if (cl->police == TC_POLICE_RECLASSIFY)
1631 new->reshape_fail = cbq_reshape_fail; /* let policing requeue into another class on overlimit */
1637 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); /* purge old leaf's backlog accounting up the tree */
1639 sch_tree_unlock(sch);
/*
 * cbq_leaf - Qdisc_class_ops.leaf: return the qdisc attached to this
 * class (the "return cl->q;" line is outside this fragment).
 */
1644 static struct Qdisc *
1645 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1647 struct cbq_class *cl = (struct cbq_class*)arg;
/*
 * cbq_qlen_notify - called when the class's leaf qdisc may have become
 * empty (e.g. after qdisc_tree_decrease_qlen); take the class off the
 * active (WRR) list once it has nothing left to send.
 */
1652 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1654 struct cbq_class *cl = (struct cbq_class *)arg;
1656 if (cl->q->q.qlen == 0)
1657 cbq_deactivate_class(cl);
/*
 * cbq_get - Qdisc_class_ops.get: look up a class by classid and return
 * it as an opaque handle (0 when not found).
 * NOTE(review): the refcount increment that normally pairs with
 * cbq_put() is on a line missing from this fragment -- TODO confirm.
 */
1660 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1662 struct cbq_sched_data *q = qdisc_priv(sch);
1663 struct cbq_class *cl = cbq_class_lookup(q, classid);
1667 return (unsigned long)cl; /* NULL lookup yields 0, the "not found" handle */
/*
 * cbq_destroy_class - release everything a class owns: filter chain,
 * leaf qdisc, rate table and rate estimator.  Callers must have
 * dropped all references first (WARN_ON checks leftover filter refs).
 * NOTE(review): the final kfree(cl) is outside this fragment.
 */
1672 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1674 struct cbq_sched_data *q = qdisc_priv(sch);
1676 WARN_ON(cl->filters); /* should be zero: all filters unbound before destroy */
1678 tcf_destroy_chain(&cl->filter_list);
1679 qdisc_destroy(cl->q);
1680 qdisc_put_rtab(cl->R_tab);
1681 gen_kill_estimator(&cl->bstats, &cl->rate_est);
/*
 * cbq_destroy - Qdisc_ops.destroy: tear down the whole qdisc.  Two
 * passes over the class hash: first destroy every filter chain, then
 * destroy the classes themselves (safe iteration, since destroy
 * unlinks entries).  See the in-code TGR '04 note for why filters go
 * first.  NOTE(review): return-type line and the rx_class reset under
 * CONFIG_NET_CLS_ACT are on lines missing from this fragment.
 */
1687 cbq_destroy(struct Qdisc* sch)
1689 struct cbq_sched_data *q = qdisc_priv(sch);
1690 struct hlist_node *n, *next;
1691 struct cbq_class *cl;
1694 #ifdef CONFIG_NET_CLS_ACT
1698 * Filters must be destroyed first because we don't destroy the
1699 * classes from root to leafs which means that filters can still
1700 * be bound to classes which have been destroyed already. --TGR '04
1702 for (h = 0; h < q->clhash.hashsize; h++) {
1703 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1704 tcf_destroy_chain(&cl->filter_list);
1706 for (h = 0; h < q->clhash.hashsize; h++) {
1707 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], /* _safe: cbq_destroy_class unlinks cl */
1709 cbq_destroy_class(sch, cl);
1711 qdisc_class_hash_destroy(&q->clhash);
/*
 * cbq_put - Qdisc_class_ops.put: drop one reference; when the last
 * reference goes away, clear any stale q->rx_class pointer (under the
 * root qdisc lock, since the receive path reads it) and destroy the
 * class.  NOTE(review): the "q->rx_class = NULL" assignment and the
 * closing braces are on lines missing from this fragment.
 */
1714 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1716 struct cbq_class *cl = (struct cbq_class*)arg;
1718 if (--cl->refcnt == 0) {
1719 #ifdef CONFIG_NET_CLS_ACT
1720 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1721 struct cbq_sched_data *q = qdisc_priv(sch);
1723 spin_lock_bh(root_lock); /* rx_class is used by the enqueue/classify path */
1724 if (q->rx_class == cl)
1726 spin_unlock_bh(root_lock);
1729 cbq_destroy_class(sch, cl);
/*
 * cbq_change_class - Qdisc_class_ops.change: modify an existing class
 * (cl != NULL on entry via *arg) or create a new one.
 *
 * Change path: validate parent, optionally swap rate table and rate
 * estimator, then under sch_tree_lock apply LSS/WRR/overlimit/police/
 * filter options and reactivate the class.
 *
 * Create path: require WRR+RATE+LSSOPT attributes, pick or autogenerate
 * a classid, allocate and initialize the class (defaults inherited from
 * the root "link" class), then grow the class hash.
 *
 * NOTE(review): this is a heavily elided fragment -- error-handling
 * branches, sch_tree_lock() calls, several assignments and the
 * failure/return labels are on missing lines.  Comments below describe
 * only what the visible lines establish.
 */
1734 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1738 struct cbq_sched_data *q = qdisc_priv(sch);
1739 struct cbq_class *cl = (struct cbq_class*)*arg; /* non-NULL => change existing class */
1740 struct nlattr *opt = tca[TCA_OPTIONS];
1741 struct nlattr *tb[TCA_CBQ_MAX + 1];
1742 struct cbq_class *parent;
1743 struct qdisc_rate_table *rtab = NULL;
1748 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
/* ---- change of an existing class ---- */
1756 cl->tparent->common.classid != parentid) /* reject reparenting: parentid must match current parent */
1758 if (!cl->tparent && parentid != TC_H_ROOT)
1762 if (tb[TCA_CBQ_RATE]) {
1763 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1769 if (tca[TCA_RATE]) {
1770 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1771 qdisc_root_sleeping_lock(sch),
1775 qdisc_put_rtab(rtab); /* estimator replacement failed: release the new rtab */
1780 /* Change class parameters */
1783 if (cl->next_alive != NULL) /* on the active WRR list? take it off while reconfiguring */
1784 cbq_deactivate_class(cl);
1787 qdisc_put_rtab(cl->R_tab); /* old rate table replaced by rtab (assignment on a missing line) */
1791 if (tb[TCA_CBQ_LSSOPT])
1792 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1794 if (tb[TCA_CBQ_WRROPT]) {
1796 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1799 if (tb[TCA_CBQ_OVL_STRATEGY])
1800 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1802 #ifdef CONFIG_NET_CLS_ACT
1803 if (tb[TCA_CBQ_POLICE])
1804 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1807 if (tb[TCA_CBQ_FOPT])
1808 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1811 cbq_activate_class(cl); /* put it back on the WRR list if it has packets */
1813 sch_tree_unlock(sch);
/* ---- creation of a new class ---- */
1818 if (parentid == TC_H_ROOT)
1821 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1822 tb[TCA_CBQ_LSSOPT] == NULL) /* these three are mandatory for a new class */
1825 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1831 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) /* explicit classid: must be ours and unused */
1835 classid = TC_H_MAKE(sch->handle,0x8000); /* no classid given: autogenerate in the 0x8000+ minor range */
1837 for (i=0; i<0x8000; i++) {
1838 if (++q->hgenerator >= 0x8000) /* wrap the generator (reset line missing from view) */
1840 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1846 classid = classid|q->hgenerator;
1851 parent = cbq_class_lookup(q, parentid);
1858 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1862 if (tca[TCA_RATE]) {
1863 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1864 qdisc_root_sleeping_lock(sch),
1875 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1877 cl->q = &noop_qdisc; /* fallback when default qdisc creation failed -- guard on a missing line */
1878 cl->common.classid = classid;
1879 cl->tparent = parent;
1881 cl->allot = parent->allot; /* inherit allotment from parent */
1882 cl->quantum = cl->allot;
1883 cl->weight = cl->R_tab->rate.rate; /* default WRR weight == configured rate */
1887 cl->borrow = cl->tparent; /* may borrow bandwidth from parent by default */
1888 if (cl->tparent != &q->link)
1889 cl->share = cl->tparent;
1890 cbq_adjust_levels(parent); /* parent gained a child: recompute tree levels */
1891 cl->minidle = -0x7FFFFFFF;
1892 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); /* safe: presence checked above */
1893 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1894 if (cl->ewma_log==0) /* unset parameters inherit the link (root) defaults */
1895 cl->ewma_log = q->link.ewma_log;
1897 cl->maxidle = q->link.maxidle;
1899 cl->avpkt = q->link.avpkt;
1900 cl->overlimit = cbq_ovl_classic; /* default overlimit action; may be overridden below */
1901 if (tb[TCA_CBQ_OVL_STRATEGY])
1902 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1903 #ifdef CONFIG_NET_CLS_ACT
1904 if (tb[TCA_CBQ_POLICE])
1905 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1907 if (tb[TCA_CBQ_FOPT])
1908 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1909 sch_tree_unlock(sch);
1911 qdisc_class_hash_grow(sch, &q->clhash); /* may rehash now that a class was added */
1913 *arg = (unsigned long)cl; /* hand the new class back to the caller */
1917 qdisc_put_rtab(rtab); /* failure path: drop the rate table we acquired */
/*
 * cbq_delete - Qdisc_class_ops.delete: remove a class, refusing when
 * it still has filters, children, or is the built-in root link class.
 * Drains the leaf qdisc, clears scheduler pointers that may reference
 * the class, unlinks it and fixes up tree levels and default maps.
 * NOTE(review): fragment -- sch_tree_lock(), qdisc_reset(cl->q), the
 * tx_class/rx_class NULL assignments and the final return are missing.
 */
1921 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1923 struct cbq_sched_data *q = qdisc_priv(sch);
1924 struct cbq_class *cl = (struct cbq_class*)arg;
1927 if (cl->filters || cl->children || cl == &q->link) /* still referenced: refuse deletion */
1932 qlen = cl->q->q.qlen;
1934 qdisc_tree_decrease_qlen(cl->q, qlen); /* propagate the drained backlog up the tree */
1937 cbq_deactivate_class(cl);
1939 if (q->tx_borrowed == cl)
1940 q->tx_borrowed = q->tx_class; /* don't leave dangling borrow pointer */
1941 if (q->tx_class == cl) {
1943 q->tx_borrowed = NULL;
1945 #ifdef CONFIG_NET_CLS_ACT
1946 if (q->rx_class == cl)
1950 cbq_unlink_class(cl);
1951 cbq_adjust_levels(cl->tparent); /* parent lost a child: recompute levels */
1953 cbq_sync_defmap(cl); /* remove this class from any split default maps */
1956 sch_tree_unlock(sch);
1958 BUG_ON(--cl->refcnt == 0);
1960 * This shouldn't happen: we "hold" one cops->get() when called
1961 * from tc_ctl_tclass; the destroy method is done from cops->put().
/*
 * cbq_find_tcf - Qdisc_class_ops.tcf_chain: return the filter-chain
 * anchor for the given class (the "cl = &q->link" fallback for arg==0
 * is on a line missing from this fragment -- TODO confirm).
 */
1967 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1969 struct cbq_sched_data *q = qdisc_priv(sch);
1970 struct cbq_class *cl = (struct cbq_class *)arg;
1975 return &cl->filter_list;
/*
 * cbq_bind_filter - Qdisc_class_ops.bind_tcf: bind a filter to the
 * class identified by classid.  Rejects binding when the parent class
 * is not above the target in the link-sharing hierarchy (level check).
 * NOTE(review): the cl != NULL guard and the filters++ refcount are on
 * lines missing from this fragment.
 */
1978 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1981 struct cbq_sched_data *q = qdisc_priv(sch);
1982 struct cbq_class *p = (struct cbq_class*)parent;
1983 struct cbq_class *cl = cbq_class_lookup(q, classid);
1986 if (p && p->level <= cl->level) /* parent must be strictly higher in the tree */
1989 return (unsigned long)cl;
/*
 * cbq_unbind_filter - Qdisc_class_ops.unbind_tcf: release one filter
 * binding (the cl->filters-- decrement is outside this fragment).
 */
1994 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1996 struct cbq_class *cl = (struct cbq_class*)arg;
/*
 * cbq_walk - Qdisc_class_ops.walk: iterate all classes in the hash,
 * honoring arg->skip/arg->count and stopping early when the callback
 * returns < 0 (standard qdisc walker contract).
 * NOTE(review): the arg->stop guard, count increments and the stop
 * assignment are on lines missing from this fragment.
 */
2001 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2003 struct cbq_sched_data *q = qdisc_priv(sch);
2004 struct cbq_class *cl;
2005 struct hlist_node *n;
2011 for (h = 0; h < q->clhash.hashsize; h++) {
2012 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2013 if (arg->count < arg->skip) { /* still skipping previously-dumped entries */
2017 if (arg->fn(sch, (unsigned long)cl, arg) < 0) { /* callback aborts the walk */
/*
 * Class-operations table wiring the functions above into the generic
 * qdisc class API.  NOTE(review): several entries (.graft, .leaf,
 * .get, .put) are on lines missing from this fragment.
 */
2026 static const struct Qdisc_class_ops cbq_class_ops = {
2029 .qlen_notify = cbq_qlen_notify,
2032 .change = cbq_change_class,
2033 .delete = cbq_delete,
2035 .tcf_chain = cbq_find_tcf,
2036 .bind_tcf = cbq_bind_filter,
2037 .unbind_tcf = cbq_unbind_filter,
2038 .dump = cbq_dump_class,
2039 .dump_stats = cbq_dump_class_stats,
/*
 * Top-level qdisc operations for CBQ.  __read_mostly: registered once
 * and then only read on the fast path.  NOTE(review): .id, .init,
 * .reset, .change and .dump entries are on lines missing from this
 * fragment.
 */
2042 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2044 .cl_ops = &cbq_class_ops,
2046 .priv_size = sizeof(struct cbq_sched_data),
2047 .enqueue = cbq_enqueue,
2048 .dequeue = cbq_dequeue,
2049 .peek = qdisc_peek_dequeued, /* generic peek built on dequeue + requeue-one */
2053 .destroy = cbq_destroy,
2056 .dump_stats = cbq_dump_stats,
2057 .owner = THIS_MODULE,
/* Module entry point: register the CBQ qdisc with the TC core. */
2060 static int __init cbq_module_init(void)
2062 return register_qdisc(&cbq_qdisc_ops);
/* Module exit: unregister the qdisc; core refuses unload while in use. */
2064 static void __exit cbq_module_exit(void)
2066 unregister_qdisc(&cbq_qdisc_ops);
/* Standard module registration boilerplate. */
2068 module_init(cbq_module_init)
2069 module_exit(cbq_module_exit)
2070 MODULE_LICENSE("GPL");