/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement in [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. Workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
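
/*	Worked example of the bound above (illustrative numbers, not taken
	from the sources, reading [x] as a ceiling): take B = 1.25e6 byte/s
	(10 Mbit/s), MTU = 1500 bytes, k = 2 classes with r_1 = r_2 = 0.05
	(so r = 0.1) and W = 10000 bytes, i.e. a per-round allotment
	W*r_i = 500 bytes, well below MTU. Then

	delay_1 <= (ceil(1500/500)*1000 + 1000 + 2*1500)/1.25e6 s = 5.6 ms,

	so delay stays bounded even with sub-MTU quanta, which is the point
	of the note above.  */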
struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};

#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
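
/* L2T ("length to time"): qdisc_l2t() looks up the precomputed rate table
 * attached to the class and returns the transmission time of a packet of
 * the given length at the class rate, in scheduler ticks. Illustrative
 * numbers: at 1 Mbit/s a 1500-byte packet occupies 1500*8/1e6 s = 12 ms
 * of link time.
 */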

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link-sharing rules (f.e. route based) at root of CBQ,
 * so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */
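
/* For instance (hypothetical setup, for illustration only): a route-based
 * filter at the root may resolve to a split node for interactive traffic;
 * classification then restarts at that node, and if none of its filters
 * match, the packet falls back to the node's defaults[] slot indexed by
 * skb->priority & TC_PRIO_MAX.
 */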

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];
		}

		if (cl == NULL)
			goto fallback;
		if (cl->level >= head->level)
			goto fallback;
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	 apply agency specific classifier.
		 *	 Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
 * A packet has just been enqueued on the empty class.
 * cbq_activate_class adds it to the tail of the active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

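/* Invariant worth noting (descriptive comment, not in the original):
 * q->active[prio] always points at the tail of a circular singly linked
 * list, so q->active[prio]->next_alive is the head from which the
 * round-robin scan starts.
 */
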
/*
 * Unlink class from active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a bug in this place; apparently
		 * they forgot to shift it by cl->ewma_log.
		 */
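		/* Illustrative check (assumed numbers, ignoring the rate
		 * correction terms of cbq_update): with ewma_log = 5 and
		 * avgidle = -3200, cbq_update set undertime = now + 3100,
		 * since 3200 - (3200 >> 5) = 3100. Reaching this point at
		 * that same q->now with offtime = 2000 gives
		 * delay = 3100 + 2000, and the subtraction below removes
		 * the same 3100, leaving delay == offtime: the avgidle
		 * penalty is forgiven.
		 */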
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 * they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
 * It is a mission-critical procedure.
 *
 * We "regenerate" toplevel cutoff, if transmitting class
 * has backlog and it is not regulated. It is not part of
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 *	idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
		 * cl->avgidle == true_avgidle/W,
		 * hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
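
			/* Worked example (assumed numbers): with
			 * ewma_log = 5 (W = 1/32), avgidle = 640 and a
			 * measured idle = 100, the line above yields
			 * 640 + 100 - (640 >> 5) = 720, which is exactly
			 * the scaled form of
			 * true_avgidle' = (31/32)*true_avgidle + (1/32)*100.
			 */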
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			 * will be allowed to send.
			 * It will occur when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
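
			/* The shift form follows because cl->avgidle is the
			 * scaled value true_avgidle/W:
			 *   (1/W - 1)*(-true_avgidle)
			 *	= (1 - W)*(-avgidle)
			 *	= (-avgidle) - W*(-avgidle),
			 * and multiplying by W = 2^{-ewma_log} is the right
			 * shift by cl->ewma_log.
			 */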

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime the virtual clock time
			 * necessary to complete the transmitted packet.
			 * (len/phys_bandwidth has already passed
			 * by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is a very suspicious place. Now the overlimit
		 * action is generated for unbounded classes
		 * only if the link is completely congested.
		 * Though it agrees with the ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or will result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no other solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! It was dangerous place :-) */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		 * by adding expected packet transmission time.
		 * If real time is greater, we warp artificial clock,
		 * so that:
		 *
		 * cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

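	/* Example of the warp (assumed numbers): if 100 ticks of real time
	 * passed (incr = 100) but the last packet needed incr2 = 160 ticks
	 * at the link rate, q->now advances by 160 and incr clamps to 0:
	 * work time ran ahead of real time and wins the max(). If instead
	 * incr = 200, q->now advances by 160 + 40 = 200 and tracks real
	 * time again.
	 */
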
	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible if:
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy: peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the toplevel algorithm.
		 *
		 * Our version is better but slower, because it requires
		 * two passes; that is unavoidable with top-level sharing.
		 */
		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		cl = this->children;
		if (cl) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
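			/* Illustrative numbers: with allot = 1514,
			 * weight = 2, nclasses[prio] = 3 and a weight sum
			 * quanta[prio] = 6, the expression above gives
			 * quantum = 2*1514*3/6 = 1514, i.e. one MTU-sized
			 * allotment per round for an average-weight class.
			 */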
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
					   cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct cbq_class *c;

			hlist_for_each_entry(c, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		split = cl->split;
		if (!split)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		cl_head = q->active[prio];
		if (!cl_head)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}

static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO - 1;
	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}

static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	memset(&opt, 0, sizeof(opt));
	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl->q;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid ^ sch->handle) ||
		    cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (!cl->q)
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		= cbq_graft,
	.leaf		= cbq_leaf,
	.qlen_notify	= cbq_qlen_notify,
	.get		= cbq_get,
	.put		= cbq_put,
	.change		= cbq_change_class,
	.delete		= cbq_delete,
	.walk		= cbq_walk,
	.tcf_chain	= cbq_find_tcf,
	.bind_tcf	= cbq_bind_filter,
	.unbind_tcf	= cbq_unbind_filter,
	.dump		= cbq_dump_class,
	.dump_stats	= cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &cbq_class_ops,
	.id		= "cbq",
	.priv_size	= sizeof(struct cbq_sched_data),
	.enqueue	= cbq_enqueue,
	.dequeue	= cbq_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= cbq_drop,
	.init		= cbq_init,
	.reset		= cbq_reset,
	.destroy	= cbq_destroy,
	.change		= NULL,
	.dump		= cbq_dump,
	.dump_stats	= cbq_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");