net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <net/netlink.h>
  21 #include <net/pkt_sched.h>
  22
  23
  24 /*      Class-Based Queueing (CBQ) algorithm.
  25         =======================================
  26
  27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                  Management Models for Packet Networks",
  29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                  Parameters", 1996
  35
  36                  [4] Sally Floyd and Michael Speer, "Experimental Results
  37                  for Class-Based Queueing", 1998, not published.
  38
  39         -----------------------------------------------------------------------
  40
  41         Algorithm skeleton was taken from NS simulator cbq.cc.
  42         If someone wants to check this code against the LBL version,
  43         he should take into account that ONLY the skeleton was borrowed,
  44         the implementation is different. Particularly:
  45
  46         --- The WRR algorithm is different. Our version looks more
  47         reasonable (I hope) and works when quanta are allowed to be
  48         less than MTU, which is always the case when real time classes
  49         have small rates. Note, that the statement of [3] is
  50         incomplete, delay may actually be estimated even if class
  51         per-round allotment is less than MTU. Namely, if per-round
  52         allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56         In the worst case we have IntServ estimate with D = W*r+k*MTU
  57         and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61         interpret some places, which look like wrong translations
  62         from NS. Anyone is advised to find these differences
  63         and explain to me, why I am wrong 8).
  64
  65         --- Linux has no EOI event, so that we cannot estimate true class
  66         idle time. Workaround is to consider the next dequeue event
  67         as sign that previous packet is finished. This is wrong because of
  68         internal device queueing, but on a permanently loaded link it is true.
  69         Moreover, combined with clock integrator, this scheme looks
  70         very close to an ideal solution.  */
  71
  72 struct cbq_sched_data;
  73
  74
  75 struct cbq_class
  76 {
  77         struct Qdisc_class_common common;
  78         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  79
  80 /* Parameters */
  81         unsigned char           priority;       /* class priority */
  82         unsigned char           priority2;      /* priority to be used after overlimit */
  83         unsigned char           ewma_log;       /* time constant for idle time calculation */
  84         unsigned char           ovl_strategy;
  85 #ifdef CONFIG_NET_CLS_ACT
  86         unsigned char           police;
  87 #endif
  88
  89         u32                     defmap;
  90
  91         /* Link-sharing scheduler parameters */
  92         long                    maxidle;        /* Class parameters: see below. */
  93         long                    offtime;
  94         long                    minidle;
  95         u32                     avpkt;
  96         struct qdisc_rate_table *R_tab;
  97
  98         /* Overlimit strategy parameters */
  99         void                    (*overlimit)(struct cbq_class *cl);
 100         psched_tdiff_t          penalty;
 101
 102         /* General scheduler (WRR) parameters */
 103         long                    allot;
 104         long                    quantum;        /* Allotment per WRR round */
 105         long                    weight;         /* Relative allotment: see below */
 106
 107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108         struct cbq_class        *split;         /* Ptr to split node */
 109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                    parent otherwise */
 113         struct cbq_class        *sibling;       /* Sibling chain */
 114         struct cbq_class        *children;      /* Pointer to children chain */
 115
 116         struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119 /* Variables */
 120         unsigned char           cpriority;      /* Effective priority */
 121         unsigned char           delayed;
 122         unsigned char           level;          /* level of the class in hierarchy:
 123                                                    0 for leaf classes, and maximal
 124                                                    level of children + 1 for nodes.
 125                                                  */
 126
 127         psched_time_t           last;           /* Last end of service */
 128         psched_time_t           undertime;
 129         long                    avgidle;
 130         long                    deficit;        /* Saved deficit for WRR */
 131         psched_time_t           penalized;
 132         struct gnet_stats_basic_packed bstats;
 133         struct gnet_stats_queue qstats;
 134         struct gnet_stats_rate_est rate_est;
 135         struct tc_cbq_xstats    xstats;
 136
 137         struct tcf_proto        *filter_list;
 138
 139         int                     refcnt;
 140         int                     filters;
 141
 142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143 };
 144
 145 struct cbq_sched_data
 146 {
 147         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 148         int                     nclasses[TC_CBQ_MAXPRIO+1];
 149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151         struct cbq_class        link;
 152
 153         unsigned                activemask;
 154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                    with backlog */
 156
 157 #ifdef CONFIG_NET_CLS_ACT
 158         struct cbq_class        *rx_class;
 159 #endif
 160         struct cbq_class        *tx_class;
 161         struct cbq_class        *tx_borrowed;
 162         int                     tx_len;
 163         psched_time_t           now;            /* Cached timestamp */
 164         psched_time_t           now_rt;         /* Cached real time */
 165         unsigned                pmask;
 166
 167         struct hrtimer          delay_timer;
 168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                    started when CBQ has
 170                                                    backlog, but cannot
 171                                                    transmit just now */
 172         psched_tdiff_t          wd_expires;
 173         int                     toplevel;
 174         u32                     hgenerator;
 175 };
 176
 177
 178 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180 static __inline__ struct cbq_class *
 181 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 182 {
 183         struct Qdisc_class_common *clc;
 184
 185         clc = qdisc_class_find(&q->clhash, classid);
 186         if (clc == NULL)
 187                 return NULL;
 188         return container_of(clc, struct cbq_class, common);
 189 }
 190
 191 #ifdef CONFIG_NET_CLS_ACT
 192
 193 static struct cbq_class *
 194 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 195 {
 196         struct cbq_class *cl, *new;
 197
 198         for (cl = this->tparent; cl; cl = cl->tparent)
 199                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 200                         return new;
 201
 202         return NULL;
 203 }
 204
 205 #endif
 206
 207 /* Classify packet. The procedure is pretty complicated, but
 208    it allows us to combine link sharing and priority scheduling
 209    transparently.
 210
 211    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212    so that it resolves to split nodes. Then packets are classified
 213    by logical priority, or a more specific classifier may be attached
 214    to the split node.
 215  */
 216
 217 static struct cbq_class *
 218 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219 {
 220         struct cbq_sched_data *q = qdisc_priv(sch);
 221         struct cbq_class *head = &q->link;
 222         struct cbq_class **defmap;
 223         struct cbq_class *cl = NULL;
 224         u32 prio = skb->priority;
 225         struct tcf_result res;
 226
 227         /*
 228          *  Step 1. If skb->priority points to one of our classes, use it.
 229          */
 230         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 231             (cl = cbq_class_lookup(q, prio)) != NULL)
 232                 return cl;
 233
 234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235         for (;;) {
 236                 int result = 0;
 237                 defmap = head->defaults;
 238
 239                 /*
 240                  * Step 2+n. Apply classifier.
 241                  */
 242                 if (!head->filter_list ||
 243                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                         goto fallback;
 245
 246                 if ((cl = (void*)res.class) == NULL) {
 247                         if (TC_H_MAJ(res.classid))
 248                                 cl = cbq_class_lookup(q, res.classid);
 249                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 250                                 cl = defmap[TC_PRIO_BESTEFFORT];
 251
 252                         if (cl == NULL || cl->level >= head->level)
 253                                 goto fallback;
 254                 }
 255
 256 #ifdef CONFIG_NET_CLS_ACT
 257                 switch (result) {
 258                 case TC_ACT_QUEUED:
 259                 case TC_ACT_STOLEN:
 260                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 261                 case TC_ACT_SHOT:
 262                         return NULL;
 263                 case TC_ACT_RECLASSIFY:
 264                         return cbq_reclassify(skb, cl);
 265                 }
 266 #endif
 267                 if (cl->level == 0)
 268                         return cl;
 269
 270                 /*
 271                  * Step 3+n. If classifier selected a link sharing class,
 272                  *         apply agency specific classifier.
 273                  *         Repeat this procdure until we hit a leaf node.
 274                  */
 275                 head = cl;
 276         }
 277
 278 fallback:
 279         cl = head;
 280
 281         /*
 282          * Step 4. No success...
 283          */
 284         if (TC_H_MAJ(prio) == 0 &&
 285             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 286             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 287                 return head;
 288
 289         return cl;
 290 }
 291
 292 /*
 293    A packet has just been enqueued on the empty class.
 294    cbq_activate_class adds it to the tail of active class list
 295    of its priority band.
 296  */
 297
 298 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 299 {
 300         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 301         int prio = cl->cpriority;
 302         struct cbq_class *cl_tail;
 303
 304         cl_tail = q->active[prio];
 305         q->active[prio] = cl;
 306
 307         if (cl_tail != NULL) {
 308                 cl->next_alive = cl_tail->next_alive;
 309                 cl_tail->next_alive = cl;
 310         } else {
 311                 cl->next_alive = cl;
 312                 q->activemask |= (1<<prio);
 313         }
 314 }
 315
 316 /*
 317    Unlink class from active chain.
 318    Note that this same procedure is done directly in cbq_dequeue*
 319    during round-robin procedure.
 320  */
 321
 322 static void cbq_deactivate_class(struct cbq_class *this)
 323 {
 324         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 325         int prio = this->cpriority;
 326         struct cbq_class *cl;
 327         struct cbq_class *cl_prev = q->active[prio];
 328
 329         do {
 330                 cl = cl_prev->next_alive;
 331                 if (cl == this) {
 332                         cl_prev->next_alive = cl->next_alive;
 333                         cl->next_alive = NULL;
 334
 335                         if (cl == q->active[prio]) {
 336                                 q->active[prio] = cl_prev;
 337                                 if (cl == q->active[prio]) {
 338                                         q->active[prio] = NULL;
 339                                         q->activemask &= ~(1<<prio);
 340                                         return;
 341                                 }
 342                         }
 343                         return;
 344                 }
 345         } while ((cl_prev = cl) != q->active[prio]);
 346 }
 347
 348 static void
 349 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 350 {
 351         int toplevel = q->toplevel;
 352
 353         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 354                 psched_time_t now;
 355                 psched_tdiff_t incr;
 356
 357                 now = psched_get_time();
 358                 incr = now - q->now_rt;
 359                 now = q->now + incr;
 360
 361                 do {
 362                         if (cl->undertime < now) {
 363                                 q->toplevel = cl->level;
 364                                 return;
 365                         }
 366                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 367         }
 368 }
 369
 370 static int
 371 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 372 {
 373         struct cbq_sched_data *q = qdisc_priv(sch);
 374         int uninitialized_var(ret);
 375         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 376
 377 #ifdef CONFIG_NET_CLS_ACT
 378         q->rx_class = cl;
 379 #endif
 380         if (cl == NULL) {
 381                 if (ret & __NET_XMIT_BYPASS)
 382                         sch->qstats.drops++;
 383                 kfree_skb(skb);
 384                 return ret;
 385         }
 386
 387 #ifdef CONFIG_NET_CLS_ACT
 388         cl->q->__parent = sch;
 389 #endif
 390         ret = qdisc_enqueue(skb, cl->q);
 391         if (ret == NET_XMIT_SUCCESS) {
 392                 sch->q.qlen++;
 393                 cbq_mark_toplevel(q, cl);
 394                 if (!cl->next_alive)
 395                         cbq_activate_class(cl);
 396                 return ret;
 397         }
 398
 399         if (net_xmit_drop_count(ret)) {
 400                 sch->qstats.drops++;
 401                 cbq_mark_toplevel(q, cl);
 402                 cl->qstats.drops++;
 403         }
 404         return ret;
 405 }
 406
 407 /* Overlimit actions */
 408
 409 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 410
 411 static void cbq_ovl_classic(struct cbq_class *cl)
 412 {
 413         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 414         psched_tdiff_t delay = cl->undertime - q->now;
 415
 416         if (!cl->delayed) {
 417                 delay += cl->offtime;
 418
 419                 /*
 420                    Class goes to sleep, so that it will have no
 421                    chance to work avgidle. Let's forgive it 8)
 422
 423                    BTW cbq-2.0 has a crap in this
 424                    place, apparently they forgot to shift it by cl->ewma_log.
 425                  */
 426                 if (cl->avgidle < 0)
 427                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 428                 if (cl->avgidle < cl->minidle)
 429                         cl->avgidle = cl->minidle;
 430                 if (delay <= 0)
 431                         delay = 1;
 432                 cl->undertime = q->now + delay;
 433
 434                 cl->xstats.overactions++;
 435                 cl->delayed = 1;
 436         }
 437         if (q->wd_expires == 0 || q->wd_expires > delay)
 438                 q->wd_expires = delay;
 439
 440         /* Dirty work! We must schedule wakeups based on
 441            real available rate, rather than leaf rate,
 442            which may be tiny (even zero).
 443          */
 444         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 445                 struct cbq_class *b;
 446                 psched_tdiff_t base_delay = q->wd_expires;
 447
 448                 for (b = cl->borrow; b; b = b->borrow) {
 449                         delay = b->undertime - q->now;
 450                         if (delay < base_delay) {
 451                                 if (delay <= 0)
 452                                         delay = 1;
 453                                 base_delay = delay;
 454                         }
 455                 }
 456
 457                 q->wd_expires = base_delay;
 458         }
 459 }
 460
 461 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 462    they go overlimit
 463  */
 464
 465 static void cbq_ovl_rclassic(struct cbq_class *cl)
 466 {
 467         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 468         struct cbq_class *this = cl;
 469
 470         do {
 471                 if (cl->level > q->toplevel) {
 472                         cl = NULL;
 473                         break;
 474                 }
 475         } while ((cl = cl->borrow) != NULL);
 476
 477         if (cl == NULL)
 478                 cl = this;
 479         cbq_ovl_classic(cl);
 480 }
 481
 482 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 483
 484 static void cbq_ovl_delay(struct cbq_class *cl)
 485 {
 486         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 487         psched_tdiff_t delay = cl->undertime - q->now;
 488
 489         if (test_bit(__QDISC_STATE_DEACTIVATED,
 490                      &qdisc_root_sleeping(cl->qdisc)->state))
 491                 return;
 492
 493         if (!cl->delayed) {
 494                 psched_time_t sched = q->now;
 495                 ktime_t expires;
 496
 497                 delay += cl->offtime;
 498                 if (cl->avgidle < 0)
 499                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 500                 if (cl->avgidle < cl->minidle)
 501                         cl->avgidle = cl->minidle;
 502                 cl->undertime = q->now + delay;
 503
 504                 if (delay > 0) {
 505                         sched += delay + cl->penalty;
 506                         cl->penalized = sched;
 507                         cl->cpriority = TC_CBQ_MAXPRIO;
 508                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 509
 510                         expires = ktime_set(0, 0);
 511                         expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 512                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 513                             ktime_to_ns(ktime_sub(
 514                                         hrtimer_get_expires(&q->delay_timer),
 515                                         expires)) > 0)
 516                                 hrtimer_set_expires(&q->delay_timer, expires);
 517                         hrtimer_restart(&q->delay_timer);
 518                         cl->delayed = 1;
 519                         cl->xstats.overactions++;
 520                         return;
 521                 }
 522                 delay = 1;
 523         }
 524         if (q->wd_expires == 0 || q->wd_expires > delay)
 525                 q->wd_expires = delay;
 526 }
 527
 528 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 529
 530 static void cbq_ovl_lowprio(struct cbq_class *cl)
 531 {
 532         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 533
 534         cl->penalized = q->now + cl->penalty;
 535
 536         if (cl->cpriority != cl->priority2) {
 537                 cl->cpriority = cl->priority2;
 538                 q->pmask |= (1<<cl->cpriority);
 539                 cl->xstats.overactions++;
 540         }
 541         cbq_ovl_classic(cl);
 542 }
 543
 544 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 545
 546 static void cbq_ovl_drop(struct cbq_class *cl)
 547 {
 548         if (cl->q->ops->drop)
 549                 if (cl->q->ops->drop(cl->q))
 550                         cl->qdisc->q.qlen--;
 551         cl->xstats.overactions++;
 552         cbq_ovl_classic(cl);
 553 }
 554
 555 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 556                                        psched_time_t now)
 557 {
 558         struct cbq_class *cl;
 559         struct cbq_class *cl_prev = q->active[prio];
 560         psched_time_t sched = now;
 561
 562         if (cl_prev == NULL)
 563                 return 0;
 564
 565         do {
 566                 cl = cl_prev->next_alive;
 567                 if (now - cl->penalized > 0) {
 568                         cl_prev->next_alive = cl->next_alive;
 569                         cl->next_alive = NULL;
 570                         cl->cpriority = cl->priority;
 571                         cl->delayed = 0;
 572                         cbq_activate_class(cl);
 573
 574                         if (cl == q->active[prio]) {
 575                                 q->active[prio] = cl_prev;
 576                                 if (cl == q->active[prio]) {
 577                                         q->active[prio] = NULL;
 578                                         return 0;
 579                                 }
 580                         }
 581
 582                         cl = cl_prev->next_alive;
 583                 } else if (sched - cl->penalized > 0)
 584                         sched = cl->penalized;
 585         } while ((cl_prev = cl) != q->active[prio]);
 586
 587         return sched - now;
 588 }
 589
 590 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 591 {
 592         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 593                                                 delay_timer);
 594         struct Qdisc *sch = q->watchdog.qdisc;
 595         psched_time_t now;
 596         psched_tdiff_t delay = 0;
 597         unsigned pmask;
 598
 599         now = psched_get_time();
 600
 601         pmask = q->pmask;
 602         q->pmask = 0;
 603
 604         while (pmask) {
 605                 int prio = ffz(~pmask);
 606                 psched_tdiff_t tmp;
 607
 608                 pmask &= ~(1<<prio);
 609
 610                 tmp = cbq_undelay_prio(q, prio, now);
 611                 if (tmp > 0) {
 612                         q->pmask |= 1<<prio;
 613                         if (tmp < delay || delay == 0)
 614                                 delay = tmp;
 615                 }
 616         }
 617
 618         if (delay) {
 619                 ktime_t time;
 620
 621                 time = ktime_set(0, 0);
 622                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 623                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 624         }
 625
 626         sch->flags &= ~TCQ_F_THROTTLED;
 627         __netif_schedule(qdisc_root(sch));
 628         return HRTIMER_NORESTART;
 629 }
 630
 631 #ifdef CONFIG_NET_CLS_ACT
 632 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 633 {
 634         struct Qdisc *sch = child->__parent;
 635         struct cbq_sched_data *q = qdisc_priv(sch);
 636         struct cbq_class *cl = q->rx_class;
 637
 638         q->rx_class = NULL;
 639
 640         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 641                 int ret;
 642
 643                 cbq_mark_toplevel(q, cl);
 644
 645                 q->rx_class = cl;
 646                 cl->q->__parent = sch;
 647
 648                 ret = qdisc_enqueue(skb, cl->q);
 649                 if (ret == NET_XMIT_SUCCESS) {
 650                         sch->q.qlen++;
 651                         if (!cl->next_alive)
 652                                 cbq_activate_class(cl);
 653                         return 0;
 654                 }
 655                 if (net_xmit_drop_count(ret))
 656                         sch->qstats.drops++;
 657                 return 0;
 658         }
 659
 660         sch->qstats.drops++;
 661         return -1;
 662 }
 663 #endif
 664
 665 /*
 666    It is mission critical procedure.
 667
 668    We "regenerate" toplevel cutoff, if transmitting class
 669    has backlog and it is not regulated. It is not part of
 670    original CBQ description, but looks more reasonable.
 671    Probably, it is wrong. This question needs further investigation.
 672 */
 673
 674 static __inline__ void
 675 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 676                     struct cbq_class *borrowed)
 677 {
 678         if (cl && q->toplevel >= borrowed->level) {
 679                 if (cl->q->q.qlen > 1) {
 680                         do {
 681                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 682                                         q->toplevel = borrowed->level;
 683                                         return;
 684                                 }
 685                         } while ((borrowed=borrowed->borrow) != NULL);
 686                 }
 687 #if 0
 688         /* It is not necessary now. Uncommenting it
 689            will save CPU cycles, but decrease fairness.
 690          */
 691                 q->toplevel = TC_CBQ_MAXLEVEL;
 692 #endif
 693         }
 694 }
 695
 696 static void
 697 cbq_update(struct cbq_sched_data *q)
 698 {
 699         struct cbq_class *this = q->tx_class;
 700         struct cbq_class *cl = this;
 701         int len = q->tx_len;
 702
 703         q->tx_class = NULL;
 704
 705         for ( ; cl; cl = cl->share) {
 706                 long avgidle = cl->avgidle;
 707                 long idle;
 708
 709                 cl->bstats.packets++;
 710                 cl->bstats.bytes += len;
 711
 712                 /*
 713                    (now - last) is total time between packet right edges.
 714                    (last_pktlen/rate) is "virtual" busy time, so that
 715
 716                          idle = (now - last) - last_pktlen/rate
 717                  */
 718
 719                 idle = q->now - cl->last;
 720                 if ((unsigned long)idle > 128*1024*1024) {
 721                         avgidle = cl->maxidle;
 722                 } else {
 723                         idle -= L2T(cl, len);
 724
 725                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 726                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 727                    cl->avgidle == true_avgidle/W,
 728                    hence:
 729                  */
 730                         avgidle += idle - (avgidle>>cl->ewma_log);
 731                 }
 732
 733                 if (avgidle <= 0) {
 734                         /* Overlimit or at-limit */
 735
 736                         if (avgidle < cl->minidle)
 737                                 avgidle = cl->minidle;
 738
 739                         cl->avgidle = avgidle;
 740
 741                         /* Calculate expected time, when this class
 742                            will be allowed to send.
 743                            It will occur, when:
 744                            (1-W)*true_avgidle + W*delay = 0, i.e.
 745                            idle = (1/W - 1)*(-true_avgidle)
 746                            or
 747                            idle = (1 - W)*(-cl->avgidle);
 748                          */
 749                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 750
 751                         /*
 752                            That is not all.
 753                            To maintain the rate allocated to the class,
 754                            we add to undertime virtual clock,
 755                            necessary to complete transmitted packet.
 756                            (len/phys_bandwidth has been already passed
 757                            to the moment of cbq_update)
 758                          */
 759
 760                         idle -= L2T(&q->link, len);
 761                         idle += L2T(cl, len);
 762
 763                         cl->undertime = q->now + idle;
 764                 } else {
 765                         /* Underlimit */
 766
 767                         cl->undertime = PSCHED_PASTPERFECT;
 768                         if (avgidle > cl->maxidle)
 769                                 cl->avgidle = cl->maxidle;
 770                         else
 771                                 cl->avgidle = avgidle;
 772                 }
 773                 cl->last = q->now;
 774         }
 775
 776         cbq_update_toplevel(q, this, q->tx_borrowed);
 777 }
 778
 779 static __inline__ struct cbq_class *
 780 cbq_under_limit(struct cbq_class *cl)
 781 {
 782         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 783         struct cbq_class *this_cl = cl;
 784
 785         if (cl->tparent == NULL)
 786                 return cl;
 787
 788         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 789                 cl->delayed = 0;
 790                 return cl;
 791         }
 792
 793         do {
 794                 /* It is very suspicious place. Now overlimit
 795                    action is generated for not bounded classes
 796                    only if link is completely congested.
 797                    Though it is in agree with ancestor-only paradigm,
 798                    it looks very stupid. Particularly,
 799                    it means that this chunk of code will either
 800                    never be called or result in strong amplification
 801                    of burstiness. Dangerous, silly, and, however,
 802                    no another solution exists.
 803                  */
 804                 if ((cl = cl->borrow) == NULL) {
 805                         this_cl->qstats.overlimits++;
 806                         this_cl->overlimit(this_cl);
 807                         return NULL;
 808                 }
 809                 if (cl->level > q->toplevel)
 810                         return NULL;
 811         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 812
 813         cl->delayed = 0;
 814         return cl;
 815 }
 816
 817 static __inline__ struct sk_buff *
 818 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 819 {
 820         struct cbq_sched_data *q = qdisc_priv(sch);
 821         struct cbq_class *cl_tail, *cl_prev, *cl;
 822         struct sk_buff *skb;
 823         int deficit;
 824
 825         cl_tail = cl_prev = q->active[prio];
 826         cl = cl_prev->next_alive;
 827
 828         do {
 829                 deficit = 0;
 830
 831                 /* Start round */
 832                 do {
 833                         struct cbq_class *borrow = cl;
 834
 835                         if (cl->q->q.qlen &&
 836                             (borrow = cbq_under_limit(cl)) == NULL)
 837                                 goto skip_class;
 838
 839                         if (cl->deficit <= 0) {
 840                                 /* Class exhausted its allotment per
 841                                    this round. Switch to the next one.
 842                                  */
 843                                 deficit = 1;
 844                                 cl->deficit += cl->quantum;
 845                                 goto next_class;
 846                         }
 847
 848                         skb = cl->q->dequeue(cl->q);
 849
 850                         /* Class did not give us any skb :-(
 851                            It could occur even if cl->q->q.qlen != 0
 852                            f.e. if cl->q == "tbf"
 853                          */
 854                         if (skb == NULL)
 855                                 goto skip_class;
 856
 857                         cl->deficit -= qdisc_pkt_len(skb);
 858                         q->tx_class = cl;
 859                         q->tx_borrowed = borrow;
 860                         if (borrow != cl) {
 861 #ifndef CBQ_XSTATS_BORROWS_BYTES
 862                                 borrow->xstats.borrows++;
 863                                 cl->xstats.borrows++;
 864 #else
 865                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 866                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 867 #endif
 868                         }
 869                         q->tx_len = qdisc_pkt_len(skb);
 870
 871                         if (cl->deficit <= 0) {
 872                                 q->active[prio] = cl;
 873                                 cl = cl->next_alive;
 874                                 cl->deficit += cl->quantum;
 875                         }
 876                         return skb;
 877
 878 skip_class:
 879                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 880                                 /* Class is empty or penalized.
 881                                    Unlink it from active chain.
 882                                  */
 883                                 cl_prev->next_alive = cl->next_alive;
 884                                 cl->next_alive = NULL;
 885
 886                                 /* Did cl_tail point to it? */
 887                                 if (cl == cl_tail) {
 888                                         /* Repair it! */
 889                                         cl_tail = cl_prev;
 890
 891                                         /* Was it the last class in this band? */
 892                                         if (cl == cl_tail) {
 893                                                 /* Kill the band! */
 894                                                 q->active[prio] = NULL;
 895                                                 q->activemask &= ~(1<<prio);
 896                                                 if (cl->q->q.qlen)
 897                                                         cbq_activate_class(cl);
 898                                                 return NULL;
 899                                         }
 900
 901                                         q->active[prio] = cl_tail;
 902                                 }
 903                                 if (cl->q->q.qlen)
 904                                         cbq_activate_class(cl);
 905
 906                                 cl = cl_prev;
 907                         }
 908
 909 next_class:
 910                         cl_prev = cl;
 911                         cl = cl->next_alive;
 912                 } while (cl_prev != cl_tail);
 913         } while (deficit);
 914
 915         q->active[prio] = cl_prev;
 916
 917         return NULL;
 918 }
 919
 920 static __inline__ struct sk_buff *
 921 cbq_dequeue_1(struct Qdisc *sch)
 922 {
 923         struct cbq_sched_data *q = qdisc_priv(sch);
 924         struct sk_buff *skb;
 925         unsigned activemask;
 926
 927         activemask = q->activemask&0xFF;
 928         while (activemask) {
 929                 int prio = ffz(~activemask);
 930                 activemask &= ~(1<<prio);
 931                 skb = cbq_dequeue_prio(sch, prio);
 932                 if (skb)
 933                         return skb;
 934         }
 935         return NULL;
 936 }
 937
 938 static struct sk_buff *
 939 cbq_dequeue(struct Qdisc *sch)
 940 {
 941         struct sk_buff *skb;
 942         struct cbq_sched_data *q = qdisc_priv(sch);
 943         psched_time_t now;
 944         psched_tdiff_t incr;
 945
 946         now = psched_get_time();
 947         incr = now - q->now_rt;
 948
 949         if (q->tx_class) {
 950                 psched_tdiff_t incr2;
 951                 /* Time integrator. We calculate EOS time
 952                    by adding expected packet transmission time.
 953                    If real time is greater, we warp artificial clock,
 954                    so that:
 955
 956                    cbq_time = max(real_time, work);
 957                  */
 958                 incr2 = L2T(&q->link, q->tx_len);
 959                 q->now += incr2;
 960                 cbq_update(q);
 961                 if ((incr -= incr2) < 0)
 962                         incr = 0;
 963         }
 964         q->now += incr;
 965         q->now_rt = now;
 966
 967         for (;;) {
 968                 q->wd_expires = 0;
 969
 970                 skb = cbq_dequeue_1(sch);
 971                 if (skb) {
 972                         qdisc_bstats_update(sch, skb);
 973                         sch->q.qlen--;
 974                         sch->flags &= ~TCQ_F_THROTTLED;
 975                         return skb;
 976                 }
 977
 978                 /* All the classes are overlimit.
 979
 980                    It is possible, if:
 981
 982                    1. Scheduler is empty.
 983                    2. Toplevel cutoff inhibited borrowing.
 984                    3. Root class is overlimit.
 985
 986                    Reset 2d and 3d conditions and retry.
 987
 988                    Note, that NS and cbq-2.0 are buggy, peeking
 989                    an arbitrary class is appropriate for ancestor-only
 990                    sharing, but not for toplevel algorithm.
 991
 992                    Our version is better, but slower, because it requires
 993                    two passes, but it is unavoidable with top-level sharing.
 994                 */
 995
 996                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
 997                     q->link.undertime == PSCHED_PASTPERFECT)
 998                         break;
 999
1000                 q->toplevel = TC_CBQ_MAXLEVEL;
1001                 q->link.undertime = PSCHED_PASTPERFECT;
1002         }
1003
1004         /* No packets in scheduler or nobody wants to give them to us :-(
1005            Sigh... start watchdog timer in the last case. */
1006
1007         if (sch->q.qlen) {
1008                 sch->qstats.overlimits++;
1009                 if (q->wd_expires)
1010                         qdisc_watchdog_schedule(&q->watchdog,
1011                                                 now + q->wd_expires);
1012         }
1013         return NULL;
1014 }
1015
1016 /* CBQ class maintanance routines */
1017
1018 static void cbq_adjust_levels(struct cbq_class *this)
1019 {
1020         if (this == NULL)
1021                 return;
1022
1023         do {
1024                 int level = 0;
1025                 struct cbq_class *cl;
1026
1027                 if ((cl = this->children) != NULL) {
1028                         do {
1029                                 if (cl->level > level)
1030                                         level = cl->level;
1031                         } while ((cl = cl->sibling) != this->children);
1032                 }
1033                 this->level = level+1;
1034         } while ((this = this->tparent) != NULL);
1035 }
1036
1037 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1038 {
1039         struct cbq_class *cl;
1040         struct hlist_node *n;
1041         unsigned int h;
1042
1043         if (q->quanta[prio] == 0)
1044                 return;
1045
1046         for (h = 0; h < q->clhash.hashsize; h++) {
1047                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1048                         /* BUGGGG... Beware! This expression suffer of
1049                            arithmetic overflows!
1050                          */
1051                         if (cl->priority == prio) {
1052                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1053                                         q->quanta[prio];
1054                         }
1055                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1056                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1057                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1058                         }
1059                 }
1060         }
1061 }
1062
1063 static void cbq_sync_defmap(struct cbq_class *cl)
1064 {
1065         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1066         struct cbq_class *split = cl->split;
1067         unsigned h;
1068         int i;
1069
1070         if (split == NULL)
1071                 return;
1072
1073         for (i=0; i<=TC_PRIO_MAX; i++) {
1074                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1075                         split->defaults[i] = NULL;
1076         }
1077
1078         for (i=0; i<=TC_PRIO_MAX; i++) {
1079                 int level = split->level;
1080
1081                 if (split->defaults[i])
1082                         continue;
1083
1084                 for (h = 0; h < q->clhash.hashsize; h++) {
1085                         struct hlist_node *n;
1086                         struct cbq_class *c;
1087
1088                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
1089                                              common.hnode) {
1090                                 if (c->split == split && c->level < level &&
1091                                     c->defmap&(1<<i)) {
1092                                         split->defaults[i] = c;
1093                                         level = c->level;
1094                                 }
1095                         }
1096                 }
1097         }
1098 }
1099
1100 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1101 {
1102         struct cbq_class *split = NULL;
1103
1104         if (splitid == 0) {
1105                 if ((split = cl->split) == NULL)
1106                         return;
1107                 splitid = split->common.classid;
1108         }
1109
1110         if (split == NULL || split->common.classid != splitid) {
1111                 for (split = cl->tparent; split; split = split->tparent)
1112                         if (split->common.classid == splitid)
1113                                 break;
1114         }
1115
1116         if (split == NULL)
1117                 return;
1118
1119         if (cl->split != split) {
1120                 cl->defmap = 0;
1121                 cbq_sync_defmap(cl);
1122                 cl->split = split;
1123                 cl->defmap = def&mask;
1124         } else
1125                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1126
1127         cbq_sync_defmap(cl);
1128 }
1129
1130 static void cbq_unlink_class(struct cbq_class *this)
1131 {
1132         struct cbq_class *cl, **clp;
1133         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1134
1135         qdisc_class_hash_remove(&q->clhash, &this->common);
1136
1137         if (this->tparent) {
1138                 clp=&this->sibling;
1139                 cl = *clp;
1140                 do {
1141                         if (cl == this) {
1142                                 *clp = cl->sibling;
1143                                 break;
1144                         }
1145                         clp = &cl->sibling;
1146                 } while ((cl = *clp) != this->sibling);
1147
1148                 if (this->tparent->children == this) {
1149                         this->tparent->children = this->sibling;
1150                         if (this->sibling == this)
1151                                 this->tparent->children = NULL;
1152                 }
1153         } else {
1154                 WARN_ON(this->sibling != this);
1155         }
1156 }
1157
1158 static void cbq_link_class(struct cbq_class *this)
1159 {
1160         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1161         struct cbq_class *parent = this->tparent;
1162
1163         this->sibling = this;
1164         qdisc_class_hash_insert(&q->clhash, &this->common);
1165
1166         if (parent == NULL)
1167                 return;
1168
1169         if (parent->children == NULL) {
1170                 parent->children = this;
1171         } else {
1172                 this->sibling = parent->children->sibling;
1173                 parent->children->sibling = this;
1174         }
1175 }
1176
1177 static unsigned int cbq_drop(struct Qdisc* sch)
1178 {
1179         struct cbq_sched_data *q = qdisc_priv(sch);
1180         struct cbq_class *cl, *cl_head;
1181         int prio;
1182         unsigned int len;
1183
1184         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1185                 if ((cl_head = q->active[prio]) == NULL)
1186                         continue;
1187
1188                 cl = cl_head;
1189                 do {
1190                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1191                                 sch->q.qlen--;
1192                                 if (!cl->q->q.qlen)
1193                                         cbq_deactivate_class(cl);
1194                                 return len;
1195                         }
1196                 } while ((cl = cl->next_alive) != cl_head);
1197         }
1198         return 0;
1199 }
1200
1201 static void
1202 cbq_reset(struct Qdisc* sch)
1203 {
1204         struct cbq_sched_data *q = qdisc_priv(sch);
1205         struct cbq_class *cl;
1206         struct hlist_node *n;
1207         int prio;
1208         unsigned h;
1209
1210         q->activemask = 0;
1211         q->pmask = 0;
1212         q->tx_class = NULL;
1213         q->tx_borrowed = NULL;
1214         qdisc_watchdog_cancel(&q->watchdog);
1215         hrtimer_cancel(&q->delay_timer);
1216         q->toplevel = TC_CBQ_MAXLEVEL;
1217         q->now = psched_get_time();
1218         q->now_rt = q->now;
1219
1220         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1221                 q->active[prio] = NULL;
1222
1223         for (h = 0; h < q->clhash.hashsize; h++) {
1224                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1225                         qdisc_reset(cl->q);
1226
1227                         cl->next_alive = NULL;
1228                         cl->undertime = PSCHED_PASTPERFECT;
1229                         cl->avgidle = cl->maxidle;
1230                         cl->deficit = cl->quantum;
1231                         cl->cpriority = cl->priority;
1232                 }
1233         }
1234         sch->q.qlen = 0;
1235 }
1236
1237
1238 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1239 {
1240         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1241                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1242                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1243         }
1244         if (lss->change&TCF_CBQ_LSS_EWMA)
1245                 cl->ewma_log = lss->ewma_log;
1246         if (lss->change&TCF_CBQ_LSS_AVPKT)
1247                 cl->avpkt = lss->avpkt;
1248         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1249                 cl->minidle = -(long)lss->minidle;
1250         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1251                 cl->maxidle = lss->maxidle;
1252                 cl->avgidle = lss->maxidle;
1253         }
1254         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1255                 cl->offtime = lss->offtime;
1256         return 0;
1257 }
1258
1259 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1260 {
1261         q->nclasses[cl->priority]--;
1262         q->quanta[cl->priority] -= cl->weight;
1263         cbq_normalize_quanta(q, cl->priority);
1264 }
1265
1266 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1267 {
1268         q->nclasses[cl->priority]++;
1269         q->quanta[cl->priority] += cl->weight;
1270         cbq_normalize_quanta(q, cl->priority);
1271 }
1272
1273 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1274 {
1275         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1276
1277         if (wrr->allot)
1278                 cl->allot = wrr->allot;
1279         if (wrr->weight)
1280                 cl->weight = wrr->weight;
1281         if (wrr->priority) {
1282                 cl->priority = wrr->priority-1;
1283                 cl->cpriority = cl->priority;
1284                 if (cl->priority >= cl->priority2)
1285                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1286         }
1287
1288         cbq_addprio(q, cl);
1289         return 0;
1290 }
1291
1292 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1293 {
1294         switch (ovl->strategy) {
1295         case TC_CBQ_OVL_CLASSIC:
1296                 cl->overlimit = cbq_ovl_classic;
1297                 break;
1298         case TC_CBQ_OVL_DELAY:
1299                 cl->overlimit = cbq_ovl_delay;
1300                 break;
1301         case TC_CBQ_OVL_LOWPRIO:
1302                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1303                     ovl->priority2-1 <= cl->priority)
1304                         return -EINVAL;
1305                 cl->priority2 = ovl->priority2-1;
1306                 cl->overlimit = cbq_ovl_lowprio;
1307                 break;
1308         case TC_CBQ_OVL_DROP:
1309                 cl->overlimit = cbq_ovl_drop;
1310                 break;
1311         case TC_CBQ_OVL_RCLASSIC:
1312                 cl->overlimit = cbq_ovl_rclassic;
1313                 break;
1314         default:
1315                 return -EINVAL;
1316         }
1317         cl->penalty = ovl->penalty;
1318         return 0;
1319 }
1320
1321 #ifdef CONFIG_NET_CLS_ACT
1322 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1323 {
1324         cl->police = p->police;
1325
1326         if (cl->q->handle) {
1327                 if (p->police == TC_POLICE_RECLASSIFY)
1328                         cl->q->reshape_fail = cbq_reshape_fail;
1329                 else
1330                         cl->q->reshape_fail = NULL;
1331         }
1332         return 0;
1333 }
1334 #endif
1335
1336 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1337 {
1338         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1339         return 0;
1340 }
1341
1342 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1343         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1344         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1345         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1346         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1347         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1348         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1349         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1350 };
1351
1352 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1353 {
1354         struct cbq_sched_data *q = qdisc_priv(sch);
1355         struct nlattr *tb[TCA_CBQ_MAX + 1];
1356         struct tc_ratespec *r;
1357         int err;
1358
1359         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1360         if (err < 0)
1361                 return err;
1362
1363         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1364                 return -EINVAL;
1365
1366         r = nla_data(tb[TCA_CBQ_RATE]);
1367
1368         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1369                 return -EINVAL;
1370
1371         err = qdisc_class_hash_init(&q->clhash);
1372         if (err < 0)
1373                 goto put_rtab;
1374
1375         q->link.refcnt = 1;
1376         q->link.sibling = &q->link;
1377         q->link.common.classid = sch->handle;
1378         q->link.qdisc = sch;
1379         q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1380                                       sch->handle);
1381         if (!q->link.q)
1382                 q->link.q = &noop_qdisc;
1383
1384         q->link.priority = TC_CBQ_MAXPRIO-1;
1385         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1386         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1387         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1388         q->link.overlimit = cbq_ovl_classic;
1389         q->link.allot = psched_mtu(qdisc_dev(sch));
1390         q->link.quantum = q->link.allot;
1391         q->link.weight = q->link.R_tab->rate.rate;
1392
1393         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1394         q->link.avpkt = q->link.allot/2;
1395         q->link.minidle = -0x7FFFFFFF;
1396
1397         qdisc_watchdog_init(&q->watchdog, sch);
1398         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1399         q->delay_timer.function = cbq_undelay;
1400         q->toplevel = TC_CBQ_MAXLEVEL;
1401         q->now = psched_get_time();
1402         q->now_rt = q->now;
1403
1404         cbq_link_class(&q->link);
1405
1406         if (tb[TCA_CBQ_LSSOPT])
1407                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1408
1409         cbq_addprio(q, &q->link);
1410         return 0;
1411
1412 put_rtab:
1413         qdisc_put_rtab(q->link.R_tab);
1414         return err;
1415 }
1416
1417 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1418 {
1419         unsigned char *b = skb_tail_pointer(skb);
1420
1421         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1422         return skb->len;
1423
1424 nla_put_failure:
1425         nlmsg_trim(skb, b);
1426         return -1;
1427 }
1428
1429 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1430 {
1431         unsigned char *b = skb_tail_pointer(skb);
1432         struct tc_cbq_lssopt opt;
1433
1434         opt.flags = 0;
1435         if (cl->borrow == NULL)
1436                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1437         if (cl->share == NULL)
1438                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1439         opt.ewma_log = cl->ewma_log;
1440         opt.level = cl->level;
1441         opt.avpkt = cl->avpkt;
1442         opt.maxidle = cl->maxidle;
1443         opt.minidle = (u32)(-cl->minidle);
1444         opt.offtime = cl->offtime;
1445         opt.change = ~0;
1446         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1447         return skb->len;
1448
1449 nla_put_failure:
1450         nlmsg_trim(skb, b);
1451         return -1;
1452 }
1453
1454 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1455 {
1456         unsigned char *b = skb_tail_pointer(skb);
1457         struct tc_cbq_wrropt opt;
1458
1459         opt.flags = 0;
1460         opt.allot = cl->allot;
1461         opt.priority = cl->priority+1;
1462         opt.cpriority = cl->cpriority+1;
1463         opt.weight = cl->weight;
1464         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1465         return skb->len;
1466
1467 nla_put_failure:
1468         nlmsg_trim(skb, b);
1469         return -1;
1470 }
1471
1472 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1473 {
1474         unsigned char *b = skb_tail_pointer(skb);
1475         struct tc_cbq_ovl opt;
1476
1477         opt.strategy = cl->ovl_strategy;
1478         opt.priority2 = cl->priority2+1;
1479         opt.pad = 0;
1480         opt.penalty = cl->penalty;
1481         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1482         return skb->len;
1483
1484 nla_put_failure:
1485         nlmsg_trim(skb, b);
1486         return -1;
1487 }
1488
1489 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1490 {
1491         unsigned char *b = skb_tail_pointer(skb);
1492         struct tc_cbq_fopt opt;
1493
1494         if (cl->split || cl->defmap) {
1495                 opt.split = cl->split ? cl->split->common.classid : 0;
1496                 opt.defmap = cl->defmap;
1497                 opt.defchange = ~0;
1498                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1499         }
1500         return skb->len;
1501
1502 nla_put_failure:
1503         nlmsg_trim(skb, b);
1504         return -1;
1505 }
1506
1507 #ifdef CONFIG_NET_CLS_ACT
1508 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1509 {
1510         unsigned char *b = skb_tail_pointer(skb);
1511         struct tc_cbq_police opt;
1512
1513         if (cl->police) {
1514                 opt.police = cl->police;
1515                 opt.__res1 = 0;
1516                 opt.__res2 = 0;
1517                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1518         }
1519         return skb->len;
1520
1521 nla_put_failure:
1522         nlmsg_trim(skb, b);
1523         return -1;
1524 }
1525 #endif
1526
1527 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1528 {
1529         if (cbq_dump_lss(skb, cl) < 0 ||
1530             cbq_dump_rate(skb, cl) < 0 ||
1531             cbq_dump_wrr(skb, cl) < 0 ||
1532             cbq_dump_ovl(skb, cl) < 0 ||
1533 #ifdef CONFIG_NET_CLS_ACT
1534             cbq_dump_police(skb, cl) < 0 ||
1535 #endif
1536             cbq_dump_fopt(skb, cl) < 0)
1537                 return -1;
1538         return 0;
1539 }
1540
1541 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1542 {
1543         struct cbq_sched_data *q = qdisc_priv(sch);
1544         struct nlattr *nest;
1545
1546         nest = nla_nest_start(skb, TCA_OPTIONS);
1547         if (nest == NULL)
1548                 goto nla_put_failure;
1549         if (cbq_dump_attr(skb, &q->link) < 0)
1550                 goto nla_put_failure;
1551         nla_nest_end(skb, nest);
1552         return skb->len;
1553
1554 nla_put_failure:
1555         nla_nest_cancel(skb, nest);
1556         return -1;
1557 }
1558
1559 static int
1560 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1561 {
1562         struct cbq_sched_data *q = qdisc_priv(sch);
1563
1564         q->link.xstats.avgidle = q->link.avgidle;
1565         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1566 }
1567
1568 static int
1569 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1570                struct sk_buff *skb, struct tcmsg *tcm)
1571 {
1572         struct cbq_class *cl = (struct cbq_class*)arg;
1573         struct nlattr *nest;
1574
1575         if (cl->tparent)
1576                 tcm->tcm_parent = cl->tparent->common.classid;
1577         else
1578                 tcm->tcm_parent = TC_H_ROOT;
1579         tcm->tcm_handle = cl->common.classid;
1580         tcm->tcm_info = cl->q->handle;
1581
1582         nest = nla_nest_start(skb, TCA_OPTIONS);
1583         if (nest == NULL)
1584                 goto nla_put_failure;
1585         if (cbq_dump_attr(skb, cl) < 0)
1586                 goto nla_put_failure;
1587         nla_nest_end(skb, nest);
1588         return skb->len;
1589
1590 nla_put_failure:
1591         nla_nest_cancel(skb, nest);
1592         return -1;
1593 }
1594
1595 static int
1596 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1597         struct gnet_dump *d)
1598 {
1599         struct cbq_sched_data *q = qdisc_priv(sch);
1600         struct cbq_class *cl = (struct cbq_class*)arg;
1601
1602         cl->qstats.qlen = cl->q->q.qlen;
1603         cl->xstats.avgidle = cl->avgidle;
1604         cl->xstats.undertime = 0;
1605
1606         if (cl->undertime != PSCHED_PASTPERFECT)
1607                 cl->xstats.undertime = cl->undertime - q->now;
1608
1609         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1610             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1611             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1612                 return -1;
1613
1614         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1615 }
1616
1617 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1618                      struct Qdisc **old)
1619 {
1620         struct cbq_class *cl = (struct cbq_class*)arg;
1621
1622         if (new == NULL) {
1623                 new = qdisc_create_dflt(sch->dev_queue,
1624                                         &pfifo_qdisc_ops, cl->common.classid);
1625                 if (new == NULL)
1626                         return -ENOBUFS;
1627         } else {
1628 #ifdef CONFIG_NET_CLS_ACT
1629                 if (cl->police == TC_POLICE_RECLASSIFY)
1630                         new->reshape_fail = cbq_reshape_fail;
1631 #endif
1632         }
1633         sch_tree_lock(sch);
1634         *old = cl->q;
1635         cl->q = new;
1636         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1637         qdisc_reset(*old);
1638         sch_tree_unlock(sch);
1639
1640         return 0;
1641 }
1642
1643 static struct Qdisc *
1644 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1645 {
1646         struct cbq_class *cl = (struct cbq_class*)arg;
1647
1648         return cl->q;
1649 }
1650
1651 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1652 {
1653         struct cbq_class *cl = (struct cbq_class *)arg;
1654
1655         if (cl->q->q.qlen == 0)
1656                 cbq_deactivate_class(cl);
1657 }
1658
1659 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1660 {
1661         struct cbq_sched_data *q = qdisc_priv(sch);
1662         struct cbq_class *cl = cbq_class_lookup(q, classid);
1663
1664         if (cl) {
1665                 cl->refcnt++;
1666                 return (unsigned long)cl;
1667         }
1668         return 0;
1669 }
1670
1671 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1672 {
1673         struct cbq_sched_data *q = qdisc_priv(sch);
1674
1675         WARN_ON(cl->filters);
1676
1677         tcf_destroy_chain(&cl->filter_list);
1678         qdisc_destroy(cl->q);
1679         qdisc_put_rtab(cl->R_tab);
1680         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1681         if (cl != &q->link)
1682                 kfree(cl);
1683 }
1684
1685 static void
1686 cbq_destroy(struct Qdisc* sch)
1687 {
1688         struct cbq_sched_data *q = qdisc_priv(sch);
1689         struct hlist_node *n, *next;
1690         struct cbq_class *cl;
1691         unsigned h;
1692
1693 #ifdef CONFIG_NET_CLS_ACT
1694         q->rx_class = NULL;
1695 #endif
1696         /*
1697          * Filters must be destroyed first because we don't destroy the
1698          * classes from root to leafs which means that filters can still
1699          * be bound to classes which have been destroyed already. --TGR '04
1700          */
1701         for (h = 0; h < q->clhash.hashsize; h++) {
1702                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1703                         tcf_destroy_chain(&cl->filter_list);
1704         }
1705         for (h = 0; h < q->clhash.hashsize; h++) {
1706                 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1707                                           common.hnode)
1708                         cbq_destroy_class(sch, cl);
1709         }
1710         qdisc_class_hash_destroy(&q->clhash);
1711 }
1712
1713 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1714 {
1715         struct cbq_class *cl = (struct cbq_class*)arg;
1716
1717         if (--cl->refcnt == 0) {
1718 #ifdef CONFIG_NET_CLS_ACT
1719                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1720                 struct cbq_sched_data *q = qdisc_priv(sch);
1721
1722                 spin_lock_bh(root_lock);
1723                 if (q->rx_class == cl)
1724                         q->rx_class = NULL;
1725                 spin_unlock_bh(root_lock);
1726 #endif
1727
1728                 cbq_destroy_class(sch, cl);
1729         }
1730 }
1731
1732 static int
1733 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1734                  unsigned long *arg)
1735 {
1736         int err;
1737         struct cbq_sched_data *q = qdisc_priv(sch);
1738         struct cbq_class *cl = (struct cbq_class*)*arg;
1739         struct nlattr *opt = tca[TCA_OPTIONS];
1740         struct nlattr *tb[TCA_CBQ_MAX + 1];
1741         struct cbq_class *parent;
1742         struct qdisc_rate_table *rtab = NULL;
1743
1744         if (opt == NULL)
1745                 return -EINVAL;
1746
1747         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1748         if (err < 0)
1749                 return err;
1750
1751         if (cl) {
1752                 /* Check parent */
1753                 if (parentid) {
1754                         if (cl->tparent &&
1755                             cl->tparent->common.classid != parentid)
1756                                 return -EINVAL;
1757                         if (!cl->tparent && parentid != TC_H_ROOT)
1758                                 return -EINVAL;
1759                 }
1760
1761                 if (tb[TCA_CBQ_RATE]) {
1762                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1763                                               tb[TCA_CBQ_RTAB]);
1764                         if (rtab == NULL)
1765                                 return -EINVAL;
1766                 }
1767
1768                 if (tca[TCA_RATE]) {
1769                         err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1770                                                     qdisc_root_sleeping_lock(sch),
1771                                                     tca[TCA_RATE]);
1772                         if (err) {
1773                                 if (rtab)
1774                                         qdisc_put_rtab(rtab);
1775                                 return err;
1776                         }
1777                 }
1778
1779                 /* Change class parameters */
1780                 sch_tree_lock(sch);
1781
1782                 if (cl->next_alive != NULL)
1783                         cbq_deactivate_class(cl);
1784
1785                 if (rtab) {
1786                         qdisc_put_rtab(cl->R_tab);
1787                         cl->R_tab = rtab;
1788                 }
1789
1790                 if (tb[TCA_CBQ_LSSOPT])
1791                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1792
1793                 if (tb[TCA_CBQ_WRROPT]) {
1794                         cbq_rmprio(q, cl);
1795                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1796                 }
1797
1798                 if (tb[TCA_CBQ_OVL_STRATEGY])
1799                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1800
1801 #ifdef CONFIG_NET_CLS_ACT
1802                 if (tb[TCA_CBQ_POLICE])
1803                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1804 #endif
1805
1806                 if (tb[TCA_CBQ_FOPT])
1807                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1808
1809                 if (cl->q->q.qlen)
1810                         cbq_activate_class(cl);
1811
1812                 sch_tree_unlock(sch);
1813
1814                 return 0;
1815         }
1816
1817         if (parentid == TC_H_ROOT)
1818                 return -EINVAL;
1819
1820         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1821             tb[TCA_CBQ_LSSOPT] == NULL)
1822                 return -EINVAL;
1823
1824         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1825         if (rtab == NULL)
1826                 return -EINVAL;
1827
1828         if (classid) {
1829                 err = -EINVAL;
1830                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1831                         goto failure;
1832         } else {
1833                 int i;
1834                 classid = TC_H_MAKE(sch->handle,0x8000);
1835
1836                 for (i=0; i<0x8000; i++) {
1837                         if (++q->hgenerator >= 0x8000)
1838                                 q->hgenerator = 1;
1839                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1840                                 break;
1841                 }
1842                 err = -ENOSR;
1843                 if (i >= 0x8000)
1844                         goto failure;
1845                 classid = classid|q->hgenerator;
1846         }
1847
1848         parent = &q->link;
1849         if (parentid) {
1850                 parent = cbq_class_lookup(q, parentid);
1851                 err = -EINVAL;
1852                 if (parent == NULL)
1853                         goto failure;
1854         }
1855
1856         err = -ENOBUFS;
1857         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1858         if (cl == NULL)
1859                 goto failure;
1860
1861         if (tca[TCA_RATE]) {
1862                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1863                                         qdisc_root_sleeping_lock(sch),
1864                                         tca[TCA_RATE]);
1865                 if (err) {
1866                         kfree(cl);
1867                         goto failure;
1868                 }
1869         }
1870
1871         cl->R_tab = rtab;
1872         rtab = NULL;
1873         cl->refcnt = 1;
1874         cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1875         if (!cl->q)
1876                 cl->q = &noop_qdisc;
1877         cl->common.classid = classid;
1878         cl->tparent = parent;
1879         cl->qdisc = sch;
1880         cl->allot = parent->allot;
1881         cl->quantum = cl->allot;
1882         cl->weight = cl->R_tab->rate.rate;
1883
1884         sch_tree_lock(sch);
1885         cbq_link_class(cl);
1886         cl->borrow = cl->tparent;
1887         if (cl->tparent != &q->link)
1888                 cl->share = cl->tparent;
1889         cbq_adjust_levels(parent);
1890         cl->minidle = -0x7FFFFFFF;
1891         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1892         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1893         if (cl->ewma_log==0)
1894                 cl->ewma_log = q->link.ewma_log;
1895         if (cl->maxidle==0)
1896                 cl->maxidle = q->link.maxidle;
1897         if (cl->avpkt==0)
1898                 cl->avpkt = q->link.avpkt;
1899         cl->overlimit = cbq_ovl_classic;
1900         if (tb[TCA_CBQ_OVL_STRATEGY])
1901                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1902 #ifdef CONFIG_NET_CLS_ACT
1903         if (tb[TCA_CBQ_POLICE])
1904                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1905 #endif
1906         if (tb[TCA_CBQ_FOPT])
1907                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1908         sch_tree_unlock(sch);
1909
1910         qdisc_class_hash_grow(sch, &q->clhash);
1911
1912         *arg = (unsigned long)cl;
1913         return 0;
1914
1915 failure:
1916         qdisc_put_rtab(rtab);
1917         return err;
1918 }
1919
1920 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1921 {
1922         struct cbq_sched_data *q = qdisc_priv(sch);
1923         struct cbq_class *cl = (struct cbq_class*)arg;
1924         unsigned int qlen;
1925
1926         if (cl->filters || cl->children || cl == &q->link)
1927                 return -EBUSY;
1928
1929         sch_tree_lock(sch);
1930
1931         qlen = cl->q->q.qlen;
1932         qdisc_reset(cl->q);
1933         qdisc_tree_decrease_qlen(cl->q, qlen);
1934
1935         if (cl->next_alive)
1936                 cbq_deactivate_class(cl);
1937
1938         if (q->tx_borrowed == cl)
1939                 q->tx_borrowed = q->tx_class;
1940         if (q->tx_class == cl) {
1941                 q->tx_class = NULL;
1942                 q->tx_borrowed = NULL;
1943         }
1944 #ifdef CONFIG_NET_CLS_ACT
1945         if (q->rx_class == cl)
1946                 q->rx_class = NULL;
1947 #endif
1948
1949         cbq_unlink_class(cl);
1950         cbq_adjust_levels(cl->tparent);
1951         cl->defmap = 0;
1952         cbq_sync_defmap(cl);
1953
1954         cbq_rmprio(q, cl);
1955         sch_tree_unlock(sch);
1956
1957         BUG_ON(--cl->refcnt == 0);
1958         /*
1959          * This shouldn't happen: we "hold" one cops->get() when called
1960          * from tc_ctl_tclass; the destroy method is done from cops->put().
1961          */
1962
1963         return 0;
1964 }
1965
1966 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1967 {
1968         struct cbq_sched_data *q = qdisc_priv(sch);
1969         struct cbq_class *cl = (struct cbq_class *)arg;
1970
1971         if (cl == NULL)
1972                 cl = &q->link;
1973
1974         return &cl->filter_list;
1975 }
1976
1977 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1978                                      u32 classid)
1979 {
1980         struct cbq_sched_data *q = qdisc_priv(sch);
1981         struct cbq_class *p = (struct cbq_class*)parent;
1982         struct cbq_class *cl = cbq_class_lookup(q, classid);
1983
1984         if (cl) {
1985                 if (p && p->level <= cl->level)
1986                         return 0;
1987                 cl->filters++;
1988                 return (unsigned long)cl;
1989         }
1990         return 0;
1991 }
1992
1993 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1994 {
1995         struct cbq_class *cl = (struct cbq_class*)arg;
1996
1997         cl->filters--;
1998 }
1999
2000 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2001 {
2002         struct cbq_sched_data *q = qdisc_priv(sch);
2003         struct cbq_class *cl;
2004         struct hlist_node *n;
2005         unsigned h;
2006
2007         if (arg->stop)
2008                 return;
2009
2010         for (h = 0; h < q->clhash.hashsize; h++) {
2011                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2012                         if (arg->count < arg->skip) {
2013                                 arg->count++;
2014                                 continue;
2015                         }
2016                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2017                                 arg->stop = 1;
2018                                 return;
2019                         }
2020                         arg->count++;
2021                 }
2022         }
2023 }
2024
2025 static const struct Qdisc_class_ops cbq_class_ops = {
2026         .graft          =       cbq_graft,
2027         .leaf           =       cbq_leaf,
2028         .qlen_notify    =       cbq_qlen_notify,
2029         .get            =       cbq_get,
2030         .put            =       cbq_put,
2031         .change         =       cbq_change_class,
2032         .delete         =       cbq_delete,
2033         .walk           =       cbq_walk,
2034         .tcf_chain      =       cbq_find_tcf,
2035         .bind_tcf       =       cbq_bind_filter,
2036         .unbind_tcf     =       cbq_unbind_filter,
2037         .dump           =       cbq_dump_class,
2038         .dump_stats     =       cbq_dump_class_stats,
2039 };
2040
2041 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2042         .next           =       NULL,
2043         .cl_ops         =       &cbq_class_ops,
2044         .id             =       "cbq",
2045         .priv_size      =       sizeof(struct cbq_sched_data),
2046         .enqueue        =       cbq_enqueue,
2047         .dequeue        =       cbq_dequeue,
2048         .peek           =       qdisc_peek_dequeued,
2049         .drop           =       cbq_drop,
2050         .init           =       cbq_init,
2051         .reset          =       cbq_reset,
2052         .destroy        =       cbq_destroy,
2053         .change         =       NULL,
2054         .dump           =       cbq_dump,
2055         .dump_stats     =       cbq_dump_stats,
2056         .owner          =       THIS_MODULE,
2057 };
2058
2059 static int __init cbq_module_init(void)
2060 {
2061         return register_qdisc(&cbq_qdisc_ops);
2062 }
2063 static void __exit cbq_module_exit(void)
2064 {
2065         unregister_qdisc(&cbq_qdisc_ops);
2066 }
2067 module_init(cbq_module_init)
2068 module_exit(cbq_module_exit)
2069 MODULE_LICENSE("GPL");