net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <net/netlink.h>
  21 #include <net/pkt_sched.h>
  22
  23
  24 /*      Class-Based Queueing (CBQ) algorithm.
  25         =======================================
  26
  27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                  Management Models for Packet Networks",
  29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                  Parameters", 1996
  35
  36                  [4] Sally Floyd and Michael Speer, "Experimental Results
  37                  for Class-Based Queueing", 1998, not published.
  38
  39         -----------------------------------------------------------------------
  40
  41         Algorithm skeleton was taken from NS simulator cbq.cc.
  42         If someone wants to check this code against the LBL version,
  43         he should take into account that ONLY the skeleton was borrowed,
  44         the implementation is different. Particularly:
  45
  46         --- The WRR algorithm is different. Our version looks more
  47         reasonable (I hope) and works when quanta are allowed to be
  48         less than MTU, which is always the case when real time classes
  49         have small rates. Note, that the statement of [3] is
  50         incomplete, delay may actually be estimated even if class
  51         per-round allotment is less than MTU. Namely, if per-round
  52         allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56         In the worst case we have IntServ estimate with D = W*r+k*MTU
  57         and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61         interpret some places, which look like wrong translations
  62         from NS. Anyone is advised to find these differences
  63         and explain to me, why I am wrong 8).
  64
  65         --- Linux has no EOI event, so that we cannot estimate true class
  66         idle time. Workaround is to consider the next dequeue event
  67         as sign that previous packet is finished. This is wrong because of
  68         internal device queueing, but on a permanently loaded link it is true.
  69         Moreover, combined with clock integrator, this scheme looks
  70         very close to an ideal solution.  */
  71
  72 struct cbq_sched_data;
  73
  74
  75 struct cbq_class
  76 {
  77         struct Qdisc_class_common common;
  78         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  79
  80 /* Parameters */
  81         unsigned char           priority;       /* class priority */
  82         unsigned char           priority2;      /* priority to be used after overlimit */
  83         unsigned char           ewma_log;       /* time constant for idle time calculation */
  84         unsigned char           ovl_strategy;
  85 #ifdef CONFIG_NET_CLS_ACT
  86         unsigned char           police;
  87 #endif
  88
  89         u32                     defmap;
  90
  91         /* Link-sharing scheduler parameters */
  92         long                    maxidle;        /* Class parameters: see below. */
  93         long                    offtime;
  94         long                    minidle;
  95         u32                     avpkt;
  96         struct qdisc_rate_table *R_tab;
  97
  98         /* Overlimit strategy parameters */
  99         void                    (*overlimit)(struct cbq_class *cl);
 100         psched_tdiff_t          penalty;
 101
 102         /* General scheduler (WRR) parameters */
 103         long                    allot;
 104         long                    quantum;        /* Allotment per WRR round */
 105         long                    weight;         /* Relative allotment: see below */
 106
 107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108         struct cbq_class        *split;         /* Ptr to split node */
 109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                    parent otherwise */
 113         struct cbq_class        *sibling;       /* Sibling chain */
 114         struct cbq_class        *children;      /* Pointer to children chain */
 115
 116         struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119 /* Variables */
 120         unsigned char           cpriority;      /* Effective priority */
 121         unsigned char           delayed;
 122         unsigned char           level;          /* level of the class in hierarchy:
 123                                                    0 for leaf classes, and maximal
 124                                                    level of children + 1 for nodes.
 125                                                  */
 126
 127         psched_time_t           last;           /* Last end of service */
 128         psched_time_t           undertime;
 129         long                    avgidle;
 130         long                    deficit;        /* Saved deficit for WRR */
 131         psched_time_t           penalized;
 132         struct gnet_stats_basic_packed bstats;
 133         struct gnet_stats_queue qstats;
 134         struct gnet_stats_rate_est rate_est;
 135         struct tc_cbq_xstats    xstats;
 136
 137         struct tcf_proto        *filter_list;
 138
 139         int                     refcnt;
 140         int                     filters;
 141
 142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143 };
 144
 145 struct cbq_sched_data
 146 {
 147         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 148         int                     nclasses[TC_CBQ_MAXPRIO+1];
 149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151         struct cbq_class        link;
 152
 153         unsigned                activemask;
 154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                    with backlog */
 156
 157 #ifdef CONFIG_NET_CLS_ACT
 158         struct cbq_class        *rx_class;
 159 #endif
 160         struct cbq_class        *tx_class;
 161         struct cbq_class        *tx_borrowed;
 162         int                     tx_len;
 163         psched_time_t           now;            /* Cached timestamp */
 164         psched_time_t           now_rt;         /* Cached real time */
 165         unsigned                pmask;
 166
 167         struct hrtimer          delay_timer;
 168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                    started when CBQ has
 170                                                    backlog, but cannot
 171                                                    transmit just now */
 172         psched_tdiff_t          wd_expires;
 173         int                     toplevel;
 174         u32                     hgenerator;
 175 };
 176
 177
 178 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180 static __inline__ struct cbq_class *
 181 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 182 {
 183         struct Qdisc_class_common *clc;
 184
 185         clc = qdisc_class_find(&q->clhash, classid);
 186         if (clc == NULL)
 187                 return NULL;
 188         return container_of(clc, struct cbq_class, common);
 189 }
 190
 191 #ifdef CONFIG_NET_CLS_ACT
 192
 193 static struct cbq_class *
 194 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 195 {
 196         struct cbq_class *cl, *new;
 197
 198         for (cl = this->tparent; cl; cl = cl->tparent)
 199                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 200                         return new;
 201
 202         return NULL;
 203 }
 204
 205 #endif
 206
 207 /* Classify packet. The procedure is pretty complicated, but
 208    it allows us to combine link sharing and priority scheduling
 209    transparently.
 210
 211    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212    so that it resolves to split nodes. Then packets are classified
 213    by logical priority, or a more specific classifier may be attached
 214    to the split node.
 215  */
 216
 217 static struct cbq_class *
 218 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219 {
 220         struct cbq_sched_data *q = qdisc_priv(sch);
 221         struct cbq_class *head = &q->link;
 222         struct cbq_class **defmap;
 223         struct cbq_class *cl = NULL;
 224         u32 prio = skb->priority;
 225         struct tcf_result res;
 226
 227         /*
 228          *  Step 1. If skb->priority points to one of our classes, use it.
 229          */
 230         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 231             (cl = cbq_class_lookup(q, prio)) != NULL)
 232                 return cl;
 233
 234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235         for (;;) {
 236                 int result = 0;
 237                 defmap = head->defaults;
 238
 239                 /*
 240                  * Step 2+n. Apply classifier.
 241                  */
 242                 if (!head->filter_list ||
 243                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                         goto fallback;
 245
 246                 if ((cl = (void*)res.class) == NULL) {
 247                         if (TC_H_MAJ(res.classid))
 248                                 cl = cbq_class_lookup(q, res.classid);
 249                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 250                                 cl = defmap[TC_PRIO_BESTEFFORT];
 251
 252                         if (cl == NULL || cl->level >= head->level)
 253                                 goto fallback;
 254                 }
 255
 256 #ifdef CONFIG_NET_CLS_ACT
 257                 switch (result) {
 258                 case TC_ACT_QUEUED:
 259                 case TC_ACT_STOLEN:
 260                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 261                 case TC_ACT_SHOT:
 262                         return NULL;
 263                 case TC_ACT_RECLASSIFY:
 264                         return cbq_reclassify(skb, cl);
 265                 }
 266 #endif
 267                 if (cl->level == 0)
 268                         return cl;
 269
 270                 /*
 271                  * Step 3+n. If classifier selected a link sharing class,
 272                  *         apply agency specific classifier.
 273                  *         Repeat this procdure until we hit a leaf node.
 274                  */
 275                 head = cl;
 276         }
 277
 278 fallback:
 279         cl = head;
 280
 281         /*
 282          * Step 4. No success...
 283          */
 284         if (TC_H_MAJ(prio) == 0 &&
 285             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 286             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 287                 return head;
 288
 289         return cl;
 290 }
 291
 292 /*
 293    A packet has just been enqueued on the empty class.
 294    cbq_activate_class adds it to the tail of active class list
 295    of its priority band.
 296  */
 297
 298 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 299 {
 300         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 301         int prio = cl->cpriority;
 302         struct cbq_class *cl_tail;
 303
 304         cl_tail = q->active[prio];
 305         q->active[prio] = cl;
 306
 307         if (cl_tail != NULL) {
 308                 cl->next_alive = cl_tail->next_alive;
 309                 cl_tail->next_alive = cl;
 310         } else {
 311                 cl->next_alive = cl;
 312                 q->activemask |= (1<<prio);
 313         }
 314 }
 315
 316 /*
 317    Unlink class from active chain.
 318    Note that this same procedure is done directly in cbq_dequeue*
 319    during round-robin procedure.
 320  */
 321
 322 static void cbq_deactivate_class(struct cbq_class *this)
 323 {
 324         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 325         int prio = this->cpriority;
 326         struct cbq_class *cl;
 327         struct cbq_class *cl_prev = q->active[prio];
 328
 329         do {
 330                 cl = cl_prev->next_alive;
 331                 if (cl == this) {
 332                         cl_prev->next_alive = cl->next_alive;
 333                         cl->next_alive = NULL;
 334
 335                         if (cl == q->active[prio]) {
 336                                 q->active[prio] = cl_prev;
 337                                 if (cl == q->active[prio]) {
 338                                         q->active[prio] = NULL;
 339                                         q->activemask &= ~(1<<prio);
 340                                         return;
 341                                 }
 342                         }
 343                         return;
 344                 }
 345         } while ((cl_prev = cl) != q->active[prio]);
 346 }
 347
 348 static void
 349 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 350 {
 351         int toplevel = q->toplevel;
 352
 353         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 354                 psched_time_t now;
 355                 psched_tdiff_t incr;
 356
 357                 now = psched_get_time();
 358                 incr = now - q->now_rt;
 359                 now = q->now + incr;
 360
 361                 do {
 362                         if (cl->undertime < now) {
 363                                 q->toplevel = cl->level;
 364                                 return;
 365                         }
 366                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 367         }
 368 }
 369
 370 static int
 371 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 372 {
 373         struct cbq_sched_data *q = qdisc_priv(sch);
 374         int uninitialized_var(ret);
 375         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 376
 377 #ifdef CONFIG_NET_CLS_ACT
 378         q->rx_class = cl;
 379 #endif
 380         if (cl == NULL) {
 381                 if (ret & __NET_XMIT_BYPASS)
 382                         sch->qstats.drops++;
 383                 kfree_skb(skb);
 384                 return ret;
 385         }
 386
 387 #ifdef CONFIG_NET_CLS_ACT
 388         cl->q->__parent = sch;
 389 #endif
 390         ret = qdisc_enqueue(skb, cl->q);
 391         if (ret == NET_XMIT_SUCCESS) {
 392                 sch->q.qlen++;
 393                 qdisc_bstats_update(sch, skb);
 394                 cbq_mark_toplevel(q, cl);
 395                 if (!cl->next_alive)
 396                         cbq_activate_class(cl);
 397                 return ret;
 398         }
 399
 400         if (net_xmit_drop_count(ret)) {
 401                 sch->qstats.drops++;
 402                 cbq_mark_toplevel(q, cl);
 403                 cl->qstats.drops++;
 404         }
 405         return ret;
 406 }
 407
 408 /* Overlimit actions */
 409
 410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 411
 412 static void cbq_ovl_classic(struct cbq_class *cl)
 413 {
 414         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 415         psched_tdiff_t delay = cl->undertime - q->now;
 416
 417         if (!cl->delayed) {
 418                 delay += cl->offtime;
 419
 420                 /*
 421                    Class goes to sleep, so that it will have no
 422                    chance to work avgidle. Let's forgive it 8)
 423
 424                    BTW cbq-2.0 has a crap in this
 425                    place, apparently they forgot to shift it by cl->ewma_log.
 426                  */
 427                 if (cl->avgidle < 0)
 428                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 429                 if (cl->avgidle < cl->minidle)
 430                         cl->avgidle = cl->minidle;
 431                 if (delay <= 0)
 432                         delay = 1;
 433                 cl->undertime = q->now + delay;
 434
 435                 cl->xstats.overactions++;
 436                 cl->delayed = 1;
 437         }
 438         if (q->wd_expires == 0 || q->wd_expires > delay)
 439                 q->wd_expires = delay;
 440
 441         /* Dirty work! We must schedule wakeups based on
 442            real available rate, rather than leaf rate,
 443            which may be tiny (even zero).
 444          */
 445         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 446                 struct cbq_class *b;
 447                 psched_tdiff_t base_delay = q->wd_expires;
 448
 449                 for (b = cl->borrow; b; b = b->borrow) {
 450                         delay = b->undertime - q->now;
 451                         if (delay < base_delay) {
 452                                 if (delay <= 0)
 453                                         delay = 1;
 454                                 base_delay = delay;
 455                         }
 456                 }
 457
 458                 q->wd_expires = base_delay;
 459         }
 460 }
 461
 462 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 463    they go overlimit
 464  */
 465
 466 static void cbq_ovl_rclassic(struct cbq_class *cl)
 467 {
 468         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 469         struct cbq_class *this = cl;
 470
 471         do {
 472                 if (cl->level > q->toplevel) {
 473                         cl = NULL;
 474                         break;
 475                 }
 476         } while ((cl = cl->borrow) != NULL);
 477
 478         if (cl == NULL)
 479                 cl = this;
 480         cbq_ovl_classic(cl);
 481 }
 482
 483 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 484
 485 static void cbq_ovl_delay(struct cbq_class *cl)
 486 {
 487         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 488         psched_tdiff_t delay = cl->undertime - q->now;
 489
 490         if (test_bit(__QDISC_STATE_DEACTIVATED,
 491                      &qdisc_root_sleeping(cl->qdisc)->state))
 492                 return;
 493
 494         if (!cl->delayed) {
 495                 psched_time_t sched = q->now;
 496                 ktime_t expires;
 497
 498                 delay += cl->offtime;
 499                 if (cl->avgidle < 0)
 500                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 501                 if (cl->avgidle < cl->minidle)
 502                         cl->avgidle = cl->minidle;
 503                 cl->undertime = q->now + delay;
 504
 505                 if (delay > 0) {
 506                         sched += delay + cl->penalty;
 507                         cl->penalized = sched;
 508                         cl->cpriority = TC_CBQ_MAXPRIO;
 509                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 510
 511                         expires = ktime_set(0, 0);
 512                         expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 513                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514                             ktime_to_ns(ktime_sub(
 515                                         hrtimer_get_expires(&q->delay_timer),
 516                                         expires)) > 0)
 517                                 hrtimer_set_expires(&q->delay_timer, expires);
 518                         hrtimer_restart(&q->delay_timer);
 519                         cl->delayed = 1;
 520                         cl->xstats.overactions++;
 521                         return;
 522                 }
 523                 delay = 1;
 524         }
 525         if (q->wd_expires == 0 || q->wd_expires > delay)
 526                 q->wd_expires = delay;
 527 }
 528
 529 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531 static void cbq_ovl_lowprio(struct cbq_class *cl)
 532 {
 533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535         cl->penalized = q->now + cl->penalty;
 536
 537         if (cl->cpriority != cl->priority2) {
 538                 cl->cpriority = cl->priority2;
 539                 q->pmask |= (1<<cl->cpriority);
 540                 cl->xstats.overactions++;
 541         }
 542         cbq_ovl_classic(cl);
 543 }
 544
 545 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547 static void cbq_ovl_drop(struct cbq_class *cl)
 548 {
 549         if (cl->q->ops->drop)
 550                 if (cl->q->ops->drop(cl->q))
 551                         cl->qdisc->q.qlen--;
 552         cl->xstats.overactions++;
 553         cbq_ovl_classic(cl);
 554 }
 555
 556 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557                                        psched_time_t now)
 558 {
 559         struct cbq_class *cl;
 560         struct cbq_class *cl_prev = q->active[prio];
 561         psched_time_t sched = now;
 562
 563         if (cl_prev == NULL)
 564                 return 0;
 565
 566         do {
 567                 cl = cl_prev->next_alive;
 568                 if (now - cl->penalized > 0) {
 569                         cl_prev->next_alive = cl->next_alive;
 570                         cl->next_alive = NULL;
 571                         cl->cpriority = cl->priority;
 572                         cl->delayed = 0;
 573                         cbq_activate_class(cl);
 574
 575                         if (cl == q->active[prio]) {
 576                                 q->active[prio] = cl_prev;
 577                                 if (cl == q->active[prio]) {
 578                                         q->active[prio] = NULL;
 579                                         return 0;
 580                                 }
 581                         }
 582
 583                         cl = cl_prev->next_alive;
 584                 } else if (sched - cl->penalized > 0)
 585                         sched = cl->penalized;
 586         } while ((cl_prev = cl) != q->active[prio]);
 587
 588         return sched - now;
 589 }
 590
 591 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592 {
 593         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594                                                 delay_timer);
 595         struct Qdisc *sch = q->watchdog.qdisc;
 596         psched_time_t now;
 597         psched_tdiff_t delay = 0;
 598         unsigned pmask;
 599
 600         now = psched_get_time();
 601
 602         pmask = q->pmask;
 603         q->pmask = 0;
 604
 605         while (pmask) {
 606                 int prio = ffz(~pmask);
 607                 psched_tdiff_t tmp;
 608
 609                 pmask &= ~(1<<prio);
 610
 611                 tmp = cbq_undelay_prio(q, prio, now);
 612                 if (tmp > 0) {
 613                         q->pmask |= 1<<prio;
 614                         if (tmp < delay || delay == 0)
 615                                 delay = tmp;
 616                 }
 617         }
 618
 619         if (delay) {
 620                 ktime_t time;
 621
 622                 time = ktime_set(0, 0);
 623                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625         }
 626
 627         sch->flags &= ~TCQ_F_THROTTLED;
 628         __netif_schedule(qdisc_root(sch));
 629         return HRTIMER_NORESTART;
 630 }
 631
 632 #ifdef CONFIG_NET_CLS_ACT
 633 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634 {
 635         struct Qdisc *sch = child->__parent;
 636         struct cbq_sched_data *q = qdisc_priv(sch);
 637         struct cbq_class *cl = q->rx_class;
 638
 639         q->rx_class = NULL;
 640
 641         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642                 int ret;
 643
 644                 cbq_mark_toplevel(q, cl);
 645
 646                 q->rx_class = cl;
 647                 cl->q->__parent = sch;
 648
 649                 ret = qdisc_enqueue(skb, cl->q);
 650                 if (ret == NET_XMIT_SUCCESS) {
 651                         sch->q.qlen++;
 652                         qdisc_bstats_update(sch, skb);
 653                         if (!cl->next_alive)
 654                                 cbq_activate_class(cl);
 655                         return 0;
 656                 }
 657                 if (net_xmit_drop_count(ret))
 658                         sch->qstats.drops++;
 659                 return 0;
 660         }
 661
 662         sch->qstats.drops++;
 663         return -1;
 664 }
 665 #endif
 666
 667 /*
 668    It is mission critical procedure.
 669
 670    We "regenerate" toplevel cutoff, if transmitting class
 671    has backlog and it is not regulated. It is not part of
 672    original CBQ description, but looks more reasonable.
 673    Probably, it is wrong. This question needs further investigation.
 674 */
 675
 676 static __inline__ void
 677 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 678                     struct cbq_class *borrowed)
 679 {
 680         if (cl && q->toplevel >= borrowed->level) {
 681                 if (cl->q->q.qlen > 1) {
 682                         do {
 683                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 684                                         q->toplevel = borrowed->level;
 685                                         return;
 686                                 }
 687                         } while ((borrowed=borrowed->borrow) != NULL);
 688                 }
 689 #if 0
 690         /* It is not necessary now. Uncommenting it
 691            will save CPU cycles, but decrease fairness.
 692          */
 693                 q->toplevel = TC_CBQ_MAXLEVEL;
 694 #endif
 695         }
 696 }
 697
 698 static void
 699 cbq_update(struct cbq_sched_data *q)
 700 {
 701         struct cbq_class *this = q->tx_class;
 702         struct cbq_class *cl = this;
 703         int len = q->tx_len;
 704
 705         q->tx_class = NULL;
 706
 707         for ( ; cl; cl = cl->share) {
 708                 long avgidle = cl->avgidle;
 709                 long idle;
 710
 711                 cl->bstats.packets++;
 712                 cl->bstats.bytes += len;
 713
 714                 /*
 715                    (now - last) is total time between packet right edges.
 716                    (last_pktlen/rate) is "virtual" busy time, so that
 717
 718                          idle = (now - last) - last_pktlen/rate
 719                  */
 720
 721                 idle = q->now - cl->last;
 722                 if ((unsigned long)idle > 128*1024*1024) {
 723                         avgidle = cl->maxidle;
 724                 } else {
 725                         idle -= L2T(cl, len);
 726
 727                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 728                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 729                    cl->avgidle == true_avgidle/W,
 730                    hence:
 731                  */
 732                         avgidle += idle - (avgidle>>cl->ewma_log);
 733                 }
 734
 735                 if (avgidle <= 0) {
 736                         /* Overlimit or at-limit */
 737
 738                         if (avgidle < cl->minidle)
 739                                 avgidle = cl->minidle;
 740
 741                         cl->avgidle = avgidle;
 742
 743                         /* Calculate expected time, when this class
 744                            will be allowed to send.
 745                            It will occur, when:
 746                            (1-W)*true_avgidle + W*delay = 0, i.e.
 747                            idle = (1/W - 1)*(-true_avgidle)
 748                            or
 749                            idle = (1 - W)*(-cl->avgidle);
 750                          */
 751                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 752
 753                         /*
 754                            That is not all.
 755                            To maintain the rate allocated to the class,
 756                            we add to undertime virtual clock,
 757                            necessary to complete transmitted packet.
 758                            (len/phys_bandwidth has been already passed
 759                            to the moment of cbq_update)
 760                          */
 761
 762                         idle -= L2T(&q->link, len);
 763                         idle += L2T(cl, len);
 764
 765                         cl->undertime = q->now + idle;
 766                 } else {
 767                         /* Underlimit */
 768
 769                         cl->undertime = PSCHED_PASTPERFECT;
 770                         if (avgidle > cl->maxidle)
 771                                 cl->avgidle = cl->maxidle;
 772                         else
 773                                 cl->avgidle = avgidle;
 774                 }
 775                 cl->last = q->now;
 776         }
 777
 778         cbq_update_toplevel(q, this, q->tx_borrowed);
 779 }
 780
 781 static __inline__ struct cbq_class *
 782 cbq_under_limit(struct cbq_class *cl)
 783 {
 784         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 785         struct cbq_class *this_cl = cl;
 786
 787         if (cl->tparent == NULL)
 788                 return cl;
 789
 790         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 791                 cl->delayed = 0;
 792                 return cl;
 793         }
 794
 795         do {
 796                 /* It is very suspicious place. Now overlimit
 797                    action is generated for not bounded classes
 798                    only if link is completely congested.
 799                    Though it is in agree with ancestor-only paradigm,
 800                    it looks very stupid. Particularly,
 801                    it means that this chunk of code will either
 802                    never be called or result in strong amplification
 803                    of burstiness. Dangerous, silly, and, however,
 804                    no another solution exists.
 805                  */
 806                 if ((cl = cl->borrow) == NULL) {
 807                         this_cl->qstats.overlimits++;
 808                         this_cl->overlimit(this_cl);
 809                         return NULL;
 810                 }
 811                 if (cl->level > q->toplevel)
 812                         return NULL;
 813         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 814
 815         cl->delayed = 0;
 816         return cl;
 817 }
 818
 819 static __inline__ struct sk_buff *
 820 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 821 {
 822         struct cbq_sched_data *q = qdisc_priv(sch);
 823         struct cbq_class *cl_tail, *cl_prev, *cl;
 824         struct sk_buff *skb;
 825         int deficit;
 826
 827         cl_tail = cl_prev = q->active[prio];
 828         cl = cl_prev->next_alive;
 829
 830         do {
 831                 deficit = 0;
 832
 833                 /* Start round */
 834                 do {
 835                         struct cbq_class *borrow = cl;
 836
 837                         if (cl->q->q.qlen &&
 838                             (borrow = cbq_under_limit(cl)) == NULL)
 839                                 goto skip_class;
 840
 841                         if (cl->deficit <= 0) {
 842                                 /* Class exhausted its allotment per
 843                                    this round. Switch to the next one.
 844                                  */
 845                                 deficit = 1;
 846                                 cl->deficit += cl->quantum;
 847                                 goto next_class;
 848                         }
 849
 850                         skb = cl->q->dequeue(cl->q);
 851
 852                         /* Class did not give us any skb :-(
 853                            It could occur even if cl->q->q.qlen != 0
 854                            f.e. if cl->q == "tbf"
 855                          */
 856                         if (skb == NULL)
 857                                 goto skip_class;
 858
 859                         cl->deficit -= qdisc_pkt_len(skb);
 860                         q->tx_class = cl;
 861                         q->tx_borrowed = borrow;
 862                         if (borrow != cl) {
 863 #ifndef CBQ_XSTATS_BORROWS_BYTES
 864                                 borrow->xstats.borrows++;
 865                                 cl->xstats.borrows++;
 866 #else
 867                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 868                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 869 #endif
 870                         }
 871                         q->tx_len = qdisc_pkt_len(skb);
 872
 873                         if (cl->deficit <= 0) {
 874                                 q->active[prio] = cl;
 875                                 cl = cl->next_alive;
 876                                 cl->deficit += cl->quantum;
 877                         }
 878                         return skb;
 879
 880 skip_class:
 881                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 882                                 /* Class is empty or penalized.
 883                                    Unlink it from active chain.
 884                                  */
 885                                 cl_prev->next_alive = cl->next_alive;
 886                                 cl->next_alive = NULL;
 887
 888                                 /* Did cl_tail point to it? */
 889                                 if (cl == cl_tail) {
 890                                         /* Repair it! */
 891                                         cl_tail = cl_prev;
 892
 893                                         /* Was it the last class in this band? */
 894                                         if (cl == cl_tail) {
 895                                                 /* Kill the band! */
 896                                                 q->active[prio] = NULL;
 897                                                 q->activemask &= ~(1<<prio);
 898                                                 if (cl->q->q.qlen)
 899                                                         cbq_activate_class(cl);
 900                                                 return NULL;
 901                                         }
 902
 903                                         q->active[prio] = cl_tail;
 904                                 }
 905                                 if (cl->q->q.qlen)
 906                                         cbq_activate_class(cl);
 907
 908                                 cl = cl_prev;
 909                         }
 910
 911 next_class:
 912                         cl_prev = cl;
 913                         cl = cl->next_alive;
 914                 } while (cl_prev != cl_tail);
 915         } while (deficit);
 916
 917         q->active[prio] = cl_prev;
 918
 919         return NULL;
 920 }
 921
 922 static __inline__ struct sk_buff *
 923 cbq_dequeue_1(struct Qdisc *sch)
 924 {
 925         struct cbq_sched_data *q = qdisc_priv(sch);
 926         struct sk_buff *skb;
 927         unsigned activemask;
 928
 929         activemask = q->activemask&0xFF;
 930         while (activemask) {
 931                 int prio = ffz(~activemask);
 932                 activemask &= ~(1<<prio);
 933                 skb = cbq_dequeue_prio(sch, prio);
 934                 if (skb)
 935                         return skb;
 936         }
 937         return NULL;
 938 }
 939
 940 static struct sk_buff *
 941 cbq_dequeue(struct Qdisc *sch)
 942 {
 943         struct sk_buff *skb;
 944         struct cbq_sched_data *q = qdisc_priv(sch);
 945         psched_time_t now;
 946         psched_tdiff_t incr;
 947
 948         now = psched_get_time();
 949         incr = now - q->now_rt;
 950
 951         if (q->tx_class) {
 952                 psched_tdiff_t incr2;
 953                 /* Time integrator. We calculate EOS time
 954                    by adding expected packet transmission time.
 955                    If real time is greater, we warp artificial clock,
 956                    so that:
 957
 958                    cbq_time = max(real_time, work);
 959                  */
 960                 incr2 = L2T(&q->link, q->tx_len);
 961                 q->now += incr2;
 962                 cbq_update(q);
 963                 if ((incr -= incr2) < 0)
 964                         incr = 0;
 965         }
 966         q->now += incr;
 967         q->now_rt = now;
 968
 969         for (;;) {
 970                 q->wd_expires = 0;
 971
 972                 skb = cbq_dequeue_1(sch);
 973                 if (skb) {
 974                         sch->q.qlen--;
 975                         sch->flags &= ~TCQ_F_THROTTLED;
 976                         return skb;
 977                 }
 978
 979                 /* All the classes are overlimit.
 980
 981                    It is possible, if:
 982
 983                    1. Scheduler is empty.
 984                    2. Toplevel cutoff inhibited borrowing.
 985                    3. Root class is overlimit.
 986
 987                    Reset 2d and 3d conditions and retry.
 988
 989                    Note, that NS and cbq-2.0 are buggy, peeking
 990                    an arbitrary class is appropriate for ancestor-only
 991                    sharing, but not for toplevel algorithm.
 992
 993                    Our version is better, but slower, because it requires
 994                    two passes, but it is unavoidable with top-level sharing.
 995                 */
 996
 997                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
 998                     q->link.undertime == PSCHED_PASTPERFECT)
 999                         break;
1000
1001                 q->toplevel = TC_CBQ_MAXLEVEL;
1002                 q->link.undertime = PSCHED_PASTPERFECT;
1003         }
1004
1005         /* No packets in scheduler or nobody wants to give them to us :-(
1006            Sigh... start watchdog timer in the last case. */
1007
1008         if (sch->q.qlen) {
1009                 sch->qstats.overlimits++;
1010                 if (q->wd_expires)
1011                         qdisc_watchdog_schedule(&q->watchdog,
1012                                                 now + q->wd_expires);
1013         }
1014         return NULL;
1015 }
1016
1017 /* CBQ class maintanance routines */
1018
1019 static void cbq_adjust_levels(struct cbq_class *this)
1020 {
1021         if (this == NULL)
1022                 return;
1023
1024         do {
1025                 int level = 0;
1026                 struct cbq_class *cl;
1027
1028                 if ((cl = this->children) != NULL) {
1029                         do {
1030                                 if (cl->level > level)
1031                                         level = cl->level;
1032                         } while ((cl = cl->sibling) != this->children);
1033                 }
1034                 this->level = level+1;
1035         } while ((this = this->tparent) != NULL);
1036 }
1037
1038 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1039 {
1040         struct cbq_class *cl;
1041         struct hlist_node *n;
1042         unsigned int h;
1043
1044         if (q->quanta[prio] == 0)
1045                 return;
1046
1047         for (h = 0; h < q->clhash.hashsize; h++) {
1048                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1049                         /* BUGGGG... Beware! This expression suffer of
1050                            arithmetic overflows!
1051                          */
1052                         if (cl->priority == prio) {
1053                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1054                                         q->quanta[prio];
1055                         }
1056                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1057                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1058                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1059                         }
1060                 }
1061         }
1062 }
1063
1064 static void cbq_sync_defmap(struct cbq_class *cl)
1065 {
1066         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1067         struct cbq_class *split = cl->split;
1068         unsigned h;
1069         int i;
1070
1071         if (split == NULL)
1072                 return;
1073
1074         for (i=0; i<=TC_PRIO_MAX; i++) {
1075                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1076                         split->defaults[i] = NULL;
1077         }
1078
1079         for (i=0; i<=TC_PRIO_MAX; i++) {
1080                 int level = split->level;
1081
1082                 if (split->defaults[i])
1083                         continue;
1084
1085                 for (h = 0; h < q->clhash.hashsize; h++) {
1086                         struct hlist_node *n;
1087                         struct cbq_class *c;
1088
1089                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
1090                                              common.hnode) {
1091                                 if (c->split == split && c->level < level &&
1092                                     c->defmap&(1<<i)) {
1093                                         split->defaults[i] = c;
1094                                         level = c->level;
1095                                 }
1096                         }
1097                 }
1098         }
1099 }
1100
1101 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1102 {
1103         struct cbq_class *split = NULL;
1104
1105         if (splitid == 0) {
1106                 if ((split = cl->split) == NULL)
1107                         return;
1108                 splitid = split->common.classid;
1109         }
1110
1111         if (split == NULL || split->common.classid != splitid) {
1112                 for (split = cl->tparent; split; split = split->tparent)
1113                         if (split->common.classid == splitid)
1114                                 break;
1115         }
1116
1117         if (split == NULL)
1118                 return;
1119
1120         if (cl->split != split) {
1121                 cl->defmap = 0;
1122                 cbq_sync_defmap(cl);
1123                 cl->split = split;
1124                 cl->defmap = def&mask;
1125         } else
1126                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1127
1128         cbq_sync_defmap(cl);
1129 }
1130
1131 static void cbq_unlink_class(struct cbq_class *this)
1132 {
1133         struct cbq_class *cl, **clp;
1134         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1135
1136         qdisc_class_hash_remove(&q->clhash, &this->common);
1137
1138         if (this->tparent) {
1139                 clp=&this->sibling;
1140                 cl = *clp;
1141                 do {
1142                         if (cl == this) {
1143                                 *clp = cl->sibling;
1144                                 break;
1145                         }
1146                         clp = &cl->sibling;
1147                 } while ((cl = *clp) != this->sibling);
1148
1149                 if (this->tparent->children == this) {
1150                         this->tparent->children = this->sibling;
1151                         if (this->sibling == this)
1152                                 this->tparent->children = NULL;
1153                 }
1154         } else {
1155                 WARN_ON(this->sibling != this);
1156         }
1157 }
1158
1159 static void cbq_link_class(struct cbq_class *this)
1160 {
1161         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1162         struct cbq_class *parent = this->tparent;
1163
1164         this->sibling = this;
1165         qdisc_class_hash_insert(&q->clhash, &this->common);
1166
1167         if (parent == NULL)
1168                 return;
1169
1170         if (parent->children == NULL) {
1171                 parent->children = this;
1172         } else {
1173                 this->sibling = parent->children->sibling;
1174                 parent->children->sibling = this;
1175         }
1176 }
1177
1178 static unsigned int cbq_drop(struct Qdisc* sch)
1179 {
1180         struct cbq_sched_data *q = qdisc_priv(sch);
1181         struct cbq_class *cl, *cl_head;
1182         int prio;
1183         unsigned int len;
1184
1185         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1186                 if ((cl_head = q->active[prio]) == NULL)
1187                         continue;
1188
1189                 cl = cl_head;
1190                 do {
1191                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1192                                 sch->q.qlen--;
1193                                 if (!cl->q->q.qlen)
1194                                         cbq_deactivate_class(cl);
1195                                 return len;
1196                         }
1197                 } while ((cl = cl->next_alive) != cl_head);
1198         }
1199         return 0;
1200 }
1201
1202 static void
1203 cbq_reset(struct Qdisc* sch)
1204 {
1205         struct cbq_sched_data *q = qdisc_priv(sch);
1206         struct cbq_class *cl;
1207         struct hlist_node *n;
1208         int prio;
1209         unsigned h;
1210
1211         q->activemask = 0;
1212         q->pmask = 0;
1213         q->tx_class = NULL;
1214         q->tx_borrowed = NULL;
1215         qdisc_watchdog_cancel(&q->watchdog);
1216         hrtimer_cancel(&q->delay_timer);
1217         q->toplevel = TC_CBQ_MAXLEVEL;
1218         q->now = psched_get_time();
1219         q->now_rt = q->now;
1220
1221         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1222                 q->active[prio] = NULL;
1223
1224         for (h = 0; h < q->clhash.hashsize; h++) {
1225                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1226                         qdisc_reset(cl->q);
1227
1228                         cl->next_alive = NULL;
1229                         cl->undertime = PSCHED_PASTPERFECT;
1230                         cl->avgidle = cl->maxidle;
1231                         cl->deficit = cl->quantum;
1232                         cl->cpriority = cl->priority;
1233                 }
1234         }
1235         sch->q.qlen = 0;
1236 }
1237
1238
1239 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1240 {
1241         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1242                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1243                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1244         }
1245         if (lss->change&TCF_CBQ_LSS_EWMA)
1246                 cl->ewma_log = lss->ewma_log;
1247         if (lss->change&TCF_CBQ_LSS_AVPKT)
1248                 cl->avpkt = lss->avpkt;
1249         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1250                 cl->minidle = -(long)lss->minidle;
1251         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1252                 cl->maxidle = lss->maxidle;
1253                 cl->avgidle = lss->maxidle;
1254         }
1255         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1256                 cl->offtime = lss->offtime;
1257         return 0;
1258 }
1259
1260 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1261 {
1262         q->nclasses[cl->priority]--;
1263         q->quanta[cl->priority] -= cl->weight;
1264         cbq_normalize_quanta(q, cl->priority);
1265 }
1266
1267 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1268 {
1269         q->nclasses[cl->priority]++;
1270         q->quanta[cl->priority] += cl->weight;
1271         cbq_normalize_quanta(q, cl->priority);
1272 }
1273
1274 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1275 {
1276         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1277
1278         if (wrr->allot)
1279                 cl->allot = wrr->allot;
1280         if (wrr->weight)
1281                 cl->weight = wrr->weight;
1282         if (wrr->priority) {
1283                 cl->priority = wrr->priority-1;
1284                 cl->cpriority = cl->priority;
1285                 if (cl->priority >= cl->priority2)
1286                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1287         }
1288
1289         cbq_addprio(q, cl);
1290         return 0;
1291 }
1292
1293 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1294 {
1295         switch (ovl->strategy) {
1296         case TC_CBQ_OVL_CLASSIC:
1297                 cl->overlimit = cbq_ovl_classic;
1298                 break;
1299         case TC_CBQ_OVL_DELAY:
1300                 cl->overlimit = cbq_ovl_delay;
1301                 break;
1302         case TC_CBQ_OVL_LOWPRIO:
1303                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1304                     ovl->priority2-1 <= cl->priority)
1305                         return -EINVAL;
1306                 cl->priority2 = ovl->priority2-1;
1307                 cl->overlimit = cbq_ovl_lowprio;
1308                 break;
1309         case TC_CBQ_OVL_DROP:
1310                 cl->overlimit = cbq_ovl_drop;
1311                 break;
1312         case TC_CBQ_OVL_RCLASSIC:
1313                 cl->overlimit = cbq_ovl_rclassic;
1314                 break;
1315         default:
1316                 return -EINVAL;
1317         }
1318         cl->penalty = ovl->penalty;
1319         return 0;
1320 }
1321
1322 #ifdef CONFIG_NET_CLS_ACT
1323 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1324 {
1325         cl->police = p->police;
1326
1327         if (cl->q->handle) {
1328                 if (p->police == TC_POLICE_RECLASSIFY)
1329                         cl->q->reshape_fail = cbq_reshape_fail;
1330                 else
1331                         cl->q->reshape_fail = NULL;
1332         }
1333         return 0;
1334 }
1335 #endif
1336
1337 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1338 {
1339         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1340         return 0;
1341 }
1342
1343 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1344         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1345         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1346         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1347         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1348         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1349         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1350         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1351 };
1352
1353 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1354 {
1355         struct cbq_sched_data *q = qdisc_priv(sch);
1356         struct nlattr *tb[TCA_CBQ_MAX + 1];
1357         struct tc_ratespec *r;
1358         int err;
1359
1360         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1361         if (err < 0)
1362                 return err;
1363
1364         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1365                 return -EINVAL;
1366
1367         r = nla_data(tb[TCA_CBQ_RATE]);
1368
1369         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1370                 return -EINVAL;
1371
1372         err = qdisc_class_hash_init(&q->clhash);
1373         if (err < 0)
1374                 goto put_rtab;
1375
1376         q->link.refcnt = 1;
1377         q->link.sibling = &q->link;
1378         q->link.common.classid = sch->handle;
1379         q->link.qdisc = sch;
1380         q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1381                                       sch->handle);
1382         if (!q->link.q)
1383                 q->link.q = &noop_qdisc;
1384
1385         q->link.priority = TC_CBQ_MAXPRIO-1;
1386         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1387         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1388         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1389         q->link.overlimit = cbq_ovl_classic;
1390         q->link.allot = psched_mtu(qdisc_dev(sch));
1391         q->link.quantum = q->link.allot;
1392         q->link.weight = q->link.R_tab->rate.rate;
1393
1394         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1395         q->link.avpkt = q->link.allot/2;
1396         q->link.minidle = -0x7FFFFFFF;
1397
1398         qdisc_watchdog_init(&q->watchdog, sch);
1399         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1400         q->delay_timer.function = cbq_undelay;
1401         q->toplevel = TC_CBQ_MAXLEVEL;
1402         q->now = psched_get_time();
1403         q->now_rt = q->now;
1404
1405         cbq_link_class(&q->link);
1406
1407         if (tb[TCA_CBQ_LSSOPT])
1408                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1409
1410         cbq_addprio(q, &q->link);
1411         return 0;
1412
1413 put_rtab:
1414         qdisc_put_rtab(q->link.R_tab);
1415         return err;
1416 }
1417
1418 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1419 {
1420         unsigned char *b = skb_tail_pointer(skb);
1421
1422         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1423         return skb->len;
1424
1425 nla_put_failure:
1426         nlmsg_trim(skb, b);
1427         return -1;
1428 }
1429
1430 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1431 {
1432         unsigned char *b = skb_tail_pointer(skb);
1433         struct tc_cbq_lssopt opt;
1434
1435         opt.flags = 0;
1436         if (cl->borrow == NULL)
1437                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1438         if (cl->share == NULL)
1439                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1440         opt.ewma_log = cl->ewma_log;
1441         opt.level = cl->level;
1442         opt.avpkt = cl->avpkt;
1443         opt.maxidle = cl->maxidle;
1444         opt.minidle = (u32)(-cl->minidle);
1445         opt.offtime = cl->offtime;
1446         opt.change = ~0;
1447         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1448         return skb->len;
1449
1450 nla_put_failure:
1451         nlmsg_trim(skb, b);
1452         return -1;
1453 }
1454
1455 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1456 {
1457         unsigned char *b = skb_tail_pointer(skb);
1458         struct tc_cbq_wrropt opt;
1459
1460         opt.flags = 0;
1461         opt.allot = cl->allot;
1462         opt.priority = cl->priority+1;
1463         opt.cpriority = cl->cpriority+1;
1464         opt.weight = cl->weight;
1465         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1466         return skb->len;
1467
1468 nla_put_failure:
1469         nlmsg_trim(skb, b);
1470         return -1;
1471 }
1472
1473 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1474 {
1475         unsigned char *b = skb_tail_pointer(skb);
1476         struct tc_cbq_ovl opt;
1477
1478         opt.strategy = cl->ovl_strategy;
1479         opt.priority2 = cl->priority2+1;
1480         opt.pad = 0;
1481         opt.penalty = cl->penalty;
1482         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1483         return skb->len;
1484
1485 nla_put_failure:
1486         nlmsg_trim(skb, b);
1487         return -1;
1488 }
1489
1490 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1491 {
1492         unsigned char *b = skb_tail_pointer(skb);
1493         struct tc_cbq_fopt opt;
1494
1495         if (cl->split || cl->defmap) {
1496                 opt.split = cl->split ? cl->split->common.classid : 0;
1497                 opt.defmap = cl->defmap;
1498                 opt.defchange = ~0;
1499                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1500         }
1501         return skb->len;
1502
1503 nla_put_failure:
1504         nlmsg_trim(skb, b);
1505         return -1;
1506 }
1507
1508 #ifdef CONFIG_NET_CLS_ACT
1509 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1510 {
1511         unsigned char *b = skb_tail_pointer(skb);
1512         struct tc_cbq_police opt;
1513
1514         if (cl->police) {
1515                 opt.police = cl->police;
1516                 opt.__res1 = 0;
1517                 opt.__res2 = 0;
1518                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1519         }
1520         return skb->len;
1521
1522 nla_put_failure:
1523         nlmsg_trim(skb, b);
1524         return -1;
1525 }
1526 #endif
1527
1528 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1529 {
1530         if (cbq_dump_lss(skb, cl) < 0 ||
1531             cbq_dump_rate(skb, cl) < 0 ||
1532             cbq_dump_wrr(skb, cl) < 0 ||
1533             cbq_dump_ovl(skb, cl) < 0 ||
1534 #ifdef CONFIG_NET_CLS_ACT
1535             cbq_dump_police(skb, cl) < 0 ||
1536 #endif
1537             cbq_dump_fopt(skb, cl) < 0)
1538                 return -1;
1539         return 0;
1540 }
1541
1542 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1543 {
1544         struct cbq_sched_data *q = qdisc_priv(sch);
1545         struct nlattr *nest;
1546
1547         nest = nla_nest_start(skb, TCA_OPTIONS);
1548         if (nest == NULL)
1549                 goto nla_put_failure;
1550         if (cbq_dump_attr(skb, &q->link) < 0)
1551                 goto nla_put_failure;
1552         nla_nest_end(skb, nest);
1553         return skb->len;
1554
1555 nla_put_failure:
1556         nla_nest_cancel(skb, nest);
1557         return -1;
1558 }
1559
1560 static int
1561 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1562 {
1563         struct cbq_sched_data *q = qdisc_priv(sch);
1564
1565         q->link.xstats.avgidle = q->link.avgidle;
1566         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1567 }
1568
1569 static int
1570 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1571                struct sk_buff *skb, struct tcmsg *tcm)
1572 {
1573         struct cbq_class *cl = (struct cbq_class*)arg;
1574         struct nlattr *nest;
1575
1576         if (cl->tparent)
1577                 tcm->tcm_parent = cl->tparent->common.classid;
1578         else
1579                 tcm->tcm_parent = TC_H_ROOT;
1580         tcm->tcm_handle = cl->common.classid;
1581         tcm->tcm_info = cl->q->handle;
1582
1583         nest = nla_nest_start(skb, TCA_OPTIONS);
1584         if (nest == NULL)
1585                 goto nla_put_failure;
1586         if (cbq_dump_attr(skb, cl) < 0)
1587                 goto nla_put_failure;
1588         nla_nest_end(skb, nest);
1589         return skb->len;
1590
1591 nla_put_failure:
1592         nla_nest_cancel(skb, nest);
1593         return -1;
1594 }
1595
1596 static int
1597 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1598         struct gnet_dump *d)
1599 {
1600         struct cbq_sched_data *q = qdisc_priv(sch);
1601         struct cbq_class *cl = (struct cbq_class*)arg;
1602
1603         cl->qstats.qlen = cl->q->q.qlen;
1604         cl->xstats.avgidle = cl->avgidle;
1605         cl->xstats.undertime = 0;
1606
1607         if (cl->undertime != PSCHED_PASTPERFECT)
1608                 cl->xstats.undertime = cl->undertime - q->now;
1609
1610         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1611             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1612             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1613                 return -1;
1614
1615         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1616 }
1617
1618 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1619                      struct Qdisc **old)
1620 {
1621         struct cbq_class *cl = (struct cbq_class*)arg;
1622
1623         if (new == NULL) {
1624                 new = qdisc_create_dflt(sch->dev_queue,
1625                                         &pfifo_qdisc_ops, cl->common.classid);
1626                 if (new == NULL)
1627                         return -ENOBUFS;
1628         } else {
1629 #ifdef CONFIG_NET_CLS_ACT
1630                 if (cl->police == TC_POLICE_RECLASSIFY)
1631                         new->reshape_fail = cbq_reshape_fail;
1632 #endif
1633         }
1634         sch_tree_lock(sch);
1635         *old = cl->q;
1636         cl->q = new;
1637         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1638         qdisc_reset(*old);
1639         sch_tree_unlock(sch);
1640
1641         return 0;
1642 }
1643
1644 static struct Qdisc *
1645 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1646 {
1647         struct cbq_class *cl = (struct cbq_class*)arg;
1648
1649         return cl->q;
1650 }
1651
1652 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1653 {
1654         struct cbq_class *cl = (struct cbq_class *)arg;
1655
1656         if (cl->q->q.qlen == 0)
1657                 cbq_deactivate_class(cl);
1658 }
1659
1660 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1661 {
1662         struct cbq_sched_data *q = qdisc_priv(sch);
1663         struct cbq_class *cl = cbq_class_lookup(q, classid);
1664
1665         if (cl) {
1666                 cl->refcnt++;
1667                 return (unsigned long)cl;
1668         }
1669         return 0;
1670 }
1671
1672 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1673 {
1674         struct cbq_sched_data *q = qdisc_priv(sch);
1675
1676         WARN_ON(cl->filters);
1677
1678         tcf_destroy_chain(&cl->filter_list);
1679         qdisc_destroy(cl->q);
1680         qdisc_put_rtab(cl->R_tab);
1681         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1682         if (cl != &q->link)
1683                 kfree(cl);
1684 }
1685
1686 static void
1687 cbq_destroy(struct Qdisc* sch)
1688 {
1689         struct cbq_sched_data *q = qdisc_priv(sch);
1690         struct hlist_node *n, *next;
1691         struct cbq_class *cl;
1692         unsigned h;
1693
1694 #ifdef CONFIG_NET_CLS_ACT
1695         q->rx_class = NULL;
1696 #endif
1697         /*
1698          * Filters must be destroyed first because we don't destroy the
1699          * classes from root to leafs which means that filters can still
1700          * be bound to classes which have been destroyed already. --TGR '04
1701          */
1702         for (h = 0; h < q->clhash.hashsize; h++) {
1703                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1704                         tcf_destroy_chain(&cl->filter_list);
1705         }
1706         for (h = 0; h < q->clhash.hashsize; h++) {
1707                 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1708                                           common.hnode)
1709                         cbq_destroy_class(sch, cl);
1710         }
1711         qdisc_class_hash_destroy(&q->clhash);
1712 }
1713
1714 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1715 {
1716         struct cbq_class *cl = (struct cbq_class*)arg;
1717
1718         if (--cl->refcnt == 0) {
1719 #ifdef CONFIG_NET_CLS_ACT
1720                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1721                 struct cbq_sched_data *q = qdisc_priv(sch);
1722
1723                 spin_lock_bh(root_lock);
1724                 if (q->rx_class == cl)
1725                         q->rx_class = NULL;
1726                 spin_unlock_bh(root_lock);
1727 #endif
1728
1729                 cbq_destroy_class(sch, cl);
1730         }
1731 }
1732
1733 static int
1734 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1735                  unsigned long *arg)
1736 {
1737         int err;
1738         struct cbq_sched_data *q = qdisc_priv(sch);
1739         struct cbq_class *cl = (struct cbq_class*)*arg;
1740         struct nlattr *opt = tca[TCA_OPTIONS];
1741         struct nlattr *tb[TCA_CBQ_MAX + 1];
1742         struct cbq_class *parent;
1743         struct qdisc_rate_table *rtab = NULL;
1744
1745         if (opt == NULL)
1746                 return -EINVAL;
1747
1748         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1749         if (err < 0)
1750                 return err;
1751
1752         if (cl) {
1753                 /* Check parent */
1754                 if (parentid) {
1755                         if (cl->tparent &&
1756                             cl->tparent->common.classid != parentid)
1757                                 return -EINVAL;
1758                         if (!cl->tparent && parentid != TC_H_ROOT)
1759                                 return -EINVAL;
1760                 }
1761
1762                 if (tb[TCA_CBQ_RATE]) {
1763                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1764                                               tb[TCA_CBQ_RTAB]);
1765                         if (rtab == NULL)
1766                                 return -EINVAL;
1767                 }
1768
1769                 if (tca[TCA_RATE]) {
1770                         err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1771                                                     qdisc_root_sleeping_lock(sch),
1772                                                     tca[TCA_RATE]);
1773                         if (err) {
1774                                 if (rtab)
1775                                         qdisc_put_rtab(rtab);
1776                                 return err;
1777                         }
1778                 }
1779
1780                 /* Change class parameters */
1781                 sch_tree_lock(sch);
1782
1783                 if (cl->next_alive != NULL)
1784                         cbq_deactivate_class(cl);
1785
1786                 if (rtab) {
1787                         qdisc_put_rtab(cl->R_tab);
1788                         cl->R_tab = rtab;
1789                 }
1790
1791                 if (tb[TCA_CBQ_LSSOPT])
1792                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1793
1794                 if (tb[TCA_CBQ_WRROPT]) {
1795                         cbq_rmprio(q, cl);
1796                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1797                 }
1798
1799                 if (tb[TCA_CBQ_OVL_STRATEGY])
1800                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1801
1802 #ifdef CONFIG_NET_CLS_ACT
1803                 if (tb[TCA_CBQ_POLICE])
1804                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1805 #endif
1806
1807                 if (tb[TCA_CBQ_FOPT])
1808                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1809
1810                 if (cl->q->q.qlen)
1811                         cbq_activate_class(cl);
1812
1813                 sch_tree_unlock(sch);
1814
1815                 return 0;
1816         }
1817
1818         if (parentid == TC_H_ROOT)
1819                 return -EINVAL;
1820
1821         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1822             tb[TCA_CBQ_LSSOPT] == NULL)
1823                 return -EINVAL;
1824
1825         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1826         if (rtab == NULL)
1827                 return -EINVAL;
1828
1829         if (classid) {
1830                 err = -EINVAL;
1831                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1832                         goto failure;
1833         } else {
1834                 int i;
1835                 classid = TC_H_MAKE(sch->handle,0x8000);
1836
1837                 for (i=0; i<0x8000; i++) {
1838                         if (++q->hgenerator >= 0x8000)
1839                                 q->hgenerator = 1;
1840                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1841                                 break;
1842                 }
1843                 err = -ENOSR;
1844                 if (i >= 0x8000)
1845                         goto failure;
1846                 classid = classid|q->hgenerator;
1847         }
1848
1849         parent = &q->link;
1850         if (parentid) {
1851                 parent = cbq_class_lookup(q, parentid);
1852                 err = -EINVAL;
1853                 if (parent == NULL)
1854                         goto failure;
1855         }
1856
1857         err = -ENOBUFS;
1858         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1859         if (cl == NULL)
1860                 goto failure;
1861
1862         if (tca[TCA_RATE]) {
1863                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1864                                         qdisc_root_sleeping_lock(sch),
1865                                         tca[TCA_RATE]);
1866                 if (err) {
1867                         kfree(cl);
1868                         goto failure;
1869                 }
1870         }
1871
1872         cl->R_tab = rtab;
1873         rtab = NULL;
1874         cl->refcnt = 1;
1875         cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1876         if (!cl->q)
1877                 cl->q = &noop_qdisc;
1878         cl->common.classid = classid;
1879         cl->tparent = parent;
1880         cl->qdisc = sch;
1881         cl->allot = parent->allot;
1882         cl->quantum = cl->allot;
1883         cl->weight = cl->R_tab->rate.rate;
1884
1885         sch_tree_lock(sch);
1886         cbq_link_class(cl);
1887         cl->borrow = cl->tparent;
1888         if (cl->tparent != &q->link)
1889                 cl->share = cl->tparent;
1890         cbq_adjust_levels(parent);
1891         cl->minidle = -0x7FFFFFFF;
1892         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1893         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1894         if (cl->ewma_log==0)
1895                 cl->ewma_log = q->link.ewma_log;
1896         if (cl->maxidle==0)
1897                 cl->maxidle = q->link.maxidle;
1898         if (cl->avpkt==0)
1899                 cl->avpkt = q->link.avpkt;
1900         cl->overlimit = cbq_ovl_classic;
1901         if (tb[TCA_CBQ_OVL_STRATEGY])
1902                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1903 #ifdef CONFIG_NET_CLS_ACT
1904         if (tb[TCA_CBQ_POLICE])
1905                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1906 #endif
1907         if (tb[TCA_CBQ_FOPT])
1908                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1909         sch_tree_unlock(sch);
1910
1911         qdisc_class_hash_grow(sch, &q->clhash);
1912
1913         *arg = (unsigned long)cl;
1914         return 0;
1915
1916 failure:
1917         qdisc_put_rtab(rtab);
1918         return err;
1919 }
1920
1921 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1922 {
1923         struct cbq_sched_data *q = qdisc_priv(sch);
1924         struct cbq_class *cl = (struct cbq_class*)arg;
1925         unsigned int qlen;
1926
1927         if (cl->filters || cl->children || cl == &q->link)
1928                 return -EBUSY;
1929
1930         sch_tree_lock(sch);
1931
1932         qlen = cl->q->q.qlen;
1933         qdisc_reset(cl->q);
1934         qdisc_tree_decrease_qlen(cl->q, qlen);
1935
1936         if (cl->next_alive)
1937                 cbq_deactivate_class(cl);
1938
1939         if (q->tx_borrowed == cl)
1940                 q->tx_borrowed = q->tx_class;
1941         if (q->tx_class == cl) {
1942                 q->tx_class = NULL;
1943                 q->tx_borrowed = NULL;
1944         }
1945 #ifdef CONFIG_NET_CLS_ACT
1946         if (q->rx_class == cl)
1947                 q->rx_class = NULL;
1948 #endif
1949
1950         cbq_unlink_class(cl);
1951         cbq_adjust_levels(cl->tparent);
1952         cl->defmap = 0;
1953         cbq_sync_defmap(cl);
1954
1955         cbq_rmprio(q, cl);
1956         sch_tree_unlock(sch);
1957
1958         BUG_ON(--cl->refcnt == 0);
1959         /*
1960          * This shouldn't happen: we "hold" one cops->get() when called
1961          * from tc_ctl_tclass; the destroy method is done from cops->put().
1962          */
1963
1964         return 0;
1965 }
1966
1967 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1968 {
1969         struct cbq_sched_data *q = qdisc_priv(sch);
1970         struct cbq_class *cl = (struct cbq_class *)arg;
1971
1972         if (cl == NULL)
1973                 cl = &q->link;
1974
1975         return &cl->filter_list;
1976 }
1977
1978 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1979                                      u32 classid)
1980 {
1981         struct cbq_sched_data *q = qdisc_priv(sch);
1982         struct cbq_class *p = (struct cbq_class*)parent;
1983         struct cbq_class *cl = cbq_class_lookup(q, classid);
1984
1985         if (cl) {
1986                 if (p && p->level <= cl->level)
1987                         return 0;
1988                 cl->filters++;
1989                 return (unsigned long)cl;
1990         }
1991         return 0;
1992 }
1993
1994 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1995 {
1996         struct cbq_class *cl = (struct cbq_class*)arg;
1997
1998         cl->filters--;
1999 }
2000
2001 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2002 {
2003         struct cbq_sched_data *q = qdisc_priv(sch);
2004         struct cbq_class *cl;
2005         struct hlist_node *n;
2006         unsigned h;
2007
2008         if (arg->stop)
2009                 return;
2010
2011         for (h = 0; h < q->clhash.hashsize; h++) {
2012                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2013                         if (arg->count < arg->skip) {
2014                                 arg->count++;
2015                                 continue;
2016                         }
2017                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2018                                 arg->stop = 1;
2019                                 return;
2020                         }
2021                         arg->count++;
2022                 }
2023         }
2024 }
2025
2026 static const struct Qdisc_class_ops cbq_class_ops = {
2027         .graft          =       cbq_graft,
2028         .leaf           =       cbq_leaf,
2029         .qlen_notify    =       cbq_qlen_notify,
2030         .get            =       cbq_get,
2031         .put            =       cbq_put,
2032         .change         =       cbq_change_class,
2033         .delete         =       cbq_delete,
2034         .walk           =       cbq_walk,
2035         .tcf_chain      =       cbq_find_tcf,
2036         .bind_tcf       =       cbq_bind_filter,
2037         .unbind_tcf     =       cbq_unbind_filter,
2038         .dump           =       cbq_dump_class,
2039         .dump_stats     =       cbq_dump_class_stats,
2040 };
2041
2042 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2043         .next           =       NULL,
2044         .cl_ops         =       &cbq_class_ops,
2045         .id             =       "cbq",
2046         .priv_size      =       sizeof(struct cbq_sched_data),
2047         .enqueue        =       cbq_enqueue,
2048         .dequeue        =       cbq_dequeue,
2049         .peek           =       qdisc_peek_dequeued,
2050         .drop           =       cbq_drop,
2051         .init           =       cbq_init,
2052         .reset          =       cbq_reset,
2053         .destroy        =       cbq_destroy,
2054         .change         =       NULL,
2055         .dump           =       cbq_dump,
2056         .dump_stats     =       cbq_dump_stats,
2057         .owner          =       THIS_MODULE,
2058 };
2059
2060 static int __init cbq_module_init(void)
2061 {
2062         return register_qdisc(&cbq_qdisc_ops);
2063 }
2064 static void __exit cbq_module_exit(void)
2065 {
2066         unregister_qdisc(&cbq_qdisc_ops);
2067 }
2068 module_init(cbq_module_init)
2069 module_exit(cbq_module_exit)
2070 MODULE_LICENSE("GPL");