2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement in [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have an IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is welcome to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate the true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is not exact because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
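/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the delay
 * bound quoted above,
 *
 *	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
 *
 * where W*r_i is the per-round allotment of class i, r = r_1+...+r_k,
 * B is the link bandwidth, and [x] is read here as a ceiling.
 */
#if 0
#include <math.h>
#include <stdio.h>

/* Sizes in bytes, bandwidth in bytes/sec, result in seconds. */
static double cbq_delay_bound(double mtu, double W, double r_i,
			      double r, int k, double B)
{
	double rounds = ceil(mtu / (W * r_i));	/* [MTU/(W*r_i)] */

	return (rounds * W * r + W * r + k * mtu) / B;
}

int main(void)
{
	/* Three classes with a 10% rate share each on a 10 Mbit/s link
	 * (1.25e6 bytes/s), W = 10000 bytes per round, MTU = 1500.
	 */
	printf("delay bound: %g s\n",
	       cbq_delay_bound(1500.0, 10000.0, 0.1, 0.3, 3, 1.25e6));
	return 0;
}
#endif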
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
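/* L2T ("length to time"): transmission time of a len-byte packet at the
 * class rate, looked up in the precomputed rate table.
 */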
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If the classifier selected a link sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
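/*
 * A minimal, standalone sketch (userspace) of the "Step 1" test above:
 * skb->priority is used directly as a classid when its major number
 * matches the qdisc handle.  TC_H_MAJ/TC_H_MIN/TC_H_MAKE are redefined
 * locally with the same values as in <linux/pkt_sched.h> so the sketch
 * compiles on its own.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define TC_H_MAJ(h)		((h) & 0xFFFF0000U)
#define TC_H_MIN(h)		((h) & 0x0000FFFFU)
#define TC_H_MAKE(maj, min)	(((maj) & 0xFFFF0000U) | ((min) & 0x0000FFFFU))

int main(void)
{
	uint32_t handle = 0x00010000U;		/* qdisc handle 1:0 */
	uint32_t prio = TC_H_MAKE(handle, 5);	/* skb->priority set to 1:5 */

	/* Same test as in cbq_classify(): the majors match, so prio is
	 * looked up directly as one of our class ids (class 1:5).
	 */
	printf("direct class lookup: %s\n",
	       TC_H_MAJ(prio ^ handle) == 0 ? "yes" : "no");
	return 0;
}
#endif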
292 A packet has just been enqueued on a previously empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
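/* q->active[prio] points at the tail of a circular, singly linked list
 * threaded through next_alive; the newly activated class becomes the new
 * tail, and tail->next_alive is the head that cbq_dequeue_prio() starts
 * scanning from.
 */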
316 Unlink the class from the active chain.
317 Note that the same unlinking is also done directly in cbq_dequeue*
318 during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
380 if (ret & __NET_XMIT_BYPASS)
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
415 if ((cl = q->tx_class) == NULL) {
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
426 cl->q->__parent = sch;
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
430 sch->qstats.requeues++;
432 cbq_activate_class(cl);
435 if (net_xmit_drop_count(ret)) {
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
452 delay += cl->offtime;
455 The class goes to sleep, so it will have no
456 chance to accumulate avgidle. Let's forgive it 8)
458 BTW cbq-2.0 has a bug in this
459 place: apparently they forgot to shift it by cl->ewma_log.
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on the
476 real available rate, rather than the leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
492 q->wd_expires = base_delay;
496 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
506 if (cl->level > q->toplevel) {
510 } while ((cl = cl->borrow) != NULL);
517 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
524 if (test_bit(__QDISC_STATE_DEACTIVATED,
525 &qdisc_root_sleeping(cl->qdisc)->state))
529 psched_time_t sched = q->now;
532 delay += cl->offtime;
534 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
535 if (cl->avgidle < cl->minidle)
536 cl->avgidle = cl->minidle;
537 cl->undertime = q->now + delay;
540 sched += delay + cl->penalty;
541 cl->penalized = sched;
542 cl->cpriority = TC_CBQ_MAXPRIO;
543 q->pmask |= (1<<TC_CBQ_MAXPRIO);
545 expires = ktime_set(0, 0);
546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
547 if (hrtimer_try_to_cancel(&q->delay_timer) &&
548 ktime_to_ns(ktime_sub(q->delay_timer.expires,
550 q->delay_timer.expires = expires;
551 hrtimer_restart(&q->delay_timer);
553 cl->xstats.overactions++;
558 if (q->wd_expires == 0 || q->wd_expires > delay)
559 q->wd_expires = delay;
562 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
564 static void cbq_ovl_lowprio(struct cbq_class *cl)
566 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
568 cl->penalized = q->now + cl->penalty;
570 if (cl->cpriority != cl->priority2) {
571 cl->cpriority = cl->priority2;
572 q->pmask |= (1<<cl->cpriority);
573 cl->xstats.overactions++;
578 /* TC_CBQ_OVL_DROP: penalize class by dropping */
580 static void cbq_ovl_drop(struct cbq_class *cl)
582 if (cl->q->ops->drop)
583 if (cl->q->ops->drop(cl->q))
585 cl->xstats.overactions++;
589 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
592 struct cbq_class *cl;
593 struct cbq_class *cl_prev = q->active[prio];
594 psched_time_t sched = now;
600 cl = cl_prev->next_alive;
601 if (now - cl->penalized > 0) {
602 cl_prev->next_alive = cl->next_alive;
603 cl->next_alive = NULL;
604 cl->cpriority = cl->priority;
606 cbq_activate_class(cl);
608 if (cl == q->active[prio]) {
609 q->active[prio] = cl_prev;
610 if (cl == q->active[prio]) {
611 q->active[prio] = NULL;
616 cl = cl_prev->next_alive;
617 } else if (sched - cl->penalized > 0)
618 sched = cl->penalized;
619 } while ((cl_prev = cl) != q->active[prio]);
624 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
626 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
628 struct Qdisc *sch = q->watchdog.qdisc;
630 psched_tdiff_t delay = 0;
633 now = psched_get_time();
639 int prio = ffz(~pmask);
644 tmp = cbq_undelay_prio(q, prio, now);
647 if (tmp < delay || delay == 0)
655 time = ktime_set(0, 0);
656 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
657 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
660 sch->flags &= ~TCQ_F_THROTTLED;
661 __netif_schedule(qdisc_root(sch));
662 return HRTIMER_NORESTART;
665 #ifdef CONFIG_NET_CLS_ACT
666 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
668 struct Qdisc *sch = child->__parent;
669 struct cbq_sched_data *q = qdisc_priv(sch);
670 struct cbq_class *cl = q->rx_class;
674 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
677 cbq_mark_toplevel(q, cl);
680 cl->q->__parent = sch;
682 ret = qdisc_enqueue(skb, cl->q);
683 if (ret == NET_XMIT_SUCCESS) {
685 sch->bstats.packets++;
686 sch->bstats.bytes += qdisc_pkt_len(skb);
688 cbq_activate_class(cl);
691 if (net_xmit_drop_count(ret))
702 This is a mission-critical procedure.
704 We "regenerate" the toplevel cutoff if the transmitting class
705 has backlog and is not regulated. This is not part of the
706 original CBQ description, but it looks more reasonable.
707 It may well be wrong; the question needs further investigation.
710 static __inline__ void
711 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
712 struct cbq_class *borrowed)
714 if (cl && q->toplevel >= borrowed->level) {
715 if (cl->q->q.qlen > 1) {
717 if (borrowed->undertime == PSCHED_PASTPERFECT) {
718 q->toplevel = borrowed->level;
721 } while ((borrowed=borrowed->borrow) != NULL);
724 /* This is not necessary now. Uncommenting it
725 would save CPU cycles but decrease fairness.
727 q->toplevel = TC_CBQ_MAXLEVEL;
733 cbq_update(struct cbq_sched_data *q)
735 struct cbq_class *this = q->tx_class;
736 struct cbq_class *cl = this;
741 for ( ; cl; cl = cl->share) {
742 long avgidle = cl->avgidle;
745 cl->bstats.packets++;
746 cl->bstats.bytes += len;
749 (now - last) is total time between packet right edges.
750 (last_pktlen/rate) is "virtual" busy time, so that
752 idle = (now - last) - last_pktlen/rate
755 idle = q->now - cl->last;
756 if ((unsigned long)idle > 128*1024*1024) {
757 avgidle = cl->maxidle;
759 idle -= L2T(cl, len);
761 /* true_avgidle := (1-W)*true_avgidle + W*idle,
762 where W=2^{-ewma_log}. But cl->avgidle is scaled:
763 cl->avgidle == true_avgidle/W,
766 avgidle += idle - (avgidle>>cl->ewma_log);
770 /* Overlimit or at-limit */
772 if (avgidle < cl->minidle)
773 avgidle = cl->minidle;
775 cl->avgidle = avgidle;
777 /* Calculate the expected time when this class
778 will be allowed to send.
780 (1-W)*true_avgidle + W*delay = 0, i.e.
781 idle = (1/W - 1)*(-true_avgidle)
783 idle = (1 - W)*(-cl->avgidle);
785 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
789 To maintain the rate allocated to the class,
790 we add to undertime the virtual clock time
791 needed to complete the transmitted packet.
792 (len/phys_bandwidth has already elapsed
793 by the time cbq_update is called.)
796 idle -= L2T(&q->link, len);
797 idle += L2T(cl, len);
799 cl->undertime = q->now + idle;
803 cl->undertime = PSCHED_PASTPERFECT;
804 if (avgidle > cl->maxidle)
805 cl->avgidle = cl->maxidle;
807 cl->avgidle = avgidle;
812 cbq_update_toplevel(q, this, q->tx_borrowed);
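/*
 * A minimal, standalone sketch (userspace, made-up samples) of the scaled
 * EWMA used above: with W = 2^-ewma_log and avgidle stored as
 * true_avgidle/W, the update
 *	true_avgidle = (1-W)*true_avgidle + W*idle
 * reduces to the shift-only form  avgidle += idle - (avgidle >> ewma_log).
 */
#if 0
#include <stdio.h>

int main(void)
{
	long avgidle = 0;	/* scaled: true_avgidle / W */
	int ewma_log = 5;	/* W = 1/32 */
	long samples[] = { 100, -40, 250, -10 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* relies on arithmetic right shift for negative values,
		 * just as the qdisc does
		 */
		avgidle += samples[i] - (avgidle >> ewma_log);
		printf("idle=%4ld  scaled avgidle=%ld  (true ~ %g)\n",
		       samples[i], avgidle,
		       (double)avgidle / (1 << ewma_log));
	}
	return 0;
}
#endif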
815 static __inline__ struct cbq_class *
816 cbq_under_limit(struct cbq_class *cl)
818 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
819 struct cbq_class *this_cl = cl;
821 if (cl->tparent == NULL)
824 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
830 /* This is a very suspicious place. Currently the overlimit
831 action is generated for non-bounded classes
832 only if the link is completely congested.
833 Though this agrees with the ancestor-only paradigm,
834 it looks very dubious. In particular,
835 it means that this chunk of code will either
836 never be called or result in strong amplification
837 of burstiness. Dangerous and silly, but
838 no other solution exists.
840 if ((cl = cl->borrow) == NULL) {
841 this_cl->qstats.overlimits++;
842 this_cl->overlimit(this_cl);
845 if (cl->level > q->toplevel)
847 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
853 static __inline__ struct sk_buff *
854 cbq_dequeue_prio(struct Qdisc *sch, int prio)
856 struct cbq_sched_data *q = qdisc_priv(sch);
857 struct cbq_class *cl_tail, *cl_prev, *cl;
861 cl_tail = cl_prev = q->active[prio];
862 cl = cl_prev->next_alive;
869 struct cbq_class *borrow = cl;
872 (borrow = cbq_under_limit(cl)) == NULL)
875 if (cl->deficit <= 0) {
876 /* The class exhausted its allotment for
877 this round. Switch to the next one.
880 cl->deficit += cl->quantum;
884 skb = cl->q->dequeue(cl->q);
886 /* The class did not give us any skb :-(
887 This can occur even if cl->q->q.qlen != 0,
888 e.g. if cl->q is a "tbf" qdisc.
893 cl->deficit -= qdisc_pkt_len(skb);
895 q->tx_borrowed = borrow;
897 #ifndef CBQ_XSTATS_BORROWS_BYTES
898 borrow->xstats.borrows++;
899 cl->xstats.borrows++;
901 borrow->xstats.borrows += qdisc_pkt_len(skb);
902 cl->xstats.borrows += qdisc_pkt_len(skb);
905 q->tx_len = qdisc_pkt_len(skb);
907 if (cl->deficit <= 0) {
908 q->active[prio] = cl;
910 cl->deficit += cl->quantum;
915 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
916 /* The class is empty or penalized.
917 Unlink it from the active chain.
919 cl_prev->next_alive = cl->next_alive;
920 cl->next_alive = NULL;
922 /* Did cl_tail point to it? */
927 /* Was it the last class in this band? */
930 q->active[prio] = NULL;
931 q->activemask &= ~(1<<prio);
933 cbq_activate_class(cl);
937 q->active[prio] = cl_tail;
940 cbq_activate_class(cl);
948 } while (cl_prev != cl_tail);
951 q->active[prio] = cl_prev;
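/*
 * A minimal, standalone sketch in the spirit of the WRR loop above
 * (userspace, made-up classes and packet sizes), stripped of borrowing
 * and penalties: each backlogged class earns 'quantum' bytes of deficit
 * per round and keeps sending while its deficit stays positive, so the
 * deficit may go negative and the shortfall carries into the next round.
 */
#if 0
#include <stdio.h>

struct toy_class {
	const char *name;
	long quantum, deficit;
	int backlog[4], head, tail;	/* tiny FIFO of packet lengths */
};

static void toy_round(struct toy_class *cls, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		struct toy_class *cl = &cls[i];

		if (cl->head == cl->tail)	/* empty: stays inactive */
			continue;
		cl->deficit += cl->quantum;
		while (cl->deficit > 0 && cl->head != cl->tail) {
			int len = cl->backlog[cl->head++];

			cl->deficit -= len;
			printf("%s sends %d bytes (deficit now %ld)\n",
			       cl->name, len, cl->deficit);
		}
	}
}

int main(void)
{
	struct toy_class cls[] = {
		{ "A", 1000, 0, { 600, 600, 600 }, 0, 3 },
		{ "B",  500, 0, { 1500 },          0, 1 },
	};

	toy_round(cls, 2);
	toy_round(cls, 2);
	return 0;
}
#endif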
956 static __inline__ struct sk_buff *
957 cbq_dequeue_1(struct Qdisc *sch)
959 struct cbq_sched_data *q = qdisc_priv(sch);
963 activemask = q->activemask&0xFF;
965 int prio = ffz(~activemask);
966 activemask &= ~(1<<prio);
967 skb = cbq_dequeue_prio(sch, prio);
974 static struct sk_buff *
975 cbq_dequeue(struct Qdisc *sch)
978 struct cbq_sched_data *q = qdisc_priv(sch);
982 now = psched_get_time();
983 incr = now - q->now_rt;
986 psched_tdiff_t incr2;
987 /* Time integrator. We calculate the EOS time
988 by adding the expected packet transmission time.
989 If real time is greater, we warp the artificial clock, so that
992 cbq_time = max(real_time, work);
994 incr2 = L2T(&q->link, q->tx_len);
997 if ((incr -= incr2) < 0)
1006 skb = cbq_dequeue_1(sch);
1009 sch->flags &= ~TCQ_F_THROTTLED;
1013 /* All the classes are overlimit.
1017 1. The scheduler is empty.
1018 2. The toplevel cutoff inhibited borrowing.
1019 3. The root class is overlimit.
1021 Reset the 2nd and 3rd conditions and retry.
1023 Note that NS and cbq-2.0 are buggy here: peeking
1024 an arbitrary class is appropriate for ancestor-only
1025 sharing, but not for the toplevel algorithm.
1027 Our version is better but slower, because it requires
1028 two passes; that is unavoidable with top-level sharing.
1031 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1032 q->link.undertime == PSCHED_PASTPERFECT)
1035 q->toplevel = TC_CBQ_MAXLEVEL;
1036 q->link.undertime = PSCHED_PASTPERFECT;
1039 /* No packets in the scheduler, or nobody wants to give them to us :-(
1040 Sigh... start the watchdog timer in the latter case. */
1043 sch->qstats.overlimits++;
1045 qdisc_watchdog_schedule(&q->watchdog,
1046 now + q->wd_expires);
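/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the time
 * integrator used in cbq_dequeue(): per dequeue, the artificial clock
 * advances by max(real elapsed time, expected tx time of the previous
 * packet), so it never falls behind the real clock yet keeps the link
 * "busy" for at least the expected transmission time.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long now_art = 0, now_rt = 0;
	/* { real gap since last dequeue, expected tx time of prev packet } */
	long ev[][2] = { { 10, 12 }, { 30, 12 }, { 5, 12 } };
	unsigned int i;

	for (i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
		long incr = ev[i][0], incr2 = ev[i][1];

		now_art += incr2;		/* virtual busy time */
		if ((incr -= incr2) < 0)	/* real time ran slower */
			incr = 0;
		now_art += incr;		/* warp if it ran faster */
		now_rt += ev[i][0];
		printf("real=%ld artificial=%ld\n", now_rt, now_art);
	}
	return 0;
}
#endif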
1051 /* CBQ class maintenance routines */
1053 static void cbq_adjust_levels(struct cbq_class *this)
1060 struct cbq_class *cl;
1062 if ((cl = this->children) != NULL) {
1064 if (cl->level > level)
1066 } while ((cl = cl->sibling) != this->children);
1068 this->level = level+1;
1069 } while ((this = this->tparent) != NULL);
1072 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1074 struct cbq_class *cl;
1075 struct hlist_node *n;
1078 if (q->quanta[prio] == 0)
1081 for (h = 0; h < q->clhash.hashsize; h++) {
1082 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1083 /* BUGGGG... Beware! This expression suffers from
1084 arithmetic overflows!
1086 if (cl->priority == prio) {
1087 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1090 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1091 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1092 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
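/*
 * A minimal, standalone sketch (userspace, made-up numbers) of the
 * overflow warned about above: on targets where long is 32 bits the
 * intermediate product weight*allot*nclasses can exceed 2^31, so the
 * computed quantum is garbage.  Unsigned 32-bit arithmetic is used here
 * so the wraparound is well defined; widening to 64 bits avoids it.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t weight = 100000, allot = 10000, nclasses = 8;
	uint32_t quanta = 800000;	/* sum of weights for this prio */
	uint32_t narrow = weight * allot * nclasses / quanta;	/* wraps */
	uint64_t wide = (uint64_t)weight * allot * nclasses / quanta;

	printf("32-bit result: %u, 64-bit result: %llu\n",
	       narrow, (unsigned long long)wide);
	return 0;
}
#endif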
1098 static void cbq_sync_defmap(struct cbq_class *cl)
1100 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1101 struct cbq_class *split = cl->split;
1108 for (i=0; i<=TC_PRIO_MAX; i++) {
1109 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1110 split->defaults[i] = NULL;
1113 for (i=0; i<=TC_PRIO_MAX; i++) {
1114 int level = split->level;
1116 if (split->defaults[i])
1119 for (h = 0; h < q->clhash.hashsize; h++) {
1120 struct hlist_node *n;
1121 struct cbq_class *c;
1123 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1125 if (c->split == split && c->level < level &&
1127 split->defaults[i] = c;
1135 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1137 struct cbq_class *split = NULL;
1140 if ((split = cl->split) == NULL)
1142 splitid = split->common.classid;
1145 if (split == NULL || split->common.classid != splitid) {
1146 for (split = cl->tparent; split; split = split->tparent)
1147 if (split->common.classid == splitid)
1154 if (cl->split != split) {
1156 cbq_sync_defmap(cl);
1158 cl->defmap = def&mask;
1160 cl->defmap = (cl->defmap&~mask)|(def&mask);
1162 cbq_sync_defmap(cl);
1165 static void cbq_unlink_class(struct cbq_class *this)
1167 struct cbq_class *cl, **clp;
1168 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1170 qdisc_class_hash_remove(&q->clhash, &this->common);
1172 if (this->tparent) {
1181 } while ((cl = *clp) != this->sibling);
1183 if (this->tparent->children == this) {
1184 this->tparent->children = this->sibling;
1185 if (this->sibling == this)
1186 this->tparent->children = NULL;
1189 WARN_ON(this->sibling != this);
1193 static void cbq_link_class(struct cbq_class *this)
1195 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1196 struct cbq_class *parent = this->tparent;
1198 this->sibling = this;
1199 qdisc_class_hash_insert(&q->clhash, &this->common);
1204 if (parent->children == NULL) {
1205 parent->children = this;
1207 this->sibling = parent->children->sibling;
1208 parent->children->sibling = this;
1212 static unsigned int cbq_drop(struct Qdisc* sch)
1214 struct cbq_sched_data *q = qdisc_priv(sch);
1215 struct cbq_class *cl, *cl_head;
1219 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1220 if ((cl_head = q->active[prio]) == NULL)
1225 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1228 cbq_deactivate_class(cl);
1231 } while ((cl = cl->next_alive) != cl_head);
1237 cbq_reset(struct Qdisc* sch)
1239 struct cbq_sched_data *q = qdisc_priv(sch);
1240 struct cbq_class *cl;
1241 struct hlist_node *n;
1248 q->tx_borrowed = NULL;
1249 qdisc_watchdog_cancel(&q->watchdog);
1250 hrtimer_cancel(&q->delay_timer);
1251 q->toplevel = TC_CBQ_MAXLEVEL;
1252 q->now = psched_get_time();
1255 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1256 q->active[prio] = NULL;
1258 for (h = 0; h < q->clhash.hashsize; h++) {
1259 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1262 cl->next_alive = NULL;
1263 cl->undertime = PSCHED_PASTPERFECT;
1264 cl->avgidle = cl->maxidle;
1265 cl->deficit = cl->quantum;
1266 cl->cpriority = cl->priority;
1273 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1275 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1276 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1277 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1279 if (lss->change&TCF_CBQ_LSS_EWMA)
1280 cl->ewma_log = lss->ewma_log;
1281 if (lss->change&TCF_CBQ_LSS_AVPKT)
1282 cl->avpkt = lss->avpkt;
1283 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1284 cl->minidle = -(long)lss->minidle;
1285 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1286 cl->maxidle = lss->maxidle;
1287 cl->avgidle = lss->maxidle;
1289 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1290 cl->offtime = lss->offtime;
1294 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1296 q->nclasses[cl->priority]--;
1297 q->quanta[cl->priority] -= cl->weight;
1298 cbq_normalize_quanta(q, cl->priority);
1301 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1303 q->nclasses[cl->priority]++;
1304 q->quanta[cl->priority] += cl->weight;
1305 cbq_normalize_quanta(q, cl->priority);
1308 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1310 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1313 cl->allot = wrr->allot;
1315 cl->weight = wrr->weight;
1316 if (wrr->priority) {
1317 cl->priority = wrr->priority-1;
1318 cl->cpriority = cl->priority;
1319 if (cl->priority >= cl->priority2)
1320 cl->priority2 = TC_CBQ_MAXPRIO-1;
1327 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1329 switch (ovl->strategy) {
1330 case TC_CBQ_OVL_CLASSIC:
1331 cl->overlimit = cbq_ovl_classic;
1333 case TC_CBQ_OVL_DELAY:
1334 cl->overlimit = cbq_ovl_delay;
1336 case TC_CBQ_OVL_LOWPRIO:
1337 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1338 ovl->priority2-1 <= cl->priority)
1340 cl->priority2 = ovl->priority2-1;
1341 cl->overlimit = cbq_ovl_lowprio;
1343 case TC_CBQ_OVL_DROP:
1344 cl->overlimit = cbq_ovl_drop;
1346 case TC_CBQ_OVL_RCLASSIC:
1347 cl->overlimit = cbq_ovl_rclassic;
1352 cl->penalty = ovl->penalty;
1356 #ifdef CONFIG_NET_CLS_ACT
1357 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1359 cl->police = p->police;
1361 if (cl->q->handle) {
1362 if (p->police == TC_POLICE_RECLASSIFY)
1363 cl->q->reshape_fail = cbq_reshape_fail;
1365 cl->q->reshape_fail = NULL;
1371 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1373 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1378 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1379 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1380 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1381 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1382 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1383 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1384 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1387 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1389 struct cbq_sched_data *q = qdisc_priv(sch);
1390 struct nlattr *tb[TCA_CBQ_MAX + 1];
1391 struct tc_ratespec *r;
1394 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1398 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1401 r = nla_data(tb[TCA_CBQ_RATE]);
1403 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1406 err = qdisc_class_hash_init(&q->clhash);
1411 q->link.sibling = &q->link;
1412 q->link.common.classid = sch->handle;
1413 q->link.qdisc = sch;
1414 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1417 q->link.q = &noop_qdisc;
1419 q->link.priority = TC_CBQ_MAXPRIO-1;
1420 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1421 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1422 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1423 q->link.overlimit = cbq_ovl_classic;
1424 q->link.allot = psched_mtu(qdisc_dev(sch));
1425 q->link.quantum = q->link.allot;
1426 q->link.weight = q->link.R_tab->rate.rate;
1428 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1429 q->link.avpkt = q->link.allot/2;
1430 q->link.minidle = -0x7FFFFFFF;
1432 qdisc_watchdog_init(&q->watchdog, sch);
1433 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1434 q->delay_timer.function = cbq_undelay;
1435 q->toplevel = TC_CBQ_MAXLEVEL;
1436 q->now = psched_get_time();
1439 cbq_link_class(&q->link);
1441 if (tb[TCA_CBQ_LSSOPT])
1442 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1444 cbq_addprio(q, &q->link);
1448 qdisc_put_rtab(q->link.R_tab);
1452 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1454 unsigned char *b = skb_tail_pointer(skb);
1456 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1466 unsigned char *b = skb_tail_pointer(skb);
1467 struct tc_cbq_lssopt opt;
1470 if (cl->borrow == NULL)
1471 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1472 if (cl->share == NULL)
1473 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1474 opt.ewma_log = cl->ewma_log;
1475 opt.level = cl->level;
1476 opt.avpkt = cl->avpkt;
1477 opt.maxidle = cl->maxidle;
1478 opt.minidle = (u32)(-cl->minidle);
1479 opt.offtime = cl->offtime;
1481 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1491 unsigned char *b = skb_tail_pointer(skb);
1492 struct tc_cbq_wrropt opt;
1495 opt.allot = cl->allot;
1496 opt.priority = cl->priority+1;
1497 opt.cpriority = cl->cpriority+1;
1498 opt.weight = cl->weight;
1499 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1509 unsigned char *b = skb_tail_pointer(skb);
1510 struct tc_cbq_ovl opt;
1512 opt.strategy = cl->ovl_strategy;
1513 opt.priority2 = cl->priority2+1;
1515 opt.penalty = cl->penalty;
1516 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1524 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1526 unsigned char *b = skb_tail_pointer(skb);
1527 struct tc_cbq_fopt opt;
1529 if (cl->split || cl->defmap) {
1530 opt.split = cl->split ? cl->split->common.classid : 0;
1531 opt.defmap = cl->defmap;
1533 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1542 #ifdef CONFIG_NET_CLS_ACT
1543 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1545 unsigned char *b = skb_tail_pointer(skb);
1546 struct tc_cbq_police opt;
1549 opt.police = cl->police;
1552 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1564 if (cbq_dump_lss(skb, cl) < 0 ||
1565 cbq_dump_rate(skb, cl) < 0 ||
1566 cbq_dump_wrr(skb, cl) < 0 ||
1567 cbq_dump_ovl(skb, cl) < 0 ||
1568 #ifdef CONFIG_NET_CLS_ACT
1569 cbq_dump_police(skb, cl) < 0 ||
1571 cbq_dump_fopt(skb, cl) < 0)
1576 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1578 struct cbq_sched_data *q = qdisc_priv(sch);
1579 struct nlattr *nest;
1581 nest = nla_nest_start(skb, TCA_OPTIONS);
1583 goto nla_put_failure;
1584 if (cbq_dump_attr(skb, &q->link) < 0)
1585 goto nla_put_failure;
1586 nla_nest_end(skb, nest);
1590 nla_nest_cancel(skb, nest);
1595 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1597 struct cbq_sched_data *q = qdisc_priv(sch);
1599 q->link.xstats.avgidle = q->link.avgidle;
1600 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1604 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1605 struct sk_buff *skb, struct tcmsg *tcm)
1607 struct cbq_class *cl = (struct cbq_class*)arg;
1608 struct nlattr *nest;
1611 tcm->tcm_parent = cl->tparent->common.classid;
1613 tcm->tcm_parent = TC_H_ROOT;
1614 tcm->tcm_handle = cl->common.classid;
1615 tcm->tcm_info = cl->q->handle;
1617 nest = nla_nest_start(skb, TCA_OPTIONS);
1619 goto nla_put_failure;
1620 if (cbq_dump_attr(skb, cl) < 0)
1621 goto nla_put_failure;
1622 nla_nest_end(skb, nest);
1626 nla_nest_cancel(skb, nest);
1631 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1632 struct gnet_dump *d)
1634 struct cbq_sched_data *q = qdisc_priv(sch);
1635 struct cbq_class *cl = (struct cbq_class*)arg;
1637 cl->qstats.qlen = cl->q->q.qlen;
1638 cl->xstats.avgidle = cl->avgidle;
1639 cl->xstats.undertime = 0;
1641 if (cl->undertime != PSCHED_PASTPERFECT)
1642 cl->xstats.undertime = cl->undertime - q->now;
1644 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1645 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1646 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1649 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1652 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1655 struct cbq_class *cl = (struct cbq_class*)arg;
1659 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1661 cl->common.classid);
1665 #ifdef CONFIG_NET_CLS_ACT
1666 if (cl->police == TC_POLICE_RECLASSIFY)
1667 new->reshape_fail = cbq_reshape_fail;
1671 *old = xchg(&cl->q, new);
1672 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1674 sch_tree_unlock(sch);
1681 static struct Qdisc *
1682 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1684 struct cbq_class *cl = (struct cbq_class*)arg;
1686 return cl ? cl->q : NULL;
1689 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1691 struct cbq_class *cl = (struct cbq_class *)arg;
1693 if (cl->q->q.qlen == 0)
1694 cbq_deactivate_class(cl);
1697 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1699 struct cbq_sched_data *q = qdisc_priv(sch);
1700 struct cbq_class *cl = cbq_class_lookup(q, classid);
1704 return (unsigned long)cl;
1709 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1711 struct cbq_sched_data *q = qdisc_priv(sch);
1713 WARN_ON(cl->filters);
1715 tcf_destroy_chain(&cl->filter_list);
1716 qdisc_destroy(cl->q);
1717 qdisc_put_rtab(cl->R_tab);
1718 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1724 cbq_destroy(struct Qdisc* sch)
1726 struct cbq_sched_data *q = qdisc_priv(sch);
1727 struct hlist_node *n, *next;
1728 struct cbq_class *cl;
1731 #ifdef CONFIG_NET_CLS_ACT
1735 * Filters must be destroyed first because we don't destroy the
1736 * classes from root to leaves, which means that filters can still
1737 * be bound to classes which have already been destroyed. --TGR '04
1739 for (h = 0; h < q->clhash.hashsize; h++) {
1740 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1741 tcf_destroy_chain(&cl->filter_list);
1743 for (h = 0; h < q->clhash.hashsize; h++) {
1744 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1746 cbq_destroy_class(sch, cl);
1748 qdisc_class_hash_destroy(&q->clhash);
1751 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753 struct cbq_class *cl = (struct cbq_class*)arg;
1755 if (--cl->refcnt == 0) {
1756 #ifdef CONFIG_NET_CLS_ACT
1757 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1758 struct cbq_sched_data *q = qdisc_priv(sch);
1760 spin_lock_bh(root_lock);
1761 if (q->rx_class == cl)
1763 spin_unlock_bh(root_lock);
1766 cbq_destroy_class(sch, cl);
1771 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1775 struct cbq_sched_data *q = qdisc_priv(sch);
1776 struct cbq_class *cl = (struct cbq_class*)*arg;
1777 struct nlattr *opt = tca[TCA_OPTIONS];
1778 struct nlattr *tb[TCA_CBQ_MAX + 1];
1779 struct cbq_class *parent;
1780 struct qdisc_rate_table *rtab = NULL;
1785 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1793 cl->tparent->common.classid != parentid)
1795 if (!cl->tparent && parentid != TC_H_ROOT)
1799 if (tb[TCA_CBQ_RATE]) {
1800 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1805 /* Change class parameters */
1808 if (cl->next_alive != NULL)
1809 cbq_deactivate_class(cl);
1812 rtab = xchg(&cl->R_tab, rtab);
1813 qdisc_put_rtab(rtab);
1816 if (tb[TCA_CBQ_LSSOPT])
1817 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1819 if (tb[TCA_CBQ_WRROPT]) {
1821 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1824 if (tb[TCA_CBQ_OVL_STRATEGY])
1825 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1827 #ifdef CONFIG_NET_CLS_ACT
1828 if (tb[TCA_CBQ_POLICE])
1829 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1832 if (tb[TCA_CBQ_FOPT])
1833 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1836 cbq_activate_class(cl);
1838 sch_tree_unlock(sch);
1841 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1842 qdisc_root_sleeping_lock(sch),
1847 if (parentid == TC_H_ROOT)
1850 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1851 tb[TCA_CBQ_LSSOPT] == NULL)
1854 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1860 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1864 classid = TC_H_MAKE(sch->handle,0x8000);
1866 for (i=0; i<0x8000; i++) {
1867 if (++q->hgenerator >= 0x8000)
1869 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1875 classid = classid|q->hgenerator;
1880 parent = cbq_class_lookup(q, parentid);
1887 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1893 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1894 &pfifo_qdisc_ops, classid)))
1895 cl->q = &noop_qdisc;
1896 cl->common.classid = classid;
1897 cl->tparent = parent;
1899 cl->allot = parent->allot;
1900 cl->quantum = cl->allot;
1901 cl->weight = cl->R_tab->rate.rate;
1905 cl->borrow = cl->tparent;
1906 if (cl->tparent != &q->link)
1907 cl->share = cl->tparent;
1908 cbq_adjust_levels(parent);
1909 cl->minidle = -0x7FFFFFFF;
1910 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1911 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1912 if (cl->ewma_log==0)
1913 cl->ewma_log = q->link.ewma_log;
1915 cl->maxidle = q->link.maxidle;
1917 cl->avpkt = q->link.avpkt;
1918 cl->overlimit = cbq_ovl_classic;
1919 if (tb[TCA_CBQ_OVL_STRATEGY])
1920 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1921 #ifdef CONFIG_NET_CLS_ACT
1922 if (tb[TCA_CBQ_POLICE])
1923 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1925 if (tb[TCA_CBQ_FOPT])
1926 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1927 sch_tree_unlock(sch);
1929 qdisc_class_hash_grow(sch, &q->clhash);
1932 gen_new_estimator(&cl->bstats, &cl->rate_est,
1933 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1935 *arg = (unsigned long)cl;
1939 qdisc_put_rtab(rtab);
1943 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1945 struct cbq_sched_data *q = qdisc_priv(sch);
1946 struct cbq_class *cl = (struct cbq_class*)arg;
1949 if (cl->filters || cl->children || cl == &q->link)
1954 qlen = cl->q->q.qlen;
1956 qdisc_tree_decrease_qlen(cl->q, qlen);
1959 cbq_deactivate_class(cl);
1961 if (q->tx_borrowed == cl)
1962 q->tx_borrowed = q->tx_class;
1963 if (q->tx_class == cl) {
1965 q->tx_borrowed = NULL;
1967 #ifdef CONFIG_NET_CLS_ACT
1968 if (q->rx_class == cl)
1972 cbq_unlink_class(cl);
1973 cbq_adjust_levels(cl->tparent);
1975 cbq_sync_defmap(cl);
1978 sch_tree_unlock(sch);
1980 if (--cl->refcnt == 0)
1981 cbq_destroy_class(sch, cl);
1986 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1988 struct cbq_sched_data *q = qdisc_priv(sch);
1989 struct cbq_class *cl = (struct cbq_class *)arg;
1994 return &cl->filter_list;
1997 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2000 struct cbq_sched_data *q = qdisc_priv(sch);
2001 struct cbq_class *p = (struct cbq_class*)parent;
2002 struct cbq_class *cl = cbq_class_lookup(q, classid);
2005 if (p && p->level <= cl->level)
2008 return (unsigned long)cl;
2013 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2015 struct cbq_class *cl = (struct cbq_class*)arg;
2020 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2022 struct cbq_sched_data *q = qdisc_priv(sch);
2023 struct cbq_class *cl;
2024 struct hlist_node *n;
2030 for (h = 0; h < q->clhash.hashsize; h++) {
2031 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2032 if (arg->count < arg->skip) {
2036 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2045 static const struct Qdisc_class_ops cbq_class_ops = {
2048 .qlen_notify = cbq_qlen_notify,
2051 .change = cbq_change_class,
2052 .delete = cbq_delete,
2054 .tcf_chain = cbq_find_tcf,
2055 .bind_tcf = cbq_bind_filter,
2056 .unbind_tcf = cbq_unbind_filter,
2057 .dump = cbq_dump_class,
2058 .dump_stats = cbq_dump_class_stats,
2061 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2063 .cl_ops = &cbq_class_ops,
2065 .priv_size = sizeof(struct cbq_sched_data),
2066 .enqueue = cbq_enqueue,
2067 .dequeue = cbq_dequeue,
2068 .requeue = cbq_requeue,
2072 .destroy = cbq_destroy,
2075 .dump_stats = cbq_dump_stats,
2076 .owner = THIS_MODULE,
2079 static int __init cbq_module_init(void)
2081 return register_qdisc(&cbq_qdisc_ops);
2083 static void __exit cbq_module_exit(void)
2085 unregister_qdisc(&cbq_qdisc_ops);
2087 module_init(cbq_module_init)
2088 module_exit(cbq_module_exit)
2089 MODULE_LICENSE("GPL");