2 * net/sched/sch_netem.c Network emulator
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/netdevice.h>
23 #include <linux/skbuff.h>
24 #include <linux/rtnetlink.h>
26 #include <net/pkt_sched.h>
28 /* Network Emulation Queuing algorithm.
29 ====================================
31 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
32 Network Emulation Tool"
33 [2] Luigi Rizzo, DummyNet for FreeBSD
35 ----------------------------------------------------------------
37 This started out as a simple way to delay outgoing packets to
38 test TCP but has grown to include most of the functionality
39 of a full blown network emulator like NISTnet. It can delay
40 packets and add random jitter (and correlation). The random
41 distribution can be loaded from a table as well to provide
42 normal, Pareto, or experimental curves. Packet loss,
43 duplication, and reordering can also be emulated.
45 This qdisc does not do classification that can be handled in
46 layering other disciplines. It does not need to do bandwidth
47 control either since that can be handled by using token
48 bucket or other rate control.
50 The simulator is limited by the Linux timer resolution
51 and will create packet bursts on the HZ boundary (1ms).
/* Qdisc-private state for netem.
 * NOTE(review): this excerpt is elided -- interior members (latency, jitter,
 * limit, loss, duplicate, gap, counter, inner qdisc, dist table) are not
 * visible here; only the fragments below survive.  Confirm against the full
 * file before relying on layout. */
54 struct netem_sched_data {
56 struct timer_list timer; /* delay watchdog; fires netem_watchdog() */
69 } delay_cor, loss_cor, dup_cor; /* correlated-RNG state per random event */
77 /* Time stamp put into socket buffer control block */
79 psched_time_t time_to_send; /* in struct netem_skb_cb, kept in skb->cb */
82 /* init_crandom - initialize correlated random number generator
83 * Use entropy source for initial seed.
85 static void init_crandom(struct crndstate *state, unsigned long rho)
/* Seed "last" from the kernel PRNG so the first correlated draw is random.
 * NOTE(review): the line storing rho into state->rho is elided from this
 * view -- presumably present in the full file; verify. */
88 state->last = net_random();
91 /* get_crandom - correlated random number generator
92 * Next number depends on last value.
93 * rho is scaled to avoid floating point.
95 static unsigned long get_crandom(struct crndstate *state)
/* rho == 0: no correlation requested, so plain uniform randomness suffices
 * (the early-return statement itself is elided from this excerpt). */
100 if (state->rho == 0) /* no correlation */
103 value = net_random();
104 rho = (u64)state->rho + 1;
/* Fixed-point blend in 32.32 arithmetic: answer is the weighted average
 * (1 - rho/2^32)*value + (rho/2^32)*last, i.e. larger rho pulls each draw
 * toward the previous output, producing correlated noise. */
105 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
106 state->last = answer; /* remember for the next draw's correlation */
110 /* tabledist - return a pseudo-randomly distributed value with mean mu and
111 * std deviation sigma. Uses table lookup to approximate the desired
112 * distribution, and a uniformly-distributed pseudo-random source.
114 static long tabledist(unsigned long mu, long sigma,
115 struct crndstate *state, const struct disttable *dist)
/* NOTE(review): several guard/branch lines are elided from this excerpt
 * (e.g. the sigma == 0 and dist == NULL checks that select the branches
 * below) -- confirm against the full file. */
123 rnd = get_crandom(state);
125 /* default uniform distribution */
/* Fallback when no distribution table is loaded: uniform in [mu-sigma, mu+sigma). */
127 return (rnd % (2*sigma)) - sigma + mu;
/* Table entries are signed samples of the unit distribution, scaled by
 * NETEM_DIST_SCALE. */
129 t = dist->table[rnd % dist->size];
/* Split sigma*t into (sigma % SCALE)*t handled here and (sigma / SCALE)*t
 * added below -- fixed-point multiply that avoids overflowing a long. */
130 x = (sigma % NETEM_DIST_SCALE) * t;
/* Round x toward nearest before the division (the sign test choosing
 * between +/- half-scale is elided from this view). */
132 x += NETEM_DIST_SCALE/2;
134 x -= NETEM_DIST_SCALE/2;
136 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
140 * Insert one skb into qdisc.
141 * Note: parent depends on return value to account for queue length.
142 * NET_XMIT_DROP: queue length didn't change.
143 * NET_XMIT_SUCCESS: one skb was queued.
145 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* NOTE(review): this excerpt is elided; the declarations of `count`, `ret`,
 * `now` and several statements (drop accounting, counter updates) are not
 * visible here. Comments below describe only what the visible lines show. */
147 struct netem_sched_data *q = qdisc_priv(sch);
/* Per-skb scratch state lives in skb->cb for the life of the queueing. */
148 struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
149 struct sk_buff *skb2;
153 pr_debug("netem_enqueue skb=%p\n", skb);
155 /* Random duplication */
/* q->duplicate is a fixed-point probability compared against a correlated
 * random draw; the statement taken on match is elided from this view. */
156 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
159 /* Random packet drop 0 => none, ~0 => all */
160 if (q->loss && q->loss >= get_crandom(&q->loss_cor))
166 return NET_XMIT_DROP;
170 * If we need to duplicate packet, then re-insert at top of the
171 * qdisc tree, since parent queuer expects that only one
172 * skb will be queued.
174 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
175 struct Qdisc *rootq = sch->dev->qdisc;
/* Temporarily zero q->duplicate around the recursive enqueue so the clone
 * cannot itself be duplicated (the zeroing line is elided here). */
176 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
179 rootq->enqueue(skb2, rootq);
180 q->duplicate = dupsave;
184 * Do re-ordering by putting one out of N packets at the front
186 * gap == 0 is special case for no-reordering.
/* Normal path: stamp time_to_send = now + tabledist(latency, jitter) and
 * enqueue at the tail of the inner qdisc. */
188 if (q->gap == 0 || q->counter != q->gap) {
190 PSCHED_GET_TIME(now);
192 tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist),
196 ret = q->qdisc->enqueue(skb, q->qdisc);
/* Reorder path (every q->gap-th packet): send immediately by stamping "now"
 * and pushing to the head of the inner qdisc via requeue. */
199 PSCHED_GET_TIME(cb->time_to_send);
200 ret = q->qdisc->ops->requeue(skb, q->qdisc);
203 if (likely(ret == NET_XMIT_SUCCESS)) {
/* Byte/packet accounting for successful enqueue (qlen update elided). */
205 sch->bstats.bytes += skb->len;
206 sch->bstats.packets++;
210 pr_debug("netem: enqueue ret %d\n", ret);
214 /* Requeue packets but don't change time stamp */
215 static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
217 struct netem_sched_data *q = qdisc_priv(sch);
/* Delegate to the inner qdisc; on success bump our own queue/requeue stats
 * (the qlen increment is elided from this excerpt). */
220 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
222 sch->qstats.requeues++;
/* Drop one packet from the inner qdisc; returns the dropped byte count,
 * 0 if nothing could be dropped (stat/qlen updates elided from this view). */
228 static unsigned int netem_drop(struct Qdisc* sch)
230 struct netem_sched_data *q = qdisc_priv(sch);
233 if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
/* Dequeue: release the head skb only once its stamped time_to_send has
 * arrived; otherwise put it back and arm the watchdog timer.
 * NOTE(review): excerpt is elided -- the `skb != NULL` test, the
 * `delay <= 0` branch selection, and several returns are not visible. */
240 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
242 struct netem_sched_data *q = qdisc_priv(sch);
245 skb = q->qdisc->dequeue(q->qdisc);
247 const struct netem_skb_cb *cb
248 = (const struct netem_skb_cb *)skb->cb;
252 /* if more time remaining? */
253 PSCHED_GET_TIME(now);
/* delay = jiffies until the skb is due (negative/zero => due now). */
254 delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
255 pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
257 pr_debug("netem_dequeue: return skb=%p\n", skb);
/* Due: hand the skb up and clear throttling. */
259 sch->flags &= ~TCQ_F_THROTTLED;
/* Not yet due: schedule wakeup at the due time and mark ourselves
 * throttled so the stack stops polling until the timer fires. */
263 mod_timer(&q->timer, jiffies + delay);
264 sch->flags |= TCQ_F_THROTTLED;
/* Put the not-yet-due skb back at the head; failure handling is elided. */
266 if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
/* Timer callback armed by netem_dequeue(): a delayed packet has become
 * due, so un-throttle the qdisc and kick the device to dequeue again. */
273 static void netem_watchdog(unsigned long arg)
275 struct Qdisc *sch = (struct Qdisc *)arg;
277 pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
278 sch->flags &= ~TCQ_F_THROTTLED;
279 netif_schedule(sch->dev);
/* Reset: flush the inner qdisc, clear throttled state, and cancel any
 * pending watchdog (qlen reset elided from this excerpt). */
282 static void netem_reset(struct Qdisc *sch)
284 struct netem_sched_data *q = qdisc_priv(sch);
286 qdisc_reset(q->qdisc);
288 sch->flags &= ~TCQ_F_THROTTLED;
289 del_timer_sync(&q->timer);
/* Pass a new queue limit to the inner fifo qdisc by synthesizing a
 * tc_fifo_qopt rtattr and invoking its change() op.
 * NOTE(review): the kmalloc NULL check and kfree(rta) are elided from
 * this excerpt -- confirm they exist in the full file. */
292 static int set_fifo_limit(struct Qdisc *q, int limit)
297 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
299 rta->rta_type = RTM_NEWQDISC;
300 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
301 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
303 ret = q->ops->change(q, rta);
310 * Distribution data is a variable size payload containing
311 * signed 16 bit values.
313 static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
315 struct netem_sched_data *q = qdisc_priv(sch);
316 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
317 const __s16 *data = RTA_DATA(attr);
/* NOTE(review): the sanity check on n, the kmalloc NULL check, and the
 * d->size assignment are elided from this excerpt. */
324 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
329 for (i = 0; i < n; i++)
330 d->table[i] = data[i];
/* Swap in the new table under the device queue lock so the dequeue path
 * never sees a half-installed table; old table freed after (kfree elided). */
332 spin_lock_bh(&sch->dev->queue_lock);
333 d = xchg(&q->delay_dist, d);
334 spin_unlock_bh(&sch->dev->queue_lock);
/* Parse a TCA_NETEM_CORR attribute and (re)seed the three correlated
 * RNGs for delay, loss and duplication. */
340 static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
342 struct netem_sched_data *q = qdisc_priv(sch);
343 const struct tc_netem_corr *c = RTA_DATA(attr);
/* Reject malformed attribute sizes (error return elided from excerpt). */
345 if (RTA_PAYLOAD(attr) != sizeof(*c))
348 init_crandom(&q->delay_cor, c->delay_corr);
349 init_crandom(&q->loss_cor, c->loss_corr);
350 init_crandom(&q->dup_cor, c->dup_corr);
/* Apply tc_netem_qopt parameters (and optional nested attributes) to a
 * running netem qdisc.  Returns 0 on success, negative errno otherwise
 * (several error-return statements are elided from this excerpt). */
354 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
356 struct netem_sched_data *q = qdisc_priv(sch);
357 struct tc_netem_qopt *qopt;
/* Reject a missing or truncated option block. */
360 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
363 qopt = RTA_DATA(opt);
/* Propagate the packet limit to the inner fifo first; abort if it fails. */
364 ret = set_fifo_limit(q->qdisc, qopt->limit);
366 pr_debug("netem: can't set fifo limit\n");
370 q->latency = qopt->latency;
371 q->jitter = qopt->jitter;
372 q->limit = qopt->limit;
374 q->loss = qopt->loss;
375 q->duplicate = qopt->duplicate;
377 /* Handle nested options after initial queue options.
378 * Should have put all options in nested format but too late now.
380 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
381 struct rtattr *tb[TCA_NETEM_MAX];
/* Nested attributes start immediately after the fixed qopt struct. */
382 if (rtattr_parse(tb, TCA_NETEM_MAX,
383 RTA_DATA(opt) + sizeof(*qopt),
384 RTA_PAYLOAD(opt) - sizeof(*qopt)))
387 if (tb[TCA_NETEM_CORR-1]) {
388 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
393 if (tb[TCA_NETEM_DELAY_DIST-1]) {
394 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
/* Qdisc init: set up the watchdog timer, create the inner pfifo child,
 * then apply the user-supplied options via netem_change().  On option
 * failure the child qdisc is torn down again.
 * NOTE(review): the `opt == NULL` guard and error returns are elided. */
404 static int netem_init(struct Qdisc *sch, struct rtattr *opt)
406 struct netem_sched_data *q = qdisc_priv(sch);
412 init_timer(&q->timer);
413 q->timer.function = netem_watchdog;
414 q->timer.data = (unsigned long) sch;
/* Default child is a plain pfifo; its limit is set by netem_change(). */
417 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
419 pr_debug("netem: qdisc create failed\n");
423 ret = netem_change(sch, opt);
425 pr_debug("netem: change failed\n");
426 qdisc_destroy(q->qdisc);
/* Teardown: cancel the watchdog before freeing state it references,
 * destroy the child qdisc, and release the optional delay distribution
 * table (kfree(NULL) is a no-op when no table was loaded). */
431 static void netem_destroy(struct Qdisc *sch)
433 struct netem_sched_data *q = qdisc_priv(sch);
435 del_timer_sync(&q->timer);
436 qdisc_destroy(q->qdisc);
437 kfree(q->delay_dist);
/* Dump current configuration back to userspace as a TCA_OPTIONS blob
 * followed by a nested TCA_NETEM_CORR attribute.  On overflow the
 * RTA_PUT macros jump to the rtattr_failure label, which trims skb back
 * to its starting point (label and returns elided from this excerpt). */
440 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
442 const struct netem_sched_data *q = qdisc_priv(sch);
/* Remember the start so rta_len can be patched once everything is added. */
443 unsigned char *b = skb->tail;
444 struct rtattr *rta = (struct rtattr *) b;
445 struct tc_netem_qopt qopt;
446 struct tc_netem_corr cor;
448 qopt.latency = q->latency;
449 qopt.jitter = q->jitter;
450 qopt.limit = q->limit;
453 qopt.duplicate = q->duplicate;
454 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
456 cor.delay_corr = q->delay_cor.rho;
457 cor.loss_corr = q->loss_cor.rho;
458 cor.dup_corr = q->dup_cor.rho;
459 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
/* Fix up the outer attribute length to cover the nested data. */
460 rta->rta_len = skb->tail - b;
/* rtattr_failure path: undo any partial writes. */
465 skb_trim(skb, b - skb->data);
/* Class dump for the single pseudo-class (minor 1) that wraps the inner
 * qdisc; any other class id is invalid (error return elided). */
469 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
470 struct sk_buff *skb, struct tcmsg *tcm)
472 struct netem_sched_data *q = qdisc_priv(sch);
474 if (cl != 1) /* only one class */
477 tcm->tcm_handle |= TC_H_MIN(1);
478 tcm->tcm_info = q->qdisc->handle;
/* Replace the inner qdisc with `new`, returning the previous child via
 * *old.  NOTE(review): the NULL-new fallback, tree lock acquisition and
 * reset of the old child are elided from this excerpt. */
483 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
486 struct netem_sched_data *q = qdisc_priv(sch);
492 *old = xchg(&q->qdisc, new);
495 sch_tree_unlock(sch);
/* Return the inner child qdisc for the single class (return elided). */
500 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
502 struct netem_sched_data *q = qdisc_priv(sch);
/* Minimal class-ops implementations: netem exposes exactly one fixed
 * pseudo-class, so get/put/change/delete are stubs and walk visits a
 * single entry.  Bodies are largely elided from this excerpt. */
506 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
511 static void netem_put(struct Qdisc *sch, unsigned long arg)
515 static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
516 struct rtattr **tca, unsigned long *arg)
521 static int netem_delete(struct Qdisc *sch, unsigned long arg)
/* Walk the one class, honouring the walker's skip/count bookkeeping. */
526 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
529 if (walker->count >= walker->skip)
530 if (walker->fn(sch, 1, walker) < 0) {
/* No classifier chain is attached to netem (returns NULL; body elided). */
538 static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
/* Class operations table wiring the single-class stubs above into the
 * qdisc framework (some member initializers elided from this excerpt). */
543 static struct Qdisc_class_ops netem_class_ops = {
544 .graft = netem_graft,
548 .change = netem_change_class,
549 .delete = netem_delete,
551 .tcf_chain = netem_find_tcf,
552 .dump = netem_dump_class,
/* Main qdisc operations table registered with the packet scheduler
 * (id string, drop, init, and dump initializers elided from excerpt). */
555 static struct Qdisc_ops netem_qdisc_ops = {
557 .cl_ops = &netem_class_ops,
558 .priv_size = sizeof(struct netem_sched_data),
559 .enqueue = netem_enqueue,
560 .dequeue = netem_dequeue,
561 .requeue = netem_requeue,
564 .reset = netem_reset,
565 .destroy = netem_destroy,
566 .change = netem_change,
568 .owner = THIS_MODULE,
/* Module entry/exit: register and unregister the "netem" qdisc ops. */
572 static int __init netem_module_init(void)
574 return register_qdisc(&netem_qdisc_ops);
576 static void __exit netem_module_exit(void)
578 unregister_qdisc(&netem_qdisc_ops);
580 module_init(netem_module_init)
581 module_exit(netem_module_exit)
582 MODULE_LICENSE("GPL");