/*
 * net/sched/sch_generic.c      Generic packet scheduler routines.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>

/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
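
/* Illustrative sketch (not taken from this file): a caller is expected to
 * serialize its enqueue and the transmit loop under the root lock roughly
 * as follows, assuming the qdisc_run() helper from <net/pkt_sched.h>:
 *
 *      root_lock = qdisc_lock(q);
 *      spin_lock(root_lock);
 *      rc = q->enqueue(skb, q);
 *      qdisc_run(q);           (qdisc_run_begin() + __qdisc_run() below)
 *      spin_unlock(root_lock);
 */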

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
        skb_dst_force(skb);
        q->gso_skb = skb;
        q->qstats.requeues++;
        q->q.qlen++;    /* it's still part of the queue */
        __netif_schedule(q);

        return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
        struct sk_buff *skb = q->gso_skb;
        const struct netdev_queue *txq = q->dev_queue;

        if (unlikely(skb)) {
                /* check the reason for requeuing without taking the tx lock first */
                txq = skb_get_tx_queue(txq->dev, skb);
                if (!netif_xmit_frozen_or_stopped(txq)) {
                        q->gso_skb = NULL;
                        q->q.qlen--;
                } else
                        skb = NULL;
        } else {
                if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
                        skb = q->dequeue(q);
                        if (skb)
                                skb = validate_xmit_skb(skb, qdisc_dev(q));
                }
        }

        return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
                                           struct netdev_queue *dev_queue,
                                           struct Qdisc *q)
{
        int ret;

        if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
                /*
                 * Same CPU holding the lock. It may be a transient
                 * configuration error, when hard_start_xmit() recurses. We
                 * detect it by checking xmit owner and drop the packet when
                 * deadloop is detected. Return OK to try the next skb.
                 */
                kfree_skb_list(skb);
                net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
                                     dev_queue->dev->name);
                ret = qdisc_qlen(q);
        } else {
                /*
                 * Another cpu is holding lock, requeue & delay xmits for
                 * some time.
                 */
                __this_cpu_inc(softnet_data.cpu_collision);
                ret = dev_requeue_skb(skb, q);
        }

        return ret;
}

/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
 * only one CPU can execute this function.
 *
 * Returns to the caller:
 *                              0  - queue is empty or throttled.
 *                              >0 - queue is not empty.
 */
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
                    struct net_device *dev, struct netdev_queue *txq,
                    spinlock_t *root_lock)
{
        int ret = NETDEV_TX_BUSY;

        /* And release qdisc */
        spin_unlock(root_lock);

        HARD_TX_LOCK(dev, txq, smp_processor_id());
        if (!netif_xmit_frozen_or_stopped(txq))
                skb = dev_hard_start_xmit(skb, dev, txq, &ret);

        HARD_TX_UNLOCK(dev, txq);

        spin_lock(root_lock);

        if (dev_xmit_complete(ret)) {
                /* Driver sent out skb successfully or skb was consumed */
                ret = qdisc_qlen(q);
        } else if (ret == NETDEV_TX_LOCKED) {
                /* Driver try lock failed */
                ret = handle_dev_cpu_collision(skb, txq, q);
        } else {
                /* Driver returned NETDEV_TX_BUSY - requeue skb */
                if (unlikely(ret != NETDEV_TX_BUSY))
                        net_warn_ratelimited("BUG %s code %d qlen %d\n",
                                             dev->name, ret, q->q.qlen);

                ret = dev_requeue_skb(skb, q);
        }

        if (ret && netif_xmit_frozen_or_stopped(txq))
                ret = 0;

        return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC___STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to the device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 *  if one is held, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *                              0  - queue is empty or throttled.
 *                              >0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
        struct netdev_queue *txq;
        struct net_device *dev;
        spinlock_t *root_lock;
        struct sk_buff *skb;

        /* Dequeue packet */
        skb = dequeue_skb(q);
        if (unlikely(!skb))
                return 0;

        WARN_ON_ONCE(skb_dst_is_noref(skb));

        root_lock = qdisc_lock(q);
        dev = qdisc_dev(q);
        txq = skb_get_tx_queue(dev, skb);

        return sch_direct_xmit(skb, q, dev, txq, root_lock);
}

void __qdisc_run(struct Qdisc *q)
{
        int quota = weight_p;

        while (qdisc_restart(q)) {
                /*
                 * Ordered by possible occurrence: Postpone processing if
                 * 1. we've exceeded packet quota
                 * 2. another process needs the CPU;
                 */
                if (--quota <= 0 || need_resched()) {
                        __netif_schedule(q);
                        break;
                }
        }

        qdisc_run_end(q);
}

unsigned long dev_trans_start(struct net_device *dev)
{
        unsigned long val, res;
        unsigned int i;

        if (is_vlan_dev(dev))
                dev = vlan_dev_real_dev(dev);
        res = dev->trans_start;
        for (i = 0; i < dev->num_tx_queues; i++) {
                val = netdev_get_tx_queue(dev, i)->trans_start;
                if (val && time_after(val, res))
                        res = val;
        }
        dev->trans_start = res;

        return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(unsigned long arg)
{
        struct net_device *dev = (struct net_device *)arg;

        netif_tx_lock(dev);
        if (!qdisc_tx_is_noop(dev)) {
                if (netif_device_present(dev) &&
                    netif_running(dev) &&
                    netif_carrier_ok(dev)) {
                        int some_queue_timedout = 0;
                        unsigned int i;
                        unsigned long trans_start;

                        for (i = 0; i < dev->num_tx_queues; i++) {
                                struct netdev_queue *txq;

                                txq = netdev_get_tx_queue(dev, i);
                                /*
                                 * old device drivers set dev->trans_start
                                 */
                                trans_start = txq->trans_start ? : dev->trans_start;
                                if (netif_xmit_stopped(txq) &&
                                    time_after(jiffies, (trans_start +
                                                         dev->watchdog_timeo))) {
                                        some_queue_timedout = 1;
                                        txq->trans_timeout++;
                                        break;
                                }
                        }

                        if (some_queue_timedout) {
                                WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
                                       dev->name, netdev_drivername(dev), i);
                                dev->netdev_ops->ndo_tx_timeout(dev);
                        }
                        if (!mod_timer(&dev->watchdog_timer,
                                       round_jiffies(jiffies +
                                                     dev->watchdog_timeo)))
                                dev_hold(dev);
                }
        }
        netif_tx_unlock(dev);

        dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
        if (dev->netdev_ops->ndo_tx_timeout) {
                if (dev->watchdog_timeo <= 0)
                        dev->watchdog_timeo = 5*HZ;
                if (!mod_timer(&dev->watchdog_timer,
                               round_jiffies(jiffies + dev->watchdog_timeo)))
                        dev_hold(dev);
        }
}

static void dev_watchdog_up(struct net_device *dev)
{
        __netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
        netif_tx_lock_bh(dev);
        if (del_timer(&dev->watchdog_timer))
                dev_put(dev);
        netif_tx_unlock_bh(dev);
}

/**
 *      netif_carrier_on - set carrier
 *      @dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
        if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
                if (dev->reg_state == NETREG_UNINITIALIZED)
                        return;
                atomic_inc(&dev->carrier_changes);
                linkwatch_fire_event(dev);
                if (netif_running(dev))
                        __netdev_watchdog_up(dev);
        }
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *      netif_carrier_off - clear carrier
 *      @dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
                if (dev->reg_state == NETREG_UNINITIALIZED)
                        return;
                atomic_inc(&dev->carrier_changes);
                linkwatch_fire_event(dev);
        }
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
        kfree_skb(skb);
        return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
        return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
        .id             =       "noop",
        .priv_size      =       0,
        .enqueue        =       noop_enqueue,
        .dequeue        =       noop_dequeue,
        .peek           =       noop_dequeue,
        .owner          =       THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
        .qdisc          =       &noop_qdisc,
        .qdisc_sleeping =       &noop_qdisc,
};

struct Qdisc noop_qdisc = {
        .enqueue        =       noop_enqueue,
        .dequeue        =       noop_dequeue,
        .flags          =       TCQ_F_BUILTIN,
        .ops            =       &noop_qdisc_ops,
        .list           =       LIST_HEAD_INIT(noop_qdisc.list),
        .q.lock         =       __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
        .dev_queue      =       &noop_netdev_queue,
        .busylock       =       __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
        .id             =       "noqueue",
        .priv_size      =       0,
        .enqueue        =       noop_enqueue,
        .dequeue        =       noop_dequeue,
        .peek           =       noop_dequeue,
        .owner          =       THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
        .qdisc          =       &noqueue_qdisc,
        .qdisc_sleeping =       &noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
        .enqueue        =       NULL,
        .dequeue        =       noop_dequeue,
        .flags          =       TCQ_F_BUILTIN,
        .ops            =       &noqueue_qdisc_ops,
        .list           =       LIST_HEAD_INIT(noqueue_qdisc.list),
        .q.lock         =       __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
        .dev_queue      =       &noqueue_netdev_queue,
        .busylock       =       __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
};


static const u8 prio2band[TC_PRIO_MAX + 1] = {
        1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

/*
 * Private data for a pfifo_fast scheduler containing:
 *      - queues for the three bands
 *      - bitmap indicating which of the bands contain skbs
 */
struct pfifo_fast_priv {
        u32 bitmap;
        struct sk_buff_head q[PFIFO_FAST_BANDS];
};

/*
 * Convert a bitmap to the first band number where an skb is queued, where:
 *      bitmap=0 means there are no skbs on any band.
 *      bitmap=1 means there is an skb on band 0.
 *      bitmap=7 means there are skbs on all 3 bands, etc.
 */
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
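
/* Worked example (illustrative): skb->priority & TC_PRIO_MAX selects the
 * band via prio2band[], e.g. TC_PRIO_BESTEFFORT (0) maps to band 1 and
 * TC_PRIO_INTERACTIVE (6) maps to band 0. If only bands 1 and 2 hold
 * packets, bitmap == 0x6 and bitmap2band[6] == 1, so pfifo_fast_dequeue()
 * services band 1 before band 2; once band 1 drains, its bit is cleared
 * and bitmap2band[4] == 2 points at band 2.
 */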

static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
                                             int band)
{
        return priv->q + band;
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
        if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
                int band = prio2band[skb->priority & TC_PRIO_MAX];
                struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
                struct sk_buff_head *list = band2list(priv, band);

                priv->bitmap |= (1 << band);
                qdisc->q.qlen++;
                return __qdisc_enqueue_tail(skb, qdisc, list);
        }

        return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
        int band = bitmap2band[priv->bitmap];

        if (likely(band >= 0)) {
                struct sk_buff_head *list = band2list(priv, band);
                struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);

                qdisc->q.qlen--;
                if (skb_queue_empty(list))
                        priv->bitmap &= ~(1 << band);

                return skb;
        }

        return NULL;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
        int band = bitmap2band[priv->bitmap];

        if (band >= 0) {
                struct sk_buff_head *list = band2list(priv, band);

                return skb_peek(list);
        }

        return NULL;
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
        int prio;
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
                __qdisc_reset_queue(qdisc, band2list(priv, prio));

        priv->bitmap = 0;
        qdisc->qstats.backlog = 0;
        qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
        struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

        memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
                goto nla_put_failure;
        return skb->len;

nla_put_failure:
        return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
        int prio;
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
                skb_queue_head_init(band2list(priv, prio));

        /* Can by-pass the queue discipline */
        qdisc->flags |= TCQ_F_CAN_BYPASS;
        return 0;
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
        .id             =       "pfifo_fast",
        .priv_size      =       sizeof(struct pfifo_fast_priv),
        .enqueue        =       pfifo_fast_enqueue,
        .dequeue        =       pfifo_fast_dequeue,
        .peek           =       pfifo_fast_peek,
        .init           =       pfifo_fast_init,
        .reset          =       pfifo_fast_reset,
        .dump           =       pfifo_fast_dump,
        .owner          =       THIS_MODULE,
};

static struct lock_class_key qdisc_tx_busylock;

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                          const struct Qdisc_ops *ops)
{
        void *p;
        struct Qdisc *sch;
        unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
        int err = -ENOBUFS;
        struct net_device *dev = dev_queue->dev;

        p = kzalloc_node(size, GFP_KERNEL,
                         netdev_queue_numa_node_read(dev_queue));

        if (!p)
                goto errout;
        sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
        /* if we got non-aligned memory, ask for more and do the alignment ourselves */
        if (sch != p) {
                kfree(p);
                p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
                                 netdev_queue_numa_node_read(dev_queue));
                if (!p)
                        goto errout;
                sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
                sch->padded = (char *) sch - (char *) p;
        }
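        /* Illustrative note: after the over-sized retry above, QDISC_ALIGN()
         * rounds sch up to the next QDISC_ALIGNTO boundary inside the extra
         * QDISC_ALIGNTO - 1 bytes, and sch->padded records the offset so
         * qdisc_rcu_free() can recover the original pointer with
         * kfree((char *) qdisc - qdisc->padded).
         */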
        INIT_LIST_HEAD(&sch->list);
        skb_queue_head_init(&sch->q);

        spin_lock_init(&sch->busylock);
        lockdep_set_class(&sch->busylock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

        sch->ops = ops;
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev_queue = dev_queue;
        dev_hold(dev);
        atomic_set(&sch->refcnt, 1);

        return sch;
errout:
        return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
                                const struct Qdisc_ops *ops,
                                unsigned int parentid)
{
        struct Qdisc *sch;

        if (!try_module_get(ops->owner))
                goto errout;

        sch = qdisc_alloc(dev_queue, ops);
        if (IS_ERR(sch))
                goto errout;
        sch->parent = parentid;

        if (!ops->init || ops->init(sch, NULL) == 0)
                return sch;

        qdisc_destroy(sch);
errout:
        return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
        const struct Qdisc_ops *ops = qdisc->ops;

        if (ops->reset)
                ops->reset(qdisc);

        if (qdisc->gso_skb) {
                kfree_skb(qdisc->gso_skb);
                qdisc->gso_skb = NULL;
                qdisc->q.qlen = 0;
        }
}
EXPORT_SYMBOL(qdisc_reset);

static void qdisc_rcu_free(struct rcu_head *head)
{
        struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);

        kfree((char *) qdisc - qdisc->padded);
}

void qdisc_destroy(struct Qdisc *qdisc)
{
        const struct Qdisc_ops  *ops = qdisc->ops;

        if (qdisc->flags & TCQ_F_BUILTIN ||
            !atomic_dec_and_test(&qdisc->refcnt))
                return;

#ifdef CONFIG_NET_SCHED
        qdisc_list_del(qdisc);

        qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
        gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
        if (ops->reset)
                ops->reset(qdisc);
        if (ops->destroy)
                ops->destroy(qdisc);

        module_put(ops->owner);
        dev_put(qdisc_dev(qdisc));

        kfree_skb(qdisc->gso_skb);
        /*
         * gen_estimator est_timer() might access qdisc->q.lock,
         * so wait an RCU grace period before freeing qdisc.
         */
        call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
}
EXPORT_SYMBOL(qdisc_destroy);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
                              struct Qdisc *qdisc)
{
        struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
        spinlock_t *root_lock;

        root_lock = qdisc_lock(oqdisc);
        spin_lock_bh(root_lock);

        /* Prune old scheduler */
        if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
                qdisc_reset(oqdisc);

        /* ... and graft new one */
        if (qdisc == NULL)
                qdisc = &noop_qdisc;
        dev_queue->qdisc_sleeping = qdisc;
        rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

        spin_unlock_bh(root_lock);

        return oqdisc;
}
EXPORT_SYMBOL(dev_graft_qdisc);

static void attach_one_default_qdisc(struct net_device *dev,
                                     struct netdev_queue *dev_queue,
                                     void *_unused)
{
        struct Qdisc *qdisc = &noqueue_qdisc;

        if (dev->tx_queue_len) {
                qdisc = qdisc_create_dflt(dev_queue,
                                          default_qdisc_ops, TC_H_ROOT);
                if (!qdisc) {
                        netdev_info(dev, "activation failed\n");
                        return;
                }
                if (!netif_is_multiqueue(dev))
                        qdisc->flags |= TCQ_F_ONETXQUEUE;
        }
        dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
        struct netdev_queue *txq;
        struct Qdisc *qdisc;

        txq = netdev_get_tx_queue(dev, 0);

        if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
                netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
                dev->qdisc = txq->qdisc_sleeping;
                atomic_inc(&dev->qdisc->refcnt);
        } else {
                qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
                if (qdisc) {
                        dev->qdisc = qdisc;
                        qdisc->ops->attach(qdisc);
                }
        }
}

static void transition_one_qdisc(struct net_device *dev,
                                 struct netdev_queue *dev_queue,
                                 void *_need_watchdog)
{
        struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
        int *need_watchdog_p = _need_watchdog;

        if (!(new_qdisc->flags & TCQ_F_BUILTIN))
                clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

        rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
        if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
                dev_queue->trans_start = 0;
                *need_watchdog_p = 1;
        }
}

void dev_activate(struct net_device *dev)
{
        int need_watchdog;

        /* No queueing discipline is attached to the device;
         * create a default one for devices which need queueing,
         * and the noqueue_qdisc for virtual interfaces.
         */

        if (dev->qdisc == &noop_qdisc)
                attach_default_qdiscs(dev);

        if (!netif_carrier_ok(dev))
                /* Delay activation until next carrier-on event */
                return;

        need_watchdog = 0;
        netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
        if (dev_ingress_queue(dev))
                transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

        if (need_watchdog) {
                dev->trans_start = jiffies;
                dev_watchdog_up(dev);
        }
}
EXPORT_SYMBOL(dev_activate);

static void dev_deactivate_queue(struct net_device *dev,
                                 struct netdev_queue *dev_queue,
                                 void *_qdisc_default)
{
        struct Qdisc *qdisc_default = _qdisc_default;
        struct Qdisc *qdisc;

        qdisc = dev_queue->qdisc;
        if (qdisc) {
                spin_lock_bh(qdisc_lock(qdisc));

                if (!(qdisc->flags & TCQ_F_BUILTIN))
                        set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

                rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
                qdisc_reset(qdisc);

                spin_unlock_bh(qdisc_lock(qdisc));
        }
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
        unsigned int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *dev_queue;
                spinlock_t *root_lock;
                struct Qdisc *q;
                int val;

                dev_queue = netdev_get_tx_queue(dev, i);
                q = dev_queue->qdisc_sleeping;
                root_lock = qdisc_lock(q);

                spin_lock_bh(root_lock);

                val = (qdisc_is_running(q) ||
                       test_bit(__QDISC_STATE_SCHED, &q->state));

                spin_unlock_bh(root_lock);

                if (val)
                        return true;
        }
        return false;
}

/**
 *      dev_deactivate_many - deactivate transmissions on several devices
 *      @head: list of devices to deactivate
 *
 *      This function returns only when all outstanding transmissions
 *      have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
        struct net_device *dev;
        bool sync_needed = false;

        list_for_each_entry(dev, head, close_list) {
                netdev_for_each_tx_queue(dev, dev_deactivate_queue,
                                         &noop_qdisc);
                if (dev_ingress_queue(dev))
                        dev_deactivate_queue(dev, dev_ingress_queue(dev),
                                             &noop_qdisc);

                dev_watchdog_down(dev);
                sync_needed |= !dev->dismantle;
        }

        /* Wait for outstanding qdisc-less dev_queue_xmit calls.
         * This is avoided if all devices are in dismantle phase:
         * Caller will call synchronize_net() for us
         */
        if (sync_needed)
                synchronize_net();

        /* Wait for outstanding qdisc_run calls. */
        list_for_each_entry(dev, head, close_list)
                while (some_qdisc_is_busy(dev))
                        yield();
}

void dev_deactivate(struct net_device *dev)
{
        LIST_HEAD(single);

        list_add(&dev->close_list, &single);
        dev_deactivate_many(&single);
        list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);

static void dev_init_scheduler_queue(struct net_device *dev,
                                     struct netdev_queue *dev_queue,
                                     void *_qdisc)
{
        struct Qdisc *qdisc = _qdisc;

        dev_queue->qdisc = qdisc;
        dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
        dev->qdisc = &noop_qdisc;
        netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
        if (dev_ingress_queue(dev))
                dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

        setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
                                     struct netdev_queue *dev_queue,
                                     void *_qdisc_default)
{
        struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
        struct Qdisc *qdisc_default = _qdisc_default;

        if (qdisc) {
                rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
                dev_queue->qdisc_sleeping = qdisc_default;

                qdisc_destroy(qdisc);
        }
}

void dev_shutdown(struct net_device *dev)
{
        netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
        if (dev_ingress_queue(dev))
                shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
        qdisc_destroy(dev->qdisc);
        dev->qdisc = &noop_qdisc;

        WARN_ON(timer_pending(&dev->watchdog_timer));
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
                               const struct tc_ratespec *conf,
                               u64 rate64)
{
        memset(r, 0, sizeof(*r));
        r->overhead = conf->overhead;
        r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
        r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
        r->mult = 1;
        /*
         * The deal here is to replace a divide by a reciprocal one
         * in the fast path (a reciprocal divide is a multiply and a shift).
         *
         * The normal formula would be:
         *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
         *
         * We compute mult/shift to use instead:
         *  time_in_ns = (len * mult) >> shift;
         *
         * We try to get the highest possible mult value for accuracy,
         * but have to make sure no overflows will ever happen.
         */
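        /*
         * Worked example (illustrative): for a 1 Gbit/s class,
         * rate_bytes_ps == 125000000, and the loop below ends with
         * mult == 2147483648 (1U << 31) and shift == 28, so a 1500 byte
         * packet costs (1500 * mult) >> shift == 12000 ns, matching
         * NSEC_PER_SEC * 1500 / 125000000 exactly.
         */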
        if (r->rate_bytes_ps > 0) {
                u64 factor = NSEC_PER_SEC;

                for (;;) {
                        r->mult = div64_u64(factor, r->rate_bytes_ps);
                        if (r->mult & (1U << 31) || factor & (1ULL << 63))
                                break;
                        factor <<= 1;
                        r->shift++;
                }
        }
}
EXPORT_SYMBOL(psched_ratecfg_precompute);