/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   just set this qdisc as the device's root qdisc, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
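
   To actually pass traffic over the bundle, the master interface must
   still be configured and brought up itself. A minimal sketch, assuming
   iproute2 is available (the address below is an example only, not
   anything this file prescribes):

   # ip addr add 10.0.0.1/24 dev teql0
   # ip link set teql0 up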

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will happily equalize a 9600 baud line and 100Mb Ethernet :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable because of huge packet reordering.
      I estimate the useful upper bound on the speed ratio at about 10x.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

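/* One instance per teqlN master device.  The Qdisc_ops is embedded here
 * because each master registers its own qdisc type, named after the
 * device (see teql_init), so the qdisc and the device find each other
 * through this structure.
 */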
struct teql_master {
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        unsigned long   tx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_errors;
        unsigned long   tx_dropped;
};

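/* Private data of one slave qdisc.  All slaves of a master are linked
 * into a circular list through ->next; master->slaves points into it.
 */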
struct teql_sched_data {
        struct Qdisc *next;
        struct teql_master *m;
        struct neighbour *ncache;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

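/* Enqueue on the slave's private queue; the master device's
 * tx_queue_len bounds the backlog of every slave.
 */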
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < dev->tx_queue_len) {
                __skb_queue_tail(&q->q, skb);
                qdisc_bstats_update(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        kfree_skb(skb);
        sch->qstats.drops++;
        return NET_XMIT_DROP;
}

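/* If the private queue is empty, remember this slave as the next
 * transmit candidate and wake the master so teql_master_xmit() can
 * resume its round-robin.  sch->q.qlen also accounts for packets
 * still sitting in the master device's own qdisc.
 */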
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
        struct sk_buff *skb;

        skb = __skb_dequeue(&dat->q);
        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
        if (skb == NULL) {
                struct net_device *m = qdisc_dev(dat_queue->qdisc);
                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        }
        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
        return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
        if (n)
                neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
        teql_neigh_release(xchg(&dat->ncache, NULL));
}

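/* Unlink this qdisc from the master's circular slave list.  When the
 * last slave goes away, also reset the master device's own qdisc
 * under its root lock so no stale packets remain queued there.
 */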
static void
teql_destroy(struct Qdisc *sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        prev = master->slaves;
        if (prev) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                struct netdev_queue *txq;
                                                spinlock_t *root_lock;

                                                txq = netdev_get_tx_queue(master->dev, 0);
                                                master->slaves = NULL;

                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
                                                spin_lock_bh(root_lock);
                                                qdisc_reset(txq->qdisc);
                                                spin_unlock_bh(root_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                teql_neigh_release(xchg(&dat->ncache, NULL));
                                break;
                        }

                } while ((prev = q) != master->slaves);
        }
}

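/* Attaching teqlN as a root qdisc enrols the device as a slave.
 * A slave must never need more header room than the master provides,
 * and the master cannot enslave itself.  While the master is up, a
 * new slave must be at least as capable (flags, MTU) as the bundle;
 * while it is down, the master's flags and MTU are narrowed to match.
 */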
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags & IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags & IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags & IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
        }
        return 0;
}

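/* Build the link-layer header for skb on the chosen slave via the
 * neighbour cache, keeping the looked-up neighbour in q->ncache.
 * Returns 0 once the header is in place; if resolution is still
 * pending, returns -EAGAIN on the first pass (skb_res == NULL) and
 * 1 on the retry pass.
 */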
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
        struct neighbour *mn = skb_dst(skb)->neighbour;
        struct neighbour *n = q->ncache;

        if (mn->tbl == NULL)
                return -EINVAL;
        if (n && n->tbl == mn->tbl &&
            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
                atomic_inc(&n->refcnt);
        } else {
                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
        }
        if (neigh_event_send(n, skb_res) == 0) {
                int err;
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, n, dev);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
                                      NULL, skb->len);

                if (err < 0) {
                        neigh_release(n);
                        return -EINVAL;
                }
                teql_neigh_release(xchg(&q->ncache, n));
                return 0;
        }
        neigh_release(n);
        return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res, struct net_device *dev)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

        if (txq->qdisc == &noop_qdisc)
                return -ENODEV;

        if (dev->header_ops == NULL ||
            skb_dst(skb) == NULL ||
            skb_dst(skb)->neighbour == NULL)
                return 0;
        return __teql_resolve(skb, skb_res, dev);
}

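/* Round-robin transmit over the slave list, starting at the slave
 * remembered in master->slaves.  The skb is handed straight to the
 * slave driver's ndo_start_xmit() under the slave's tx lock,
 * bypassing the slave's own qdisc; if every slave is busy the master
 * queue is stopped until teql_dequeue() wakes it again.
 */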
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        q = start;
        if (q == NULL)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
                const struct net_device_ops *slave_ops = slave->netdev_ops;

                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (__netif_subqueue_stopped(slave, subq) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave)) {
                case 0:
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
                                        txq_trans_update(slave_txq);
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_queue_stopped(dev))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
        struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned int flags = IFF_NOARP | IFF_MULTICAST;

        if (m->slaves == NULL)
                return -EUNATCH;

        flags = FMASK;

        q = m->slaves;
        do {
                struct net_device *slave = qdisc_dev(q);

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags & IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags & IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags & IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags & ~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
                                                     struct rtnl_link_stats64 *stats)
{
        struct teql_master *m = netdev_priv(dev);

        stats->tx_packets       = m->tx_packets;
        stats->tx_bytes         = m->tx_bytes;
        stats->tx_errors        = m->tx_errors;
        stats->tx_dropped       = m->tx_dropped;
        return stats;
}

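/* A new master MTU must not exceed any slave's MTU; 68 is the minimum
 * MTU an IPv4 link must support.
 */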
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        if (new_mtu < 68)
                return -EINVAL;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
                } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }

        dev->mtu = new_mtu;
        return 0;
}

static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu = teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    = teql_enqueue;
        ops->dequeue    = teql_dequeue;
        ops->peek       = teql_peek;
        ops->init       = teql_qdisc_init;
        ops->reset      = teql_reset;
        ops->destroy    = teql_destroy;
        ops->owner      = THIS_MODULE;

        dev->netdev_ops         = &teql_netdev_ops;
        dev->type               = ARPHRD_VOID;
        dev->mtu                = 1500;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER;
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
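
/* By default only teql0 exists.  For more bundles, load the module
 * with a larger count; a typical invocation (assuming the module is
 * built as sch_teql, after this file's name) would be:
 *
 *   # modprobe sch_teql max_equalizers=4
 */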

static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master),
                                   "teql%d", teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                err = register_netdev(dev);
                if (err) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);

                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
                list_del(&master->master_list);
                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");