2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
/* Socket of the user-space multicast routing daemon (set via MRT6_INIT);
 * NULL while no daemon (e.g. pim6sd) is attached. */
52 struct sock *mroute6_socket;
55 /* Big lock, protecting vif table, mrt cache and mroute socket state.
56 Note that the changes are semaphored via rtnl_lock.
59 static DEFINE_RWLOCK(mrt_lock);
62 * Multicast router control variables
/* Table of multicast virtual interfaces, indexed by mifi_t. */
65 static struct mif_device vif6_table[MAXMIFS]; /* Devices */
68 #define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
70 static int mroute_do_assert; /* Set in PIM assert */
71 #ifdef CONFIG_IPV6_PIMSM_V2
72 static int mroute_do_pim;
/* Without PIM-SM v2 support the knob is compiled out to constant 0. */
74 #define mroute_do_pim 0
77 static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */
79 static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
80 static atomic_t cache_resolve_queue_len; /* Size of unresolved */
82 /* Special spinlock for queue of unresolved entries */
83 static DEFINE_SPINLOCK(mfc_unres_lock);
85 /* We return to original Alan's scheme. Hash table of resolved
86 entries is changed only in process context and protected
87 with weak lock mrt_lock. Queue of unresolved entries is protected
88 with strong spinlock mfc_unres_lock.
90 In this case data path is free of exclusive locks at all.
/* Slab cache backing struct mfc6_cache allocations. */
93 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for the data-path helpers defined further down. */
95 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
96 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
97 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
99 #ifdef CONFIG_IPV6_PIMSM_V2
100 static struct inet6_protocol pim6_protocol;
/* Timer driving expiry of entries on the unresolved queue. */
103 static struct timer_list ipmr_expire_timer;
106 #ifdef CONFIG_PROC_FS
/* Iterator state for /proc/ip6_mr_cache: 'cache' records which list we
 * are walking (resolved hash vs. unresolved queue); the bucket index
 * member ('ct') is declared on lines missing from this extract. */
108 struct ipmr_mfc_iter {
109 struct mfc6_cache **cache;
/* Position 'pos' entries into the MFC: first walk the resolved hash
 * buckets under mrt_lock, then the unresolved queue under
 * mfc_unres_lock. On a hit the corresponding lock is left held for
 * ipmr_mfc_seq_stop(). NOTE(review): the loop bodies that consume
 * 'pos' and return the entry are on lines missing from this extract. */
114 static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
116 struct mfc6_cache *mfc;
118 it->cache = mfc6_cache_array;
119 read_lock(&mrt_lock);
120 for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
121 for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
124 read_unlock(&mrt_lock);
126 it->cache = &mfc_unres_queue;
127 spin_lock_bh(&mfc_unres_lock);
128 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
131 spin_unlock_bh(&mfc_unres_lock);
141 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
/* Iterator state for /proc/ip6_mr_vif (member declarations are on
 * lines missing from this extract). */
144 struct ipmr_vif_iter {
/* Advance to the pos'th slot of vif6_table that has a device attached.
 * Called under mrt_lock (taken in ->start below). */
148 static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
151 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
152 if (!MIF_EXISTS(iter->ct))
155 return &vif6_table[iter->ct];
/* seq_file ->start: takes mrt_lock for reading; *pos == 0 yields the
 * SEQ_START_TOKEN header row (the alternative arm of this ?: is on a
 * line missing from this extract). */
160 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
163 read_lock(&mrt_lock);
164 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
/* seq_file ->next: step to the next slot whose device is non-NULL. */
168 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
170 struct ipmr_vif_iter *iter = seq->private;
173 if (v == SEQ_START_TOKEN)
174 return ip6mr_vif_seq_idx(iter, 0);
176 while (++iter->ct < maxvif) {
177 if (!MIF_EXISTS(iter->ct))
179 return &vif6_table[iter->ct];
184 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
187 read_unlock(&mrt_lock);
/* seq_file ->show: print the header row, or one MIF's byte/packet
 * counters and flags. */
190 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
192 if (v == SEQ_START_TOKEN) {
194 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
196 const struct mif_device *vif = v;
197 const char *name = vif->dev ? vif->dev->name : "none";
200 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n",
202 name, vif->bytes_in, vif->pkt_in,
203 vif->bytes_out, vif->pkt_out,
/* seq_file operations for /proc/ip6_mr_vif. */
209 static struct seq_operations ip6mr_vif_seq_ops = {
210 .start = ip6mr_vif_seq_start,
211 .next = ip6mr_vif_seq_next,
212 .stop = ip6mr_vif_seq_stop,
213 .show = ip6mr_vif_seq_show,
/* ->open: allocate a private ipmr_vif_iter along with the seq_file. */
216 static int ip6mr_vif_open(struct inode *inode, struct file *file)
218 return seq_open_private(file, &ip6mr_vif_seq_ops,
219 sizeof(struct ipmr_vif_iter));
/* NOTE(review): .read/.llseek entries appear to be on lines missing
 * from this extract. */
222 static struct file_operations ip6mr_vif_fops = {
223 .owner = THIS_MODULE,
224 .open = ip6mr_vif_open,
227 .release = seq_release,
/* seq_file ->start for /proc/ip6_mr_cache: delegate positioning to
 * ipmr_mfc_seq_idx(), which also takes the appropriate lock. */
230 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
232 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
/* seq_file ->next: follow the current chain; when the resolved hash is
 * exhausted, swap mrt_lock for mfc_unres_lock and continue on the
 * unresolved queue. Several transitions (returning mfc->next etc.) are
 * on lines missing from this extract. */
236 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
238 struct mfc6_cache *mfc = v;
239 struct ipmr_mfc_iter *it = seq->private;
243 if (v == SEQ_START_TOKEN)
244 return ipmr_mfc_seq_idx(seq->private, 0);
/* Already in the unresolved queue: no further list to fall back to. */
249 if (it->cache == &mfc_unres_queue)
252 BUG_ON(it->cache != mfc6_cache_array);
254 while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
255 mfc = mfc6_cache_array[it->ct];
260 /* exhausted cache_array, show unresolved */
261 read_unlock(&mrt_lock);
262 it->cache = &mfc_unres_queue;
265 spin_lock_bh(&mfc_unres_lock);
266 mfc = mfc_unres_queue;
271 spin_unlock_bh(&mfc_unres_lock);
277 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
279 struct ipmr_mfc_iter *it = seq->private;
281 if (it->cache == &mfc_unres_queue)
282 spin_unlock_bh(&mfc_unres_lock);
283 else if (it->cache == mfc6_cache_array)
284 read_unlock(&mrt_lock);
/* seq_file ->show: print the header row, or one MFC entry (group,
 * origin, input interface, counters, and — for resolved entries — the
 * per-oif TTL thresholds). */
287 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
291 if (v == SEQ_START_TOKEN) {
295 "Iif Pkts Bytes Wrong Oifs\n");
297 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private;
301 NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
302 NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
305 mfc->mfc_un.res.bytes,
306 mfc->mfc_un.res.wrong_if);
/* Unresolved entries have no oif list; only resolved ones do. */
308 if (it->cache != &mfc_unres_queue) {
309 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) {
312 mfc->mfc_un.res.ttls[n] < 255)
315 n, mfc->mfc_un.res.ttls[n]);
/* seq_file operations for /proc/ip6_mr_cache. */
323 static struct seq_operations ipmr_mfc_seq_ops = {
324 .start = ipmr_mfc_seq_start,
325 .next = ipmr_mfc_seq_next,
326 .stop = ipmr_mfc_seq_stop,
327 .show = ipmr_mfc_seq_show,
/* ->open: allocate a private ipmr_mfc_iter along with the seq_file. */
330 static int ipmr_mfc_open(struct inode *inode, struct file *file)
332 return seq_open_private(file, &ipmr_mfc_seq_ops,
333 sizeof(struct ipmr_mfc_iter));
336 static struct file_operations ip6mr_mfc_fops = {
337 .owner = THIS_MODULE,
338 .open = ipmr_mfc_open,
341 .release = seq_release,
345 #ifdef CONFIG_IPV6_PIMSM_V2
/* MIF index of the PIM register pseudo-device; -1 while none exists. */
346 static int reg_vif_num = -1;
/* Receive handler for IPPROTO_PIM: validate a PIM REGISTER message,
 * strip the outer headers, and re-inject the encapsulated multicast
 * packet as if it arrived on the pim6reg device. Error/drop paths and
 * the final netif_rx() are on lines missing from this extract. */
348 static int pim6_rcv(struct sk_buff *skb)
350 struct pimreghdr *pim;
351 struct ipv6hdr *encap;
352 struct net_device *reg_dev = NULL;
354 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
357 pim = (struct pimreghdr *)skb_transport_header(skb);
/* Accept only PIMv2 REGISTER, non-null-register, with a valid
 * checksum over either the header alone or the whole packet. */
358 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
359 (pim->flags & PIM_NULL_REGISTER) ||
360 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
361 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
364 /* check if the inner packet is destined to mcast group */
365 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
368 if (!ipv6_addr_is_multicast(&encap->daddr) ||
369 encap->payload_len == 0 ||
370 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
373 read_lock(&mrt_lock);
374 if (reg_vif_num >= 0)
375 reg_dev = vif6_table[reg_vif_num].dev;
378 read_unlock(&mrt_lock);
/* Rewrite skb so the inner IPv6 packet becomes the packet proper. */
383 skb->mac_header = skb->network_header;
384 skb_pull(skb, (u8 *)encap - skb->data);
385 skb_reset_network_header(skb);
/* NOTE(review): ETH_P_IP for a decapsulated IPv6 packet looks wrong —
 * expect ETH_P_IPV6 here; confirm against upstream ip6mr.c. */
387 skb->protocol = htons(ETH_P_IP);
389 skb->pkt_type = PACKET_HOST;
390 dst_release(skb->dst);
391 ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
392 ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
/* Protocol handler for IPPROTO_PIM; the .handler initializer is on
 * lines missing from this extract (presumably pim6_rcv). */
403 static struct inet6_protocol pim6_protocol = {
407 /* Service routines creating virtual interfaces: PIMREG */
/* ->hard_start_xmit of the pim6reg device: every packet sent on it is
 * reported whole to the daemon as MRT6MSG_WHOLEPKT; the skb free and
 * return value are on lines missing from this extract. */
409 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
411 read_lock(&mrt_lock);
412 ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
413 ((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
414 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
415 read_unlock(&mrt_lock);
/*
 * ->get_stats for the pim6reg device: the stats block is the device's
 * private area (allocated in ip6mr_reg_vif() via alloc_netdev()).
 */
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	struct net_device_stats *stats = netdev_priv(dev);

	return stats;
}
425 static void reg_vif_setup(struct net_device *dev)
427 dev->type = ARPHRD_PIMREG;
428 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
429 dev->flags = IFF_NOARP;
430 dev->hard_start_xmit = reg_vif_xmit;
431 dev->get_stats = reg_vif_get_stats;
432 dev->destructor = free_netdev;
/* Create and register the pim6reg pseudo-device; returns the device or
 * NULL on failure. Error handling between these calls (alloc failure,
 * idev failure, dev_open) is on lines missing from this extract. */
435 static struct net_device *ip6mr_reg_vif(void)
437 struct net_device *dev;
438 struct inet6_dev *in_dev;
/* Private area sized for the stats block returned by reg_vif_get_stats(). */
440 dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
446 if (register_netdevice(dev)) {
452 in_dev = ipv6_find_idev(dev);
462 /* allow the register to be completed before unregistering. */
466 unregister_netdevice(dev);
/* Remove MIF 'vifi' from vif6_table. Returns 0 on success or
 * -EADDRNOTAVAIL if the index is out of range or the slot is empty.
 * The detach of v->dev into 'dev' and the shrinking of 'maxvif' happen
 * on lines missing from this extract. */
475 static int mif6_delete(int vifi)
477 struct mif_device *v;
478 struct net_device *dev;
479 if (vifi < 0 || vifi >= maxvif)
480 return -EADDRNOTAVAIL;
482 v = &vif6_table[vifi];
484 write_lock_bh(&mrt_lock);
/* Slot already empty: nothing to delete. */
489 write_unlock_bh(&mrt_lock);
490 return -EADDRNOTAVAIL;
493 #ifdef CONFIG_IPV6_PIMSM_V2
/* Deleting the register vif clears reg_vif_num (missing line). */
494 if (vifi == reg_vif_num)
/* If the highest slot was removed, scan down for the new maxvif. */
498 if (vifi + 1 == maxvif) {
500 for (tmp = vifi - 1; tmp >= 0; tmp--) {
507 write_unlock_bh(&mrt_lock);
/* Undo the allmulti reference taken in mif6_add(). */
509 dev_set_allmulti(dev, -1);
511 if (v->flags & MIFF_REGISTER)
512 unregister_netdevice(dev);
518 /* Destroy an unresolved cache entry, killing queued skbs
519 and reporting error to netlink readers.
/* Called with mfc_unres_lock held (see callers); queued RTM requests
 * get an NLMSG_ERROR(-ETIMEDOUT) reply, plain data skbs are freed on a
 * line missing from this extract. */
522 static void ip6mr_destroy_unres(struct mfc6_cache *c)
526 atomic_dec(&cache_resolve_queue_len);
528 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
/* version == 0 marks an skb that carries a pending netlink request
 * rather than a real IPv6 packet. */
529 if (ipv6_hdr(skb)->version == 0) {
530 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
531 nlh->nlmsg_type = NLMSG_ERROR;
532 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
533 skb_trim(skb, nlh->nlmsg_len);
534 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
535 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
540 kmem_cache_free(mrt_cachep, c);
544 /* Single timer process for all the unresolved queue. */
/* Walk the unresolved queue under mfc_unres_lock (held by the caller),
 * destroy entries whose 'expires' has passed, track the soonest
 * remaining deadline, and re-arm the timer if anything is left. The
 * list unlink (*cp = c->next) is on a line missing from this extract. */
546 static void ipmr_do_expire_process(unsigned long dummy)
548 unsigned long now = jiffies;
549 unsigned long expires = 10 * HZ;
550 struct mfc6_cache *c, **cp;
552 cp = &mfc_unres_queue;
554 while ((c = *cp) != NULL) {
555 if (time_after(c->mfc_un.unres.expires, now)) {
/* Not yet expired: remember the nearest deadline. */
557 unsigned long interval = c->mfc_un.unres.expires - now;
558 if (interval < expires)
565 ip6mr_destroy_unres(c);
568 if (atomic_read(&cache_resolve_queue_len))
569 mod_timer(&ipmr_expire_timer, jiffies + expires);
/* Timer entry point: retry in one jiffy if the lock is contended
 * (timers must not spin on a BH lock), otherwise run the scan. */
572 static void ipmr_expire_process(unsigned long dummy)
574 if (!spin_trylock(&mfc_unres_lock)) {
575 mod_timer(&ipmr_expire_timer, jiffies + 1);
579 if (atomic_read(&cache_resolve_queue_len))
580 ipmr_do_expire_process(dummy);
582 spin_unlock(&mfc_unres_lock);
585 /* Fill oifs list. It is called under write locked mrt_lock. */
587 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
591 cache->mfc_un.res.minvif = MAXMIFS;
592 cache->mfc_un.res.maxvif = 0;
593 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
595 for (vifi = 0; vifi < maxvif; vifi++) {
596 if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
597 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
598 if (cache->mfc_un.res.minvif > vifi)
599 cache->mfc_un.res.minvif = vifi;
600 if (cache->mfc_un.res.maxvif <= vifi)
601 cache->mfc_un.res.maxvif = vifi + 1;
/* Install a new MIF described by 'vifc'. 'mrtsock' is true when the
 * request came from the daemon socket (entry then marked VIFF_STATIC
 * is skipped on clean-up). Several error returns, the counter resets,
 * and the final v->dev assignment are on lines missing from this
 * extract. */
606 static int mif6_add(struct mif6ctl *vifc, int mrtsock)
608 int vifi = vifc->mif6c_mifi;
609 struct mif_device *v = &vif6_table[vifi];
610 struct net_device *dev;
/* Refuse to overwrite an occupied slot. */
613 if (MIF_EXISTS(vifi))
616 switch (vifc->mif6c_flags) {
617 #ifdef CONFIG_IPV6_PIMSM_V2
620 * Special Purpose VIF in PIM
621 * All the packets will be sent to the daemon
/* Only one register vif may exist at a time. */
623 if (reg_vif_num >= 0)
625 dev = ip6mr_reg_vif();
631 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
633 return -EADDRNOTAVAIL;
/* Hold the device in allmulti for the lifetime of the MIF. */
640 dev_set_allmulti(dev, 1);
643 * Fill in the VIF structures
645 v->rate_limit = vifc->vifc_rate_limit;
646 v->flags = vifc->mif6c_flags;
648 v->flags |= VIFF_STATIC;
649 v->threshold = vifc->vifc_threshold;
654 v->link = dev->ifindex;
655 if (v->flags & MIFF_REGISTER)
656 v->link = dev->iflink;
658 /* And finish update writing critical data */
659 write_lock_bh(&mrt_lock);
662 #ifdef CONFIG_IPV6_PIMSM_V2
/* Record the register vif index (assignment on a missing line). */
663 if (v->flags & MIFF_REGISTER)
666 if (vifi + 1 > maxvif)
668 write_unlock_bh(&mrt_lock);
/* Look up a resolved (origin, group) entry in the MFC hash. Caller
 * holds mrt_lock. The 'return c;' / 'return NULL;' lines are missing
 * from this extract. */
672 static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
674 int line = MFC6_HASH(mcastgrp, origin);
675 struct mfc6_cache *c;
677 for (c = mfc6_cache_array[line]; c; c = c->next) {
678 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
679 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
686 * Allocate a multicast cache entry
/* Allocate and zero a resolved-entry candidate (process context,
 * GFP_KERNEL); the NULL check and return are on missing lines. */
688 static struct mfc6_cache *ip6mr_cache_alloc(void)
690 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
693 memset(c, 0, sizeof(*c));
694 c->mfc_un.res.minvif = MAXMIFS;
/* Atomic-context variant for the unresolved queue: also initialises
 * the pending-skb queue and a 10 s expiry deadline. */
698 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
700 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
703 memset(c, 0, sizeof(*c));
704 skb_queue_head_init(&c->mfc_un.unres.unresolved);
705 c->mfc_un.unres.expires = jiffies + 10 * HZ;
710 * A cache entry has gone into a resolved state from queued
/* Replay every skb that was parked on the unresolved entry 'uc'
 * through the now-resolved entry 'c': pending netlink route requests
 * are answered (or failed with -EMSGSIZE), data packets are forwarded. */
713 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
718 * Play the pending entries through our router
721 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
/* version == 0 marks a parked netlink request, not a data packet. */
722 if (ipv6_hdr(skb)->version == 0) {
724 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
726 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
727 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
729 nlh->nlmsg_type = NLMSG_ERROR;
730 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
731 skb_trim(skb, nlh->nlmsg_len);
732 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
734 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
736 ip6_mr_forward(skb, c);
741 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
742 * expects the following bizarre scheme.
744 * Called under mrt_lock.
/* Build an mrt6msg and queue it on the daemon socket. For
 * MRT6MSG_WHOLEPKT the whole packet is copied behind the message
 * header; otherwise only the IPv6 header is. Returns 0 or a negative
 * errno; the allocation-failure and final return paths are on lines
 * missing from this extract. */
747 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
753 #ifdef CONFIG_IPV6_PIMSM_V2
754 if (assert == MRT6MSG_WHOLEPKT)
755 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
759 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
764 /* I suppose that internal messages
765 * do not require checksums */
767 skb->ip_summed = CHECKSUM_UNNECESSARY;
769 #ifdef CONFIG_IPV6_PIMSM_V2
770 if (assert == MRT6MSG_WHOLEPKT) {
771 /* Ugly, but we have no choice with this interface.
772 Duplicate old header, fix length etc.
773 And all this only to mangle msg->im6_msgtype and
774 to set msg->im6_mbz to "mbz" :-)
776 skb_push(skb, -skb_network_offset(pkt));
778 skb_push(skb, sizeof(*msg));
779 skb_reset_transport_header(skb);
780 msg = (struct mrt6msg *)skb_transport_header(skb);
782 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
783 msg->im6_mif = reg_vif_num;
785 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
786 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
788 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Non-WHOLEPKT path: copy only the original IPv6 header ... */
796 skb_put(skb, sizeof(struct ipv6hdr));
797 skb_reset_network_header(skb);
798 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
/* ... then append the mrt6msg itself. */
803 skb_put(skb, sizeof(*msg));
804 skb_reset_transport_header(skb);
805 msg = (struct mrt6msg *)skb_transport_header(skb);
808 msg->im6_msgtype = assert;
811 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
812 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
814 skb->dst = dst_clone(pkt->dst);
815 skb->ip_summed = CHECKSUM_UNNECESSARY;
817 skb_pull(skb, sizeof(struct ipv6hdr));
/* No daemon attached: nothing to deliver to. */
820 if (mroute6_socket == NULL) {
826 * Deliver to user space multicast routing algorithms
828 if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
830 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
838 * Queue a packet for resolution. It gets locked cache entry!
/* Park 'skb' on the unresolved entry for its (src, grp) pair, creating
 * the entry and notifying the daemon (MRT6MSG_NOCACHE) on first use.
 * The found/not-found branch, timer arming, and the drop path when the
 * per-entry queue exceeds 3 skbs are partly on missing lines. */
842 ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
845 struct mfc6_cache *c;
847 spin_lock_bh(&mfc_unres_lock);
848 for (c = mfc_unres_queue; c; c = c->next) {
849 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
850 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
856 * Create a new entry if allowable
/* Cap the unresolved queue at 10 entries. */
859 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
860 (c = ip6mr_cache_alloc_unres()) == NULL) {
861 spin_unlock_bh(&mfc_unres_lock);
868 * Fill in the new cache entry
871 c->mf6c_origin = ipv6_hdr(skb)->saddr;
872 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
875 * Reflect first query at pim6sd
877 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
878 /* If the report failed throw the cache entry
/* ... away (rest of comment on missing lines). */
881 spin_unlock_bh(&mfc_unres_lock);
883 kmem_cache_free(mrt_cachep, c);
888 atomic_inc(&cache_resolve_queue_len);
889 c->next = mfc_unres_queue;
/* Arm the expiry timer for the freshly queued entry. */
892 ipmr_do_expire_process(1);
896 * See if we can append the packet
898 if (c->mfc_un.unres.unresolved.qlen > 3) {
902 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
906 spin_unlock_bh(&mfc_unres_lock);
911 * MFC6 cache manipulation by user space
/* Remove the resolved (origin, group) entry named by 'mfc'. Unlinking
 * (*cp = c->next) and the -ENOENT return are on missing lines. */
914 static int ip6mr_mfc_delete(struct mf6cctl *mfc)
917 struct mfc6_cache *c, **cp;
919 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
921 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
922 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
923 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
924 write_lock_bh(&mrt_lock);
926 write_unlock_bh(&mrt_lock);
928 kmem_cache_free(mrt_cachep, c);
/* Netdevice notifier: on NETDEV_UNREGISTER, tear down every MIF bound
 * to the vanishing device (the mif6_delete call is on a missing line). */
935 static int ip6mr_device_event(struct notifier_block *this,
936 unsigned long event, void *ptr)
938 struct net_device *dev = ptr;
939 struct mif_device *v;
/* Only the initial network namespace is supported here. */
942 if (dev_net(dev) != &init_net)
945 if (event != NETDEV_UNREGISTER)
949 for (ct = 0; ct < maxvif; ct++, v++) {
956 static struct notifier_block ip6_mr_notifier = {
957 .notifier_call = ip6mr_device_event
961 * Setup for IP multicast routing
/* Boot-time init: create the mfc6_cache slab, arm the expiry timer,
 * register the netdevice notifier and the /proc entries. */
964 void __init ip6_mr_init(void)
966 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
967 sizeof(struct mfc6_cache),
968 0, SLAB_HWCACHE_ALIGN,
971 panic("cannot allocate ip6_mrt_cache");
973 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
974 register_netdevice_notifier(&ip6_mr_notifier);
975 #ifdef CONFIG_PROC_FS
976 proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
977 proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
/* Add or update a resolved MFC entry from user space. If an unresolved
 * entry for the same (origin, group) exists, it is unlinked and its
 * queued packets replayed via ip6mr_cache_resolve(). Some branch/return
 * lines are missing from this extract. */
982 static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
985 struct mfc6_cache *uc, *c, **cp;
986 unsigned char ttls[MAXMIFS];
/* Translate the user's interface bitmap into a TTL vector:
 * 255 = disabled; the per-bit assignment is on a missing line. */
989 memset(ttls, 255, MAXMIFS);
990 for (i = 0; i < MAXMIFS; i++) {
991 if (IF_ISSET(i, &mfc->mf6cc_ifset))
996 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
998 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
999 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1000 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
/* Entry already exists: update it in place. */
1005 write_lock_bh(&mrt_lock);
1006 c->mf6c_parent = mfc->mf6cc_parent;
1007 ip6mr_update_thresholds(c, ttls);
1009 c->mfc_flags |= MFC_STATIC;
1010 write_unlock_bh(&mrt_lock);
1014 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1017 c = ip6mr_cache_alloc();
1021 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1022 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1023 c->mf6c_parent = mfc->mf6cc_parent;
1024 ip6mr_update_thresholds(c, ttls);
1026 c->mfc_flags |= MFC_STATIC;
1028 write_lock_bh(&mrt_lock);
1029 c->next = mfc6_cache_array[line];
1030 mfc6_cache_array[line] = c;
1031 write_unlock_bh(&mrt_lock);
1034 * Check to see if we resolved a queued list. If so we
1035 * need to send on the frames and tidy up.
1037 spin_lock_bh(&mfc_unres_lock);
1038 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1040 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1041 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
/* Last unresolved entry gone: stop the expiry timer. */
1043 if (atomic_dec_and_test(&cache_resolve_queue_len))
1044 del_timer(&ipmr_expire_timer);
1048 spin_unlock_bh(&mfc_unres_lock);
1051 ip6mr_cache_resolve(uc, c);
1052 kmem_cache_free(mrt_cachep, uc);
1058 * Close the multicast socket, and clear the vif tables etc
/* Tear down every non-static MIF and MFC entry, then flush the
 * unresolved queue. Runs when the daemon socket closes. */
1061 static void mroute_clean_tables(struct sock *sk)
1066 * Shut down all active vif entries
1068 for (i = 0; i < maxvif; i++) {
1069 if (!(vif6_table[i].flags & VIFF_STATIC))
1076 for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
1077 struct mfc6_cache *c, **cp;
1079 cp = &mfc6_cache_array[i];
1080 while ((c = *cp) != NULL) {
/* Static entries survive daemon shutdown. */
1081 if (c->mfc_flags & MFC_STATIC) {
1085 write_lock_bh(&mrt_lock);
1087 write_unlock_bh(&mrt_lock);
1089 kmem_cache_free(mrt_cachep, c);
1093 if (atomic_read(&cache_resolve_queue_len) != 0) {
1094 struct mfc6_cache *c;
1096 spin_lock_bh(&mfc_unres_lock);
1097 while (mfc_unres_queue != NULL) {
1098 c = mfc_unres_queue;
1099 mfc_unres_queue = c->next;
/* Drop the lock around destroy: it frees skbs and may report
 * to netlink readers. */
1100 spin_unlock_bh(&mfc_unres_lock);
1102 ip6mr_destroy_unres(c);
1104 spin_lock_bh(&mfc_unres_lock);
1106 spin_unlock_bh(&mfc_unres_lock);
/* Attach 'sk' as the (single) multicast routing daemon socket.
 * The -EADDRINUSE path for a second daemon is on missing lines. */
1110 static int ip6mr_sk_init(struct sock *sk)
1115 write_lock_bh(&mrt_lock);
1116 if (likely(mroute6_socket == NULL))
1117 mroute6_socket = sk;
1120 write_unlock_bh(&mrt_lock);
/* Detach the daemon socket and clean non-static routing state. */
1127 int ip6mr_sk_done(struct sock *sk)
1132 if (sk == mroute6_socket) {
1133 write_lock_bh(&mrt_lock);
1134 mroute6_socket = NULL;
1135 write_unlock_bh(&mrt_lock);
1137 mroute_clean_tables(sk);
1146 * Socket options and virtual interface manipulation. The whole
1147 * virtual interface system is a complete heap, but unfortunately
1148 * that's how BSD mrouted happens to think. Maybe one day with a proper
1149 * MOSPF/PIM router set up we can clean this up.
/* MRT6_* setsockopt handler. The case labels, rtnl locking, and some
 * returns are on lines missing from this extract. */
1152 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
/* Everything but MRT6_INIT requires being the daemon socket or
 * CAP_NET_ADMIN. */
1159 if (optname != MRT6_INIT) {
1160 if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
/* MRT6_INIT: only raw ICMPv6 sockets may become the daemon. */
1166 if (sk->sk_type != SOCK_RAW ||
1167 inet_sk(sk)->num != IPPROTO_ICMPV6)
1169 if (optlen < sizeof(int))
1172 return ip6mr_sk_init(sk);
1175 return ip6mr_sk_done(sk);
/* MRT6_ADD_MIF */
1178 if (optlen < sizeof(vif))
1180 if (copy_from_user(&vif, optval, sizeof(vif)))
1182 if (vif.mif6c_mifi >= MAXMIFS)
1185 ret = mif6_add(&vif, sk == mroute6_socket);
/* MRT6_DEL_MIF */
1190 if (optlen < sizeof(mifi_t))
1192 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1195 ret = mif6_delete(mifi);
1200 * Manipulate the forwarding caches. These live
1201 * in a sort of kernel/user symbiosis.
1205 if (optlen < sizeof(mfc))
1207 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1210 if (optname == MRT6_DEL_MFC)
1211 ret = ip6mr_mfc_delete(&mfc);
1213 ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
1218 * Control PIM assert (to activate pim will activate assert)
1223 if (get_user(v, (int __user *)optval))
1225 mroute_do_assert = !!v;
1229 #ifdef CONFIG_IPV6_PIMSM_V2
/* MRT6_PIM: toggling PIM also (un)registers the IPPROTO_PIM handler;
 * the 'mroute_do_pim = v;' assignment is on a missing line. */
1233 if (get_user(v, (int __user *)optval))
1238 if (v != mroute_do_pim) {
1240 mroute_do_assert = v;
1242 ret = inet6_add_protocol(&pim6_protocol,
1245 ret = inet6_del_protocol(&pim6_protocol,
1256 * Spurious command, or MRT_VERSION which you cannot
1260 return -ENOPROTOOPT;
1265 * Getsock opt support for the multicast routing system.
/* Returns MRT6_PIM / MRT6_ASSERT state (the switch head and MRT6_VERSION
 * case are on missing lines); copies min(olr, sizeof(int)) bytes out. */
1268 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1278 #ifdef CONFIG_IPV6_PIMSM_V2
1280 val = mroute_do_pim;
1284 val = mroute_do_assert;
1287 return -ENOPROTOOPT;
1290 if (get_user(olr, optlen))
/* Never copy more than an int; negative olr rejected on a missing line. */
1293 olr = min_t(int, olr, sizeof(int));
1297 if (put_user(olr, optlen))
1299 if (copy_to_user(optval, &val, olr))
1305 * The IP multicast ioctl support routines.
/* SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: report per-MIF or per-(S,G)
 * counters to user space. Each branch releases mrt_lock before the
 * copy_to_user. */
1308 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1310 struct sioc_sg_req6 sr;
1311 struct sioc_mif_req6 vr;
1312 struct mif_device *vif;
1313 struct mfc6_cache *c;
1316 case SIOCGETMIFCNT_IN6:
1317 if (copy_from_user(&vr, arg, sizeof(vr)))
1319 if (vr.mifi >= maxvif)
1321 read_lock(&mrt_lock);
1322 vif = &vif6_table[vr.mifi];
1323 if (MIF_EXISTS(vr.mifi)) {
1324 vr.icount = vif->pkt_in;
1325 vr.ocount = vif->pkt_out;
1326 vr.ibytes = vif->bytes_in;
1327 vr.obytes = vif->bytes_out;
1328 read_unlock(&mrt_lock);
1330 if (copy_to_user(arg, &vr, sizeof(vr)))
/* MIF slot empty: report no such address. */
1334 read_unlock(&mrt_lock);
1335 return -EADDRNOTAVAIL;
1336 case SIOCGETSGCNT_IN6:
1337 if (copy_from_user(&sr, arg, sizeof(sr)))
1340 read_lock(&mrt_lock);
1341 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1343 sr.pktcnt = c->mfc_un.res.pkt;
1344 sr.bytecnt = c->mfc_un.res.bytes;
1345 sr.wrong_if = c->mfc_un.res.wrong_if;
1346 read_unlock(&mrt_lock);
1348 if (copy_to_user(arg, &sr, sizeof(sr)))
/* No matching (S,G) entry. */
1352 read_unlock(&mrt_lock);
1353 return -EADDRNOTAVAIL;
1355 return -ENOIOCTLCMD;
/* Netfilter okfn for the FORWARD hook below: hand the packet to the
 * output path (an IP6SKB_FORWARDED flag update appears to be on a
 * missing line). */
1360 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1363 return dst_output(skb);
1367 * Processing handlers for ip6mr_forward
/* Transmit one copy of 'skb' out of MIF 'vifi': route the packet,
 * decrement hop_limit, account bytes/packets, and pass it through the
 * NF_INET_FORWARD hook. Register vifs instead report the whole packet
 * to the daemon. Error/free paths are on missing lines. */
1370 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1372 struct ipv6hdr *ipv6h;
1373 struct mif_device *vif = &vif6_table[vifi];
1374 struct net_device *dev;
1375 struct dst_entry *dst;
1378 if (vif->dev == NULL)
1381 #ifdef CONFIG_IPV6_PIMSM_V2
/* Register vif: no real transmission, just report to pim6sd. */
1382 if (vif->flags & MIFF_REGISTER) {
1384 vif->bytes_out += skb->len;
1385 ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
1386 ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
1387 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1393 ipv6h = ipv6_hdr(skb);
1395 fl = (struct flowi) {
1398 { .daddr = ipv6h->daddr, }
1402 dst = ip6_route_output(&init_net, NULL, &fl);
/* Attach the freshly looked-up route to the skb. */
1406 dst_release(skb->dst);
1410 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1411 * not only before forwarding, but after forwarding on all output
1412 * interfaces. It is clear, if mrouter runs a multicasting
1413 * program, it should receive packets not depending to what interface
1414 * program is joined.
1415 * If we will not make it, the program will have to join on all
1416 * interfaces. On the other hand, multihoming host (or router, but
1417 * not mrouter) cannot join to more than one interface - it will
1418 * result in receiving multiple packets.
1423 vif->bytes_out += skb->len;
1425 /* We are about to write */
1426 /* XXX: extension headers? */
1427 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1430 ipv6h = ipv6_hdr(skb);
1433 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1435 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1436 ip6mr_forward2_finish);
/* Map a net_device back to its MIF index, or a negative value when the
 * device is not a MIF (the final return is on a missing line). */
1443 static int ip6mr_find_vif(struct net_device *dev)
1446 for (ct = maxvif - 1; ct >= 0; ct--) {
1447 if (vif6_table[ct].dev == dev)
/* Forward 'skb' according to resolved entry 'cache': verify it arrived
 * on the expected parent interface (else count wrong_if and possibly
 * send a PIM assert), then clone a copy to every oif whose TTL
 * threshold passes. Called under mrt_lock. */
1453 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1458 vif = cache->mf6c_parent;
1459 cache->mfc_un.res.pkt++;
1460 cache->mfc_un.res.bytes += skb->len;
1463 * Wrong interface: drop packet and (maybe) send PIM assert.
1465 if (vif6_table[vif].dev != skb->dev) {
1468 cache->mfc_un.res.wrong_if++;
1469 true_vifi = ip6mr_find_vif(skb->dev);
1471 if (true_vifi >= 0 && mroute_do_assert &&
1472 /* pimsm uses asserts, when switching from RPT to SPT,
1473 so that we cannot check that packet arrived on an oif.
1474 It is bad, but otherwise we would need to move pretty
1475 large chunk of pimd to kernel. Ough... --ANK
1477 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
/* Rate-limit asserts to one per MFC_ASSERT_THRESH. */
1479 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1480 cache->mfc_un.res.last_assert = jiffies;
1481 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1486 vif6_table[vif].pkt_in++;
1487 vif6_table[vif].bytes_in += skb->len;
/* Clone for every earlier matching oif; the last match ('psend')
 * consumes the original skb below. */
1492 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1493 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1495 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1497 ip6mr_forward2(skb2, cache, psend);
1503 ip6mr_forward2(skb, cache, psend);
1514 * Multicast packets for forwarding arrive here
/* Entry point from the IPv6 receive path: look up the (S,G) entry and
 * forward, or queue the packet as unresolved when no entry exists. */
1517 int ip6_mr_input(struct sk_buff *skb)
1519 struct mfc6_cache *cache;
1521 read_lock(&mrt_lock);
1522 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1525 * No usable cache entry
1527 if (cache == NULL) {
1530 vif = ip6mr_find_vif(skb->dev);
1532 int err = ip6mr_cache_unresolved(vif, skb);
1533 read_unlock(&mrt_lock);
/* Packet arrived on a non-MIF device: drop (kfree on missing line). */
1537 read_unlock(&mrt_lock);
1542 ip6_mr_forward(skb, cache);
1544 read_unlock(&mrt_lock);
/* Encode an MFC entry as rtnetlink attributes: RTA_IIF (parent device
 * ifindex) plus an RTA_MULTIPATH nexthop list of the active oifs.
 * Returns >0 on success; the rtattr_failure trim path is on missing
 * lines. */
1551 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1554 struct rtnexthop *nhp;
1555 struct net_device *dev = vif6_table[c->mf6c_parent].dev;
/* Remember the tail so a failure can trim back to 'b' (missing line). */
1556 u8 *b = skb_tail_pointer(skb);
1557 struct rtattr *mp_head;
1560 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1562 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1564 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
/* ttl < 255 marks an active output interface. */
1565 if (c->mfc_un.res.ttls[ct] < 255) {
1566 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1567 goto rtattr_failure;
1568 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1569 nhp->rtnh_flags = 0;
1570 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1571 nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
1572 nhp->rtnh_len = sizeof(*nhp);
/* Patch the multipath header now that its total length is known. */
1575 mp_head->rta_type = RTA_MULTIPATH;
1576 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1577 rtm->rtm_type = RTN_MULTICAST;
1585 int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1588 struct mfc6_cache *cache;
1589 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1591 read_lock(&mrt_lock);
1592 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1595 struct sk_buff *skb2;
1596 struct ipv6hdr *iph;
1597 struct net_device *dev;
1601 read_unlock(&mrt_lock);
1606 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1607 read_unlock(&mrt_lock);
1611 /* really correct? */
1612 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1614 read_unlock(&mrt_lock);
1618 skb_reset_transport_header(skb2);
1620 skb_put(skb2, sizeof(struct ipv6hdr));
1621 skb_reset_network_header(skb2);
1623 iph = ipv6_hdr(skb2);
1626 iph->flow_lbl[0] = 0;
1627 iph->flow_lbl[1] = 0;
1628 iph->flow_lbl[2] = 0;
1629 iph->payload_len = 0;
1630 iph->nexthdr = IPPROTO_NONE;
1632 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1633 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1635 err = ip6mr_cache_unresolved(vif, skb2);
1636 read_unlock(&mrt_lock);
1641 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1642 cache->mfc_flags |= MFC_NOTIFY;
1644 err = ip6mr_fill_mroute(skb, cache, rtm);
1645 read_unlock(&mrt_lock);