2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on net/ipv4/icmp.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 * Andi Kleen : exception handling
22 * Andi Kleen add rate limits. never reply to a icmp.
23 * add more length checks and other fixes.
24 * yoshfuji : ensure to sent parameter problem for
26 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
28 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
45 #include <linux/sysctl.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/icmpv6.h>
56 #include <net/ip6_checksum.h>
57 #include <net/protocol.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
65 #include <net/inet_common.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
71 * The ICMP socket(s). This is the most convenient way to flow control
72 * our ICMP output as well as maintain a clean interface throughout
73 * all layers. All Socketless IP sends will soon be gone.
75 * On SMP we have one ICMP socket per-cpu.
/*
 * Return the per-CPU ICMPv6 control socket for network namespace @net.
 * Indexes the namespace's icmp_sk array by the current CPU id.
 * NOTE(review): this excerpt elides the function's brace lines.
 */
77 static inline struct sock *icmpv6_sk(struct net *net)
79 return net->ipv6.icmp_sk[smp_processor_id()];
82 static int icmpv6_rcv(struct sk_buff *skb);
/*
 * Protocol handler registration entry for IPPROTO_ICMPV6.
 * NOPOLICY/FINAL: skip inbound xfrm policy checks here (icmpv6_rcv does
 * its own) and mark this as a final (non-extension-header) protocol.
 * NOTE(review): closing brace of the initializer is elided in this excerpt.
 */
84 static const struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Grab the per-CPU ICMPv6 socket for transmission.
 * Uses spin_trylock on the socket lock rather than an unconditional lock:
 * the comment below explains the re-entrancy scenario being guarded
 * against (dst_link_failure() re-entering ICMP output from ICMP output).
 * NOTE(review): several body lines are elided in this excerpt.
 */
89 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
96 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
97 /* This can happen if the output path (f.e. SIT or
98 * ip6ip6 tunnel) signals dst_link_failure() for an
99 * outgoing ICMP6 packet.
/*
 * Release the per-CPU ICMPv6 socket lock taken by icmpv6_xmit_lock()
 * and re-enable bottom halves.
 * NOTE(review): this excerpt elides the function's brace lines.
 */
107 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
109 spin_unlock_bh(&sk->sk_lock.slock);
113 * Slightly more convenient version of icmpv6_send.
/*
 * Convenience wrapper: send an ICMPv6 Parameter Problem message with the
 * given @code, pointing at byte offset @pos of the offending packet @skb.
 * NOTE(review): this excerpt elides the function's brace lines.
 */
115 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
117 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
122 * Figure out, may we reply to this packet with icmp error.
124 * We do not reply, if:
125 * - it was icmp error message.
126 * - it is truncated, so that it is known, that protocol is ICMPV6
127 * (i.e. in the middle of some exthdr)
/*
 * Decide whether @skb is ineligible to receive an ICMPv6 error reply
 * (see the rules listed in the comment block above this function).
 * Walks past the IPv6 extension headers and, if the payload turns out to
 * be ICMPv6, peeks at its type to distinguish errors from info messages.
 * NOTE(review): several body lines (declarations, returns, braces) are
 * elided in this excerpt.
 */
132 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the fixed IPv6 header. */
134 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
135 int len = skb->len - ptr;
136 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Skip any extension headers; updates nexthdr to the upper protocol. */
141 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
144 if (nexthdr == IPPROTO_ICMPV6) {
/* Safely read the ICMPv6 type byte, which may live in a fragment. */
146 tp = skb_header_pointer(skb,
147 ptr+offsetof(struct icmp6hdr, icmp6_type),
148 sizeof(_type), &_type);
/* Types without ICMPV6_INFOMSG_MASK set are error messages. */
150 !(*tp & ICMPV6_INFOMSG_MASK))
157 * Check the ICMP output rate limit
/*
 * Rate-limit check for outgoing ICMPv6 messages.
 * Returns nonzero when transmission is allowed. Informational messages
 * and Packet Too Big are never limited; everything else is token-bucket
 * limited per destination route via xrlim_allow(), with the timeout
 * scaled down for wider (shorter-prefix) destinations.
 * NOTE(review): several body lines are elided in this excerpt.
 */
159 static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
162 struct dst_entry *dst;
163 struct net *net = sock_net(sk);
166 /* Informational messages are not limited. */
167 if (type & ICMPV6_INFOMSG_MASK)
170 /* Do not limit pmtu discovery, it would break it. */
171 if (type == ICMPV6_PKT_TOOBIG)
175 * Look up the output route.
176 * XXX: perhaps the expire for routing entries cloned by
177 * this lookup should be more aggressive (not longer than timeout).
179 dst = ip6_route_output(net, sk, fl);
181 IP6_INC_STATS(net, ip6_dst_idev(dst),
182 IPSTATS_MIB_OUTNOROUTES);
/* Loopback destinations are not rate-limited. */
183 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
186 struct rt6_info *rt = (struct rt6_info *)dst;
/* Base timeout comes from the per-namespace icmpv6_time sysctl. */
187 int tmo = net->ipv6.sysctl.icmpv6_time;
189 /* Give more bandwidth to wider prefixes. */
190 if (rt->rt6i_dst.plen < 128)
191 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 res = xrlim_allow(dst, tmo);
200 * an inline helper for the "simple" if statement below
201 * checks if parameter problem report is caused by an
202 * unrecognized IPv6 option that has the Option Type
203 * highest-order two bits set to 10
/*
 * Test whether the option byte at @offset in @skb is an unrecognized
 * IPv6 option whose Option Type high-order two bits are 10 (binary),
 * i.e. (type & 0xC0) == 0x80 — see the comment block above.
 * NOTE(review): declarations and brace lines are elided in this excerpt.
 */
206 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Convert header-relative offset to an skb data offset. */
210 offset += skb_network_offset(skb);
211 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
214 return (*op & 0xC0) == 0x80;
/*
 * Finalize and transmit the ICMPv6 message queued on @sk's write queue:
 * copy the prepared header @thdr into place, compute the ICMPv6 checksum
 * (single-skb fast path vs. multi-skb walk), then push pending frames.
 * NOTE(review): several body lines (error paths, checksum arguments,
 * braces) are elided in this excerpt.
 */
217 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
220 struct icmp6hdr *icmp6h;
/* Nothing queued: nothing to send. */
223 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
226 icmp6h = icmp6_hdr(skb);
227 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* Checksum field must be zero while the checksum is computed. */
228 icmp6h->icmp6_cksum = 0;
/* Fast path: a single skb holds the whole message. */
230 if (skb_queue_len(&sk->sk_write_queue) == 1) {
231 skb->csum = csum_partial(icmp6h,
232 sizeof(struct icmp6hdr), skb->csum);
233 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Slow path: accumulate partial checksums across every queued skb. */
240 skb_queue_walk(&sk->sk_write_queue, skb) {
241 tmp_csum = csum_add(tmp_csum, skb->csum);
244 tmp_csum = csum_partial(icmp6h,
245 sizeof(struct icmp6hdr), tmp_csum);
246 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
251 ip6_push_pending_frames(sk);
/*
 * ip6_append_data() getfrag callback: copy @len bytes of the original
 * (offending) packet into the ICMPv6 payload while accumulating the
 * checksum, and attach conntrack state for error messages.
 * NOTE(review): some body lines are elided in this excerpt.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
/* Copy from the original packet, checksumming as we go. */
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
/* Only error messages inherit the original's conntrack entry. */
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
/*
 * Mobile IPv6 (MIP6) support: if the packet carries a Home Address
 * Option (HAO) destination option, swap the IPv6 source address with
 * the home address so ICMP errors are addressed to the mobile node's
 * home address. Compiled out to a no-op stub when MIP6 is disabled.
 * NOTE(review): the #else line and some body lines are elided in this
 * excerpt.
 */
276 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
277 static void mip6_addr_swap(struct sk_buff *skb)
279 struct ipv6hdr *iph = ipv6_hdr(skb);
280 struct inet6_skb_parm *opt = IP6CB(skb);
281 struct ipv6_destopt_hao *hao;
/* Locate the HAO TLV within the destination options header. */
286 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
287 if (likely(off >= 0)) {
288 hao = (struct ipv6_destopt_hao *)
289 (skb_network_header(skb) + off);
/* Three-way swap of iph->saddr and hao->addr via tmp. */
290 ipv6_addr_copy(&tmp, &iph->saddr);
291 ipv6_addr_copy(&iph->saddr, &hao->addr);
292 ipv6_addr_copy(&hao->addr, &tmp);
297 static inline void mip6_addr_swap(struct sk_buff *skb) {}
301 * Send an ICMP message in response to a packet in error
/*
 * Send an ICMPv6 error message of @type/@code in response to the
 * erroneous packet @skb; @info becomes the pointer/MTU field.
 *
 * The visible flow: validate header bounds, enforce the RFC reply rules
 * (no errors to multicast/non-unicast destinations except PTB and
 * unrecognized-option ParamProb; never to multicast/unspecified sources;
 * never in reply to an ICMP error), build the flow, rate-limit, route
 * (with an xfrm re-lookup for reverse-session handling), then append
 * and push the message via ip6_append_data()/icmpv6_push_pending_frames().
 * NOTE(review): many body lines (labels, error handling, braces, some
 * arguments) are elided in this excerpt.
 */
303 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
305 struct net *net = dev_net(skb->dev);
306 struct inet6_dev *idev = NULL;
307 struct ipv6hdr *hdr = ipv6_hdr(skb);
309 struct ipv6_pinfo *np;
310 struct in6_addr *saddr = NULL;
311 struct dst_entry *dst;
312 struct dst_entry *dst2;
313 struct icmp6hdr tmp_hdr;
316 struct icmpv6_msg msg;
/* Sanity: the IPv6 header must lie fully within the skb's data. */
323 if ((u8 *)hdr < skb->head ||
324 (skb->network_header + sizeof(*hdr)) > skb->tail)
328 * Make sure we respect the rules
329 * i.e. RFC 1885 2.4(e)
330 * Rule (e.1) is enforced by not using icmpv6_send
331 * in any code that processes icmp errors.
333 addr_type = ipv6_addr_type(&hdr->daddr);
/* Destination is one of our own addresses? (checked against skb->dev) */
335 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
/*
 * Errors to multicast destinations or non-host packets are only
 * permitted for Packet Too Big and for ParamProb on an unrecognized
 * option with the "10" high bits (RFC rules).
 */
342 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
343 if (type != ICMPV6_PKT_TOOBIG &&
344 !(type == ICMPV6_PARAMPROB &&
345 code == ICMPV6_UNK_OPTION &&
346 (opt_unrec(skb, info))))
352 addr_type = ipv6_addr_type(&hdr->saddr);
/* Link-local sources need the interface index for routing. */
358 if (addr_type & IPV6_ADDR_LINKLOCAL)
359 iif = skb->dev->ifindex;
362 * Must not send error if the source does not uniquely
363 * identify a single node (RFC2463 Section 2.4).
364 * We check unspecified / multicast addresses here,
365 * and anycast addresses will be checked later.
367 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
368 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
373 * Never answer to a ICMP packet.
375 if (is_ineligible(skb)) {
376 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Build the flow: reply goes back to the offending packet's source. */
382 memset(&fl, 0, sizeof(fl));
383 fl.proto = IPPROTO_ICMPV6;
384 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
386 ipv6_addr_copy(&fl.fl6_src, saddr);
388 fl.fl_icmp_type = type;
389 fl.fl_icmp_code = code;
390 security_skb_classify_flow(skb, &fl);
392 sk = icmpv6_xmit_lock(net);
/* Enforce the ICMPv6 rate limit before doing any work. */
397 if (!icmpv6_xrlim_allow(sk, type, &fl))
400 tmp_hdr.icmp6_type = type;
401 tmp_hdr.icmp6_code = code;
402 tmp_hdr.icmp6_cksum = 0;
403 tmp_hdr.icmp6_pointer = htonl(info);
405 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
406 fl.oif = np->mcast_oif;
408 err = ip6_dst_lookup(sk, &dst, &fl);
413 * We won't send icmp if the destination is known
416 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
417 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
418 goto out_dst_release;
421 /* No need to clone since we're just using its address. */
424 err = xfrm_lookup(net, &dst, &fl, sk, 0);
/* Re-lookup via the reverse-decoded session for IPsec'd ICMP. */
437 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
438 goto relookup_failed;
440 if (ip6_dst_lookup(sk, &dst2, &fl2))
441 goto relookup_failed;
443 err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
450 goto out_dst_release;
/* Pick hop limit: per-socket multicast/unicast setting or route default. */
459 if (ipv6_addr_is_multicast(&fl.fl6_dst))
460 hlimit = np->mcast_hops;
462 hlimit = np->hop_limit;
464 hlimit = ip6_dst_hoplimit(dst);
467 msg.offset = skb_network_offset(skb);
/* Clamp payload so the error fits in the IPv6 minimum MTU. */
470 len = skb->len - msg.offset;
471 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
473 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
474 goto out_dst_release;
477 idev = in6_dev_get(skb->dev);
479 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
480 len + sizeof(struct icmp6hdr),
481 sizeof(struct icmp6hdr), hlimit,
482 np->tclass, NULL, &fl, (struct rt6_info*)dst,
/* On append failure, drop whatever was queued. */
485 ip6_flush_pending_frames(sk);
488 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
491 if (likely(idev != NULL))
496 icmpv6_xmit_unlock(sk);
499 EXPORT_SYMBOL(icmpv6_send);
/*
 * Reply to an ICMPv6 Echo Request in @skb with an Echo Reply.
 * Copies the request header (preserving id/sequence), swaps the
 * addresses into the flow, routes through xfrm, and sends via
 * ip6_append_data()/icmpv6_push_pending_frames().
 * NOTE(review): many body lines (declarations, labels, braces) are
 * elided in this excerpt.
 */
501 static void icmpv6_echo_reply(struct sk_buff *skb)
503 struct net *net = dev_net(skb->dev);
505 struct inet6_dev *idev;
506 struct ipv6_pinfo *np;
507 struct in6_addr *saddr = NULL;
508 struct icmp6hdr *icmph = icmp6_hdr(skb);
509 struct icmp6hdr tmp_hdr;
511 struct icmpv6_msg msg;
512 struct dst_entry *dst;
/* Reply is sourced from the address the request was sent to. */
516 saddr = &ipv6_hdr(skb)->daddr;
/* Only answer requests addressed to a unicast destination. */
518 if (!ipv6_unicast_destination(skb))
/* Reuse the request header so id/sequence echo back unchanged. */
521 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
522 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
524 memset(&fl, 0, sizeof(fl));
525 fl.proto = IPPROTO_ICMPV6;
526 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
528 ipv6_addr_copy(&fl.fl6_src, saddr);
529 fl.oif = skb->dev->ifindex;
530 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
531 security_skb_classify_flow(skb, &fl);
533 sk = icmpv6_xmit_lock(net);
538 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
539 fl.oif = np->mcast_oif;
541 err = ip6_dst_lookup(sk, &dst, &fl);
544 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
/* Hop limit: socket multicast/unicast setting, else route default. */
547 if (ipv6_addr_is_multicast(&fl.fl6_dst))
548 hlimit = np->mcast_hops;
550 hlimit = np->hop_limit;
552 hlimit = ip6_dst_hoplimit(dst);
554 idev = in6_dev_get(skb->dev);
558 msg.type = ICMPV6_ECHO_REPLY;
560 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
561 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
562 (struct rt6_info*)dst, MSG_DONTWAIT);
/* On append failure, drop whatever was queued. */
565 ip6_flush_pending_frames(sk);
568 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
571 if (likely(idev != NULL))
575 icmpv6_xmit_unlock(sk);
/*
 * Deliver a received ICMPv6 error to the upper-layer protocol it
 * concerns: locate the inner protocol past any extension headers,
 * invoke that protocol's err_handler, then notify raw sockets.
 * NOTE(review): some body lines (error returns, rcu unlock, braces)
 * are elided in this excerpt.
 */
578 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
580 const struct inet6_protocol *ipprot;
585 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
588 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
589 if (ipv6_ext_hdr(nexthdr)) {
590 /* now skip over extension headers */
591 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
595 inner_offset = sizeof(struct ipv6hdr);
598 /* Checkin header including 8 bytes of inner protocol header. */
599 if (!pskb_may_pull(skb, inner_offset+8))
602 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
603 Without this we will not able f.e. to make source routed
605 Corresponding argument (opt) to notifiers is already added.
/* Hash into the inet6_protos table to find the inner protocol. */
609 hash = nexthdr & (MAX_INET_PROTOS - 1);
612 ipprot = rcu_dereference(inet6_protos[hash]);
613 if (ipprot && ipprot->err_handler)
614 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
/* Raw sockets bound to this protocol also get the error. */
617 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
621 * Handle icmp messages
/*
 * Main ICMPv6 receive handler (registered via icmpv6_protocol).
 * Performs xfrm policy checks, verifies the ICMPv6 checksum, bumps
 * MIB counters, then dispatches on message type: echo request/reply,
 * PMTU discovery, error notification, NDISC, and MLD handling.
 * NOTE(review): many body lines (labels, break/drop paths, braces)
 * are elided in this excerpt.
 */
624 static int icmpv6_rcv(struct sk_buff *skb)
626 struct net_device *dev = skb->dev;
627 struct inet6_dev *idev = __in6_dev_get(dev);
628 struct in6_addr *saddr, *daddr;
629 struct ipv6hdr *orig_hdr;
630 struct icmp6hdr *hdr;
633 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
634 struct sec_path *sp = skb_sec_path(skb);
/* Only accept if the last transform carried the ICMP flag. */
637 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
641 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
/* Temporarily shift the network header to policy-check the inner packet. */
644 nh = skb_network_offset(skb);
645 skb_set_network_header(skb, sizeof(*hdr));
647 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
650 skb_set_network_header(skb, nh);
653 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
655 saddr = &ipv6_hdr(skb)->saddr;
656 daddr = &ipv6_hdr(skb)->daddr;
658 /* Perform checksum. */
659 switch (skb->ip_summed) {
660 case CHECKSUM_COMPLETE:
661 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
/* Hardware did not checksum: fold pseudo-header and verify in software. */
666 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
668 if (__skb_checksum_complete(skb)) {
669 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
675 if (!pskb_pull(skb, sizeof(*hdr)))
678 hdr = icmp6_hdr(skb);
680 type = hdr->icmp6_type;
682 ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
685 case ICMPV6_ECHO_REQUEST:
686 icmpv6_echo_reply(skb);
689 case ICMPV6_ECHO_REPLY:
690 /* we couldn't care less */
693 case ICMPV6_PKT_TOOBIG:
694 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
695 standard destination cache. Seems, only "advanced"
696 destination cache will allow to solve this problem
/* Need the embedded original IPv6 header to update the path MTU. */
699 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
701 hdr = icmp6_hdr(skb);
702 orig_hdr = (struct ipv6hdr *) (hdr + 1);
703 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
704 ntohl(hdr->icmp6_mtu));
707 * Drop through to notify
710 case ICMPV6_DEST_UNREACH:
711 case ICMPV6_TIME_EXCEED:
712 case ICMPV6_PARAMPROB:
713 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
716 case NDISC_ROUTER_SOLICITATION:
717 case NDISC_ROUTER_ADVERTISEMENT:
718 case NDISC_NEIGHBOUR_SOLICITATION:
719 case NDISC_NEIGHBOUR_ADVERTISEMENT:
724 case ICMPV6_MGM_QUERY:
725 igmp6_event_query(skb);
728 case ICMPV6_MGM_REPORT:
729 igmp6_event_report(skb);
/* Known but unhandled types: silently accepted below. */
732 case ICMPV6_MGM_REDUCTION:
733 case ICMPV6_NI_QUERY:
734 case ICMPV6_NI_REPLY:
735 case ICMPV6_MLD2_REPORT:
736 case ICMPV6_DHAAD_REQUEST:
737 case ICMPV6_DHAAD_REPLY:
738 case ICMPV6_MOBILE_PREFIX_SOL:
739 case ICMPV6_MOBILE_PREFIX_ADV:
743 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
/* Unknown informational messages are silently discarded. */
746 if (type & ICMPV6_INFOMSG_MASK)
750 * error of unknown type.
751 * must pass to upper level
754 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
761 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
/*
 * Initialize a flowi for an ICMPv6 message of @type from @saddr to
 * @daddr, with ICMP code fixed at 0, and classify it for LSM/security.
 * NOTE(review): some parameter and brace lines are elided in this excerpt.
 */
767 void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
769 const struct in6_addr *saddr,
770 const struct in6_addr *daddr,
773 memset(fl, 0, sizeof(*fl));
774 ipv6_addr_copy(&fl->fl6_src, saddr);
775 ipv6_addr_copy(&fl->fl6_dst, daddr);
776 fl->proto = IPPROTO_ICMPV6;
777 fl->fl_icmp_type = type;
778 fl->fl_icmp_code = 0;
780 security_sk_classify_flow(sk, fl);
784 * Special lock-class for __icmpv6_sk:
786 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * Per-namespace init: allocate the per-CPU array of ICMPv6 control
 * sockets and create one raw ICMPv6 kernel socket per possible CPU,
 * with a dedicated lockdep class and a large send buffer. Cleans up
 * already-created sockets on failure.
 * NOTE(review): some body lines (error returns, labels, braces) are
 * elided in this excerpt.
 */
788 static int __net_init icmpv6_sk_init(struct net *net)
/* One socket pointer slot per possible CPU id, zeroed. */
794 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
795 if (net->ipv6.icmp_sk == NULL)
798 for_each_possible_cpu(i) {
799 err = inet_ctl_sock_create(&sk, PF_INET6,
800 SOCK_RAW, IPPROTO_ICMPV6, net);
803 "Failed to initialize the ICMP6 control socket "
809 net->ipv6.icmp_sk[i] = sk;
812 * Split off their lock-class, because sk->sk_dst_lock
813 * gets used from softirqs, which is safe for
814 * __icmpv6_sk (because those never get directly used
815 * via userspace syscalls), but unsafe for normal sockets.
817 lockdep_set_class(&sk->sk_dst_lock,
818 &icmpv6_socket_sk_dst_lock_key);
820 /* Enough space for 2 64K ICMP packets, including
821 * sk_buff struct overhead.
824 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
/* Error path: destroy sockets created so far, then free the array. */
829 for (j = 0; j < i; j++)
830 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
831 kfree(net->ipv6.icmp_sk);
/*
 * Per-namespace teardown: destroy every per-CPU ICMPv6 control socket
 * and free the pointer array allocated by icmpv6_sk_init().
 * NOTE(review): brace lines are elided in this excerpt.
 */
835 static void __net_exit icmpv6_sk_exit(struct net *net)
839 for_each_possible_cpu(i) {
840 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
842 kfree(net->ipv6.icmp_sk);
/* Per-network-namespace lifecycle hooks for the ICMPv6 control sockets.
 * NOTE(review): closing brace of the initializer is elided in this excerpt. */
845 static struct pernet_operations icmpv6_sk_ops = {
846 .init = icmpv6_sk_init,
847 .exit = icmpv6_sk_exit,
/*
 * Module init: register the per-namespace socket ops, then register
 * the ICMPv6 protocol handler; unregisters the pernet subsystem if
 * protocol registration fails.
 * NOTE(review): some body lines (returns, labels, braces) are elided
 * in this excerpt.
 */
850 int __init icmpv6_init(void)
854 err = register_pernet_subsys(&icmpv6_sk_ops);
859 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
864 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
865 unregister_pernet_subsys(&icmpv6_sk_ops);
/*
 * Module teardown: undo icmpv6_init() in reverse-registration order.
 * NOTE(review): brace lines are elided in this excerpt.
 */
869 void icmpv6_cleanup(void)
871 unregister_pernet_subsys(&icmpv6_sk_ops);
872 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/* Table mapping ICMPV6_DEST_UNREACH codes to (errno, fatal) pairs,
 * indexed by code in icmpv6_err_convert() below.
 * NOTE(review): most table entries are elided in this excerpt. */
876 static const struct icmp6_err {
884 { /* ADM_PROHIBITED */
888 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * Translate an ICMPv6 error (@type/@code) into an errno value stored
 * through @err, returning whether the error is fatal for the socket.
 * Destination Unreachable codes are looked up in tab_unreach above.
 * NOTE(review): several case bodies and returns are elided in this
 * excerpt.
 */
902 int icmpv6_err_convert(u8 type, u8 code, int *err)
909 case ICMPV6_DEST_UNREACH:
/* Codes beyond PORT_UNREACH fall outside the lookup table. */
911 if (code <= ICMPV6_PORT_UNREACH) {
912 *err = tab_unreach[code].err;
913 fatal = tab_unreach[code].fatal;
917 case ICMPV6_PKT_TOOBIG:
921 case ICMPV6_PARAMPROB:
926 case ICMPV6_TIME_EXCEED:
934 EXPORT_SYMBOL(icmpv6_err_convert);
937 ctl_table ipv6_icmp_table_template[] = {
939 .procname = "ratelimit",
940 .data = &init_net.ipv6.sysctl.icmpv6_time,
941 .maxlen = sizeof(int),
943 .proc_handler = proc_dointvec_ms_jiffies,
948 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
950 struct ctl_table *table;
952 table = kmemdup(ipv6_icmp_table_template,
953 sizeof(ipv6_icmp_table_template),
957 table[0].data = &net->ipv6.sysctl.icmpv6_time;