1 //==========================================================================
3 // sys/netinet/ip_output.c
7 //==========================================================================
8 //####BSDCOPYRIGHTBEGIN####
10 // -------------------------------------------
12 // Portions of this software may have been derived from OpenBSD or other sources,
13 // and are covered by the appropriate copyright disclaimers included herein.
15 // -------------------------------------------
17 //####BSDCOPYRIGHTEND####
18 //==========================================================================
19 //#####DESCRIPTIONBEGIN####
22 // Contributors: gthomas
28 //####DESCRIPTIONEND####
30 //==========================================================================
33 /* $OpenBSD: ip_output.c,v 1.57 1999/12/10 08:55:23 angelos Exp $ */
34 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */
37 * Copyright (c) 1982, 1986, 1988, 1990, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the University of
51 * California, Berkeley and its contributors.
52 * 4. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
71 #include <sys/param.h>
72 #include <sys/malloc.h>
74 #include <sys/errno.h>
75 #include <sys/protosw.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
79 #include <sys/systm.h>
81 #include <sys/kernel.h>
90 #include <net/route.h>
92 #include <netinet/in.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/ip.h>
95 #include <netinet/in_pcb.h>
96 #include <netinet/in_var.h>
97 #include <netinet/ip_var.h>
100 #include <machine/mtpr.h>
103 #include <machine/stdarg.h>
106 #include <netinet/ip_ah.h>
107 #include <netinet/ip_esp.h>
108 #include <netinet/udp.h>
109 #include <netinet/tcp.h>
110 #include <net/pfkeyv2.h>
113 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0)
119 #define offsetof(s, e) ((int)&((s *)0)->e)
122 extern u_int8_t get_sa_require __P((struct inpcb *));
126 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
127 static void ip_mloopback
128 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
129 #if defined(IPFILTER) || defined(IPFILTER_LKM)
130 int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
134 extern int ipsec_auth_default_level;
135 extern int ipsec_esp_trans_default_level;
136 extern int ipsec_esp_network_default_level;
138 extern int pfkeyv2_acquire(struct tdb *, int);
146 * IP output. The packet in mbuf chain m contains a skeletal IP
147 * header (with len, off, ttl, proto, tos, src, dst).
148 * The mbuf chain containing the packet will be freed.
149 * The mbuf opt, if present, will not be freed.
153 ip_output(struct mbuf *m0, ...)
155 ip_output(m0, va_alist)
160 register struct ip *ip, *mhip;
161 register struct ifnet *ifp;
162 register struct mbuf *m = m0;
163 register int hlen = sizeof (struct ip);
164 int len, off, error = 0;
165 struct route iproute;
166 struct sockaddr_in *dst;
167 struct in_ifaddr *ia;
171 struct ip_moptions *imo;
174 union sockaddr_union sunion;
180 struct route_enc re0, *re = &re0;
181 struct sockaddr_encap *ddst, *gw;
182 u_int8_t sa_require, sa_have = 0;
192 opt = va_arg(ap, struct mbuf *);
193 ro = va_arg(ap, struct route *);
194 flags = va_arg(ap, int);
195 imo = va_arg(ap, struct ip_moptions *);
197 inp = va_arg(ap, struct inpcb *);
202 if ((m->m_flags & M_PKTHDR) == 0)
203 panic("ip_output no HDR");
206 m = ip_insertoptions(m, opt, &len);
209 ip = mtod(m, struct ip *);
213 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
214 ip->ip_v = IPVERSION;
217 ip->ip_id = ip_randomid();
219 ip->ip_id = htons(ip_id++);
221 ip->ip_hl = hlen >> 2;
222 ipstat.ips_localout++;
224 hlen = ip->ip_hl << 2;
232 bzero((caddr_t)ro, sizeof (*ro));
234 dst = satosin(&ro->ro_dst);
236 * If there is a cached route,
237 * check that it is to the same destination
238 * and is still up. If not, free it and try again.
240 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
241 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
243 ro->ro_rt = (struct rtentry *)0;
245 if (ro->ro_rt == 0) {
246 dst->sin_family = AF_INET;
247 dst->sin_len = sizeof(*dst);
248 dst->sin_addr = ip->ip_dst;
251 * If routing to interface only,
252 * short circuit routing lookup.
254 if (flags & IP_ROUTETOIF) {
255 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
256 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
257 ipstat.ips_noroute++;
266 if (ro->ro_rt == 0) {
267 ipstat.ips_noroute++;
268 error = EHOSTUNREACH;
271 ia = ifatoia(ro->ro_rt->rt_ifa);
272 ifp = ro->ro_rt->rt_ifp;
274 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
275 dst = satosin(ro->ro_rt->rt_gateway);
277 if (IN_MULTICAST(ip->ip_dst.s_addr)) {
278 struct in_multi *inm;
280 m->m_flags |= M_MCAST;
282 * IP destination address is multicast. Make sure "dst"
283 * still points to the address in "ro". (It may have been
284 * changed to point to a gateway address, above.)
286 dst = satosin(&ro->ro_dst);
288 * See if the caller provided any multicast options
291 ip->ip_ttl = imo->imo_multicast_ttl;
292 if (imo->imo_multicast_ifp != NULL)
293 ifp = imo->imo_multicast_ifp;
295 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
297 * Confirm that the outgoing interface supports multicast.
299 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
300 ipstat.ips_noroute++;
305 * If source address not specified yet, use address
306 * of outgoing interface.
308 if (ip->ip_src.s_addr == INADDR_ANY) {
309 register struct in_ifaddr *ia;
311 for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next)
312 if (ia->ia_ifp == ifp) {
313 ip->ip_src = ia->ia_addr.sin_addr;
318 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
320 (imo == NULL || imo->imo_multicast_loop)) {
322 * If we belong to the destination multicast group
323 * on the outgoing interface, and the caller did not
324 * forbid loopback, loop back a copy.
326 ip_mloopback(ifp, m, dst);
331 * If we are acting as a multicast router, perform
332 * multicast forwarding as if the packet had just
333 * arrived on the interface to which we are about
334 * to send. The multicast forwarding function
335 * recursively calls this function, using the
336 * IP_FORWARDING flag to prevent infinite recursion.
338 * Multicasts that are looped back by ip_mloopback(),
339 * above, will be forwarded by the ip_input() routine,
342 extern struct socket *ip_mrouter;
344 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
345 if (ip_mforward(m, ifp) != 0) {
353 * Multicasts with a time-to-live of zero may be looped-
354 * back, above, but must not be transmitted on a network.
355 * Also, multicasts addressed to the loopback interface
356 * are not sent -- the above call to ip_mloopback() will
357 * loop back a copy if this host actually belongs to the
358 * destination group on the loopback interface.
360 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
369 * If source address not specified yet, use address
370 * of outgoing interface.
372 if (ip->ip_src.s_addr == INADDR_ANY)
373 ip->ip_src = ia->ia_addr.sin_addr;
376 * Look for broadcast address and
377 * verify user is allowed to send
380 if (in_broadcast(dst->sin_addr, ifp)) {
381 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
382 error = EADDRNOTAVAIL;
385 if ((flags & IP_ALLOWBROADCAST) == 0) {
389 /* don't allow broadcast messages to be fragmented */
390 if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
394 m->m_flags |= M_BCAST;
396 m->m_flags &= ~M_BCAST;
401 * Check if the packet needs encapsulation.
403 if (!(flags & IP_ENCAPSULATED) &&
405 inp->inp_seclevel[SL_AUTH] != IPSEC_LEVEL_BYPASS ||
406 inp->inp_seclevel[SL_ESP_TRANS] != IPSEC_LEVEL_BYPASS ||
407 inp->inp_seclevel[SL_ESP_NETWORK] != IPSEC_LEVEL_BYPASS)) {
409 sa_require = get_sa_require(inp);
411 sa_require = inp->inp_secrequire;
413 bzero((caddr_t) re, sizeof(*re));
416 * splnet is chosen over spltdb because we are not allowed to
417 * lower the level, and udp_output calls us in splnet().
422 * Check if there was an outgoing SA bound to the flow
423 * from a transport protocol.
425 if (inp && inp->inp_tdb &&
426 (inp->inp_tdb->tdb_dst.sin.sin_addr.s_addr == INADDR_ANY ||
427 !bcmp(&inp->inp_tdb->tdb_dst.sin.sin_addr,
428 &ip->ip_dst, sizeof(ip->ip_dst)))) {
438 ddst = (struct sockaddr_encap *) &re->re_dst;
439 ddst->sen_family = PF_KEY;
440 ddst->sen_len = SENT_IP4_LEN;
441 ddst->sen_type = SENT_IP4;
442 ddst->sen_ip_src = ip->ip_src;
443 ddst->sen_ip_dst = ip->ip_dst;
444 ddst->sen_proto = ip->ip_p;
448 if (m->m_len < hlen + 2 * sizeof(u_int16_t)) {
449 if ((m = m_pullup(m, hlen + 2 *
450 sizeof(u_int16_t))) == 0)
452 ip = mtod(m, struct ip *);
454 udp = (struct udphdr *) (mtod(m, u_char *) + hlen);
455 ddst->sen_sport = ntohs(udp->uh_sport);
456 ddst->sen_dport = ntohs(udp->uh_dport);
460 if (m->m_len < hlen + 2 * sizeof(u_int16_t)) {
461 if ((m = m_pullup(m, hlen + 2 *
462 sizeof(u_int16_t))) == 0)
464 ip = mtod(m, struct ip *);
466 tcp = (struct tcphdr *) (mtod(m, u_char *) + hlen);
467 ddst->sen_sport = ntohs(tcp->th_sport);
468 ddst->sen_dport = ntohs(tcp->th_dport);
476 rtalloc((struct route *) re);
477 if (re->re_rt == NULL) {
482 gw = (struct sockaddr_encap *) (re->re_rt->rt_gateway);
485 if (gw == NULL || ((gw->sen_type != SENT_IPSP) &&
486 (gw->sen_type != SENT_IPSP6))) {
488 DPRINTF(("ip_output(): no gw or gw data not IPSP\n"));
492 error = EHOSTUNREACH;
498 * There might be a specific route, that tells us to avoid
499 * doing IPsec; this is useful for specific routes that we
500 * don't want to have IPsec applied on, like the key
504 if ((gw != NULL) && (gw->sen_ipsp_sproto == 0) &&
505 (gw->sen_ipsp_spi == 0)) {
506 if ((gw->sen_family == AF_INET) &&
507 (gw->sen_ipsp_dst.s_addr == 0)) {
513 if ((gw->sen_family == AF_INET6) &&
514 IN6_IS_ADDR_UNSPECIFIED(&gw->sen_ipsp6_dst)) {
522 * At this point we have an IPSP "gateway" (tunnel) spec.
523 * Use the destination of the tunnel and the SPI to
524 * look up the necessary Tunnel Control Block. Look it up,
525 * and then pass it, along with the packet and the gw,
526 * to the appropriate transformation.
528 bzero(&sunion, sizeof(sunion));
530 if (gw->sen_type == SENT_IPSP) {
531 sunion.sin.sin_family = AF_INET;
532 sunion.sin.sin_len = sizeof(struct sockaddr_in);
533 sunion.sin.sin_addr = gw->sen_ipsp_dst;
536 if (gw->sen_type == SENT_IPSP6) {
537 sunion.sin6.sin6_family = AF_INET6;
538 sunion.sin6.sin6_len = sizeof(struct sockaddr_in6);
539 sunion.sin6.sin6_addr = gw->sen_ipsp6_dst;
543 tdb = (struct tdb *) gettdb(gw->sen_ipsp_spi, &sunion,
544 gw->sen_ipsp_sproto);
547 * For VPNs a route with a reserved SPI is used to
548 * indicate the need for an SA when none is established.
550 if (((ntohl(gw->sen_ipsp_spi) == SPI_LOCAL_USE) &&
551 (gw->sen_type == SENT_IPSP)) ||
552 ((ntohl(gw->sen_ipsp6_spi) == SPI_LOCAL_USE) &&
553 (gw->sen_type == SENT_IPSP6))) {
556 * XXX We should construct a TDB from system
557 * default (which should be tunable via sysctl).
558 * For now, drop packet and ignore SPD entry.
564 if (tdb->tdb_authalgxform)
565 sa_require = NOTIFY_SATYPE_AUTH;
566 if (tdb->tdb_encalgxform)
567 sa_require |= NOTIFY_SATYPE_CONF;
568 if (tdb->tdb_flags & TDBF_TUNNELING)
569 sa_require |= NOTIFY_SATYPE_TUNNEL;
572 /* PF_KEYv2 notification message */
573 if (tdb && tdb->tdb_satype != SADB_X_SATYPE_BYPASS)
574 if ((error = pfkeyv2_acquire(tdb, 0)) != 0)
580 * When sa_require is set, the packet will be dropped
588 ip->ip_len = htons((u_short) ip->ip_len);
589 ip->ip_off = htons((u_short) ip->ip_off);
593 * Now we check if this tdb has all the transforms which
594 * are required by the socket or our default policy.
596 SPI_CHAIN_ATTRIB(sa_have, tdb_onext, tdb);
598 if (sa_require & ~sa_have)
603 if (gw->sen_type == SENT_IPSP)
604 DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet_ntoa4(gw->sen_ipsp_dst), ntohl(gw->sen_ipsp_spi), gw->sen_ipsp_sproto));
607 DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet6_ntoa4(gw->sen_ipsp6_dst), ntohl(gw->sen_ipsp6_spi), gw->sen_ipsp6_sproto));
612 error = EHOSTUNREACH;
617 for (t = tdb; t != NULL; t = t->tdb_onext)
618 if ((t->tdb_sproto == IPPROTO_ESP && !esp_enable) ||
619 (t->tdb_sproto == IPPROTO_AH && !ah_enable)) {
620 DPRINTF(("ip_output(): IPSec outbound packet dropped due to policy\n"));
624 error = EHOSTUNREACH;
629 while (tdb && tdb->tdb_xform) {
630 /* Check if the SPI is invalid */
631 if (tdb->tdb_flags & TDBF_INVALID) {
633 DPRINTF(("ip_output(): attempt to use invalid SA %s/%08x/%u\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto));
642 if (tdb->tdb_dst.sa.sa_family != AF_INET) {
644 DPRINTF(("ip_output(): attempt to use SA %s/%08x/%u for protocol family %d\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto, tdb->tdb_dst.sa.sa_family));
652 /* Register first use, setup expiration timer */
653 if (tdb->tdb_first_use == 0) {
654 tdb->tdb_first_use = time.tv_sec;
655 tdb_expiration(tdb, TDBEXP_TIMEOUT);
658 /* Check for tunneling */
659 if (((tdb->tdb_dst.sa.sa_family == AF_INET) &&
660 (tdb->tdb_dst.sin.sin_addr.s_addr !=
662 (tdb->tdb_dst.sin.sin_addr.s_addr !=
663 ip->ip_dst.s_addr)) ||
664 (tdb->tdb_dst.sa.sa_family == AF_INET6) ||
665 ((tdb->tdb_flags & TDBF_TUNNELING) &&
666 (tdb->tdb_xform->xf_type != XF_IP4))) {
667 /* Fix length and checksum */
668 ip->ip_len = htons(m->m_pkthdr.len);
669 ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
670 error = ipe4_output(m, tdb, &mp,
672 offsetof(struct ip, ip_p));
681 if (tdb->tdb_dst.sa.sa_family == AF_INET)
684 if (tdb->tdb_dst.sa.sa_family == AF_INET6)
691 if ((tdb->tdb_xform->xf_type == XF_IP4) &&
692 (tdb->tdb_dst.sa.sa_family == AF_INET)) {
693 ip = mtod(m, struct ip *);
694 ip->ip_len = htons(m->m_pkthdr.len);
695 ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
699 if ((tdb->tdb_xform->xf_type == XF_IP4) &&
700 (tdb->tdb_dst.sa.sa_family == AF_INET6)) {
701 ip6 = mtod(m, struct ip6_hdr *);
702 ip6->ip6_plen = htons(m->m_pkthdr.len);
708 * This assumes that there is only just an IPv6
712 error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, sizeof(struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt));
716 error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, ip->ip_hl << 2, offsetof(struct ip, ip_p));
717 if (!error && mp == NULL)
732 ip = mtod(m, struct ip *);
733 ip->ip_len = htons(m->m_pkthdr.len);
738 ip6 = mtod(m, struct ip6_hdr *);
739 ip6->ip6_plen = htons(m->m_pkthdr.len);
742 tdb = tdb->tdb_onext;
747 ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
750 * At this point, m is pointing to an mbuf chain with the
751 * processed packet. Call ourselves recursively, but
752 * bypass the encap code.
758 ip = mtod(m, struct ip *);
762 return ip_output(m, NULL, NULL,
763 IP_ENCAPSULATED | IP_RAWOUTPUT,
769 ip6 = mtod(m, struct ip6_hdr *);
770 NTOHS(ip6->ip6_plen);
772 /* Naturally, ip6_output() has to honor those two flags */
773 return ip6_output(m, NULL, NULL,
774 IP_ENCAPSULATED | IP_RAWOUTPUT,
780 /* This is for possible future use, don't move or delete */
783 /* No IPSec processing though it was required, drop packet */
785 error = EHOSTUNREACH;
792 #if defined(IPFILTER) || defined(IPFILTER_LKM)
794 * looks like most checking has been done now...do a filter check
798 if (fr_checkp && (*fr_checkp)(ip, hlen, ifp, 1, &m0)) {
799 error = EHOSTUNREACH;
802 ip = mtod(m = m0, struct ip *);
806 * If small enough for interface, can just send directly.
808 if ((u_int16_t)ip->ip_len <= ifp->if_mtu) {
809 ip->ip_len = htons((u_int16_t)ip->ip_len);
810 ip->ip_off = htons((u_int16_t)ip->ip_off);
812 ip->ip_sum = in_cksum(m, hlen);
813 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
818 * Too large for interface; fragment if possible.
819 * Must be able to put at least 8 bytes per fragment.
823 * If IPsec packet is too big for the interface, try to fragment it.
824 * XXX This really is a quick hack. May be inappropriate.
825 * XXX fails if somebody is sending AH'ed packet, with:
826 * sizeof(packet without AH) < mtu < sizeof(packet with AH)
828 if (sab && ip->ip_p != IPPROTO_AH && (flags & IP_FORWARDING) == 0)
829 ip->ip_off &= ~IP_DF;
831 if (ip->ip_off & IP_DF) {
833 ipstat.ips_cantfrag++;
836 len = (ifp->if_mtu - hlen) &~ 7;
843 int mhlen, firstlen = len;
844 struct mbuf **mnext = &m->m_nextpkt;
847 * Loop through length of segment after first fragment,
848 * make new header and copy data of each part and link onto chain.
851 mhlen = sizeof (struct ip);
852 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
853 MGETHDR(m, M_DONTWAIT, MT_HEADER);
856 ipstat.ips_odropped++;
860 mnext = &m->m_nextpkt;
861 m->m_data += max_linkhdr;
862 mhip = mtod(m, struct ip *);
864 if (hlen > sizeof (struct ip)) {
865 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
866 mhip->ip_hl = mhlen >> 2;
869 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
870 if (ip->ip_off & IP_MF)
871 mhip->ip_off |= IP_MF;
872 if (off + len >= (u_int16_t)ip->ip_len)
873 len = (u_int16_t)ip->ip_len - off;
875 mhip->ip_off |= IP_MF;
876 mhip->ip_len = htons((u_int16_t)(len + mhlen));
877 m->m_next = m_copy(m0, off, len);
878 if (m->m_next == 0) {
879 error = ENOBUFS; /* ??? */
880 ipstat.ips_odropped++;
883 m->m_pkthdr.len = mhlen + len;
884 m->m_pkthdr.rcvif = (struct ifnet *)0;
885 mhip->ip_off = htons((u_int16_t)mhip->ip_off);
887 mhip->ip_sum = in_cksum(m, mhlen);
888 ipstat.ips_ofragments++;
891 * Update first fragment by trimming what's been copied out
892 * and updating header, then send each fragment (in order).
895 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
896 m->m_pkthdr.len = hlen + firstlen;
897 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
898 ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF));
900 ip->ip_sum = in_cksum(m, hlen);
902 for (m = m0; m; m = m0) {
906 error = (*ifp->if_output)(ifp, m, sintosa(dst),
913 ipstat.ips_fragmented++;
916 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
925 * Insert IP options into preformed packet.
926 * Adjust IP destination as required for IP source routing,
927 * as indicated by a non-zero in_addr at the start of the options.
930 ip_insertoptions(m, opt, phlen)
931 register struct mbuf *m;
935 register struct ipoption *p = mtod(opt, struct ipoption *);
937 register struct ip *ip = mtod(m, struct ip *);
940 optlen = opt->m_len - sizeof(p->ipopt_dst);
941 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
942 return (m); /* XXX should fail */
943 if (p->ipopt_dst.s_addr)
944 ip->ip_dst = p->ipopt_dst;
945 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
946 MGETHDR(n, M_DONTWAIT, MT_HEADER);
949 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
950 m->m_len -= sizeof(struct ip);
951 m->m_data += sizeof(struct ip);
954 m->m_len = optlen + sizeof(struct ip);
955 m->m_data += max_linkhdr;
956 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
960 m->m_pkthdr.len += optlen;
961 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
963 ip = mtod(m, struct ip *);
964 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
965 *phlen = sizeof(struct ip) + optlen;
966 ip->ip_len += optlen;
971 * Copy options from ip to jp,
972 * omitting those not copied during fragmentation.
978 register u_char *cp, *dp;
979 int opt, optlen, cnt;
981 cp = (u_char *)(ip + 1);
982 dp = (u_char *)(jp + 1);
983 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
984 for (; cnt > 0; cnt -= optlen, cp += optlen) {
986 if (opt == IPOPT_EOL)
988 if (opt == IPOPT_NOP) {
989 /* Preserve for IP mcast tunnel's LSRR alignment. */
994 optlen = cp[IPOPT_OLEN];
995 /* bogus lengths should have been caught by ip_dooptions */
998 if (IPOPT_COPIED(opt)) {
999 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
1003 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1009 * IP socket option processing.
1012 ip_ctloutput(op, so, level, optname, mp)
1018 register struct inpcb *inp = sotoinpcb(so);
1019 register struct mbuf *m = *mp;
1020 register int optval = 0;
1022 struct proc *p = curproc; /* XXX */
1024 struct tdb_ident *tdbip, tdbi;
1029 if (level != IPPROTO_IP) {
1031 if (op == PRCO_SETOPT && *mp)
1033 } else switch (op) {
1040 return (ip_pcbopts(optname, &inp->inp_options, m));
1042 return (ip_pcbopts(&inp->inp_options, m));
1048 case IP_RECVRETOPTS:
1049 case IP_RECVDSTADDR:
1050 if (m == NULL || m->m_len != sizeof(int))
1053 optval = *mtod(m, int *);
1057 inp->inp_ip.ip_tos = optval;
1061 inp->inp_ip.ip_ttl = optval;
1063 #define OPTSET(bit) \
1065 inp->inp_flags |= bit; \
1067 inp->inp_flags &= ~bit;
1070 OPTSET(INP_RECVOPTS);
1073 case IP_RECVRETOPTS:
1074 OPTSET(INP_RECVRETOPTS);
1077 case IP_RECVDSTADDR:
1078 OPTSET(INP_RECVDSTADDR);
1085 case IP_MULTICAST_IF:
1086 case IP_MULTICAST_TTL:
1087 case IP_MULTICAST_LOOP:
1088 case IP_ADD_MEMBERSHIP:
1089 case IP_DROP_MEMBERSHIP:
1090 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1094 if (m == 0 || m->m_len != sizeof(int))
1097 optval = *mtod(m, int *);
1101 case IP_PORTRANGE_DEFAULT:
1102 inp->inp_flags &= ~(INP_LOWPORT);
1103 inp->inp_flags &= ~(INP_HIGHPORT);
1106 case IP_PORTRANGE_HIGH:
1107 inp->inp_flags &= ~(INP_LOWPORT);
1108 inp->inp_flags |= INP_HIGHPORT;
1111 case IP_PORTRANGE_LOW:
1112 inp->inp_flags &= ~(INP_HIGHPORT);
1113 inp->inp_flags |= INP_LOWPORT;
1128 if (m == 0 || m->m_len != sizeof(struct tdb_ident)) {
1131 tdbip = mtod(m, struct tdb_ident *);
1132 tdb = gettdb(tdbip->spi, &tdbip->dst,
1137 tdb_add_inp(tdb, inp);
1144 case IP_ESP_TRANS_LEVEL:
1145 case IP_ESP_NETWORK_LEVEL:
1149 if (m == 0 || m->m_len != sizeof(int)) {
1153 optval = *mtod(m, u_char *);
1155 if (optval < IPSEC_LEVEL_BYPASS ||
1156 optval > IPSEC_LEVEL_UNIQUE) {
1163 if (optval < ipsec_auth_default_level &&
1164 suser(p->p_ucred, &p->p_acflag)) {
1168 inp->inp_seclevel[SL_AUTH] = optval;
1171 case IP_ESP_TRANS_LEVEL:
1172 if (optval < ipsec_esp_trans_default_level &&
1173 suser(p->p_ucred, &p->p_acflag)) {
1177 inp->inp_seclevel[SL_ESP_TRANS] = optval;
1180 case IP_ESP_NETWORK_LEVEL:
1181 if (optval < ipsec_esp_network_default_level &&
1182 suser(p->p_ucred, &p->p_acflag)) {
1186 inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1190 inp->inp_secrequire = get_sa_require(inp);
1195 error = ENOPROTOOPT;
1206 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1207 if (inp->inp_options) {
1208 m->m_len = inp->inp_options->m_len;
1209 bcopy(mtod(inp->inp_options, caddr_t),
1210 mtod(m, caddr_t), (unsigned)m->m_len);
1218 case IP_RECVRETOPTS:
1219 case IP_RECVDSTADDR:
1220 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1221 m->m_len = sizeof(int);
1225 optval = inp->inp_ip.ip_tos;
1229 optval = inp->inp_ip.ip_ttl;
1232 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1235 optval = OPTBIT(INP_RECVOPTS);
1238 case IP_RECVRETOPTS:
1239 optval = OPTBIT(INP_RECVRETOPTS);
1242 case IP_RECVDSTADDR:
1243 optval = OPTBIT(INP_RECVDSTADDR);
1246 *mtod(m, int *) = optval;
1249 case IP_MULTICAST_IF:
1250 case IP_MULTICAST_TTL:
1251 case IP_MULTICAST_LOOP:
1252 case IP_ADD_MEMBERSHIP:
1253 case IP_DROP_MEMBERSHIP:
1254 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1258 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1259 m->m_len = sizeof(int);
1261 if (inp->inp_flags & INP_HIGHPORT)
1262 optval = IP_PORTRANGE_HIGH;
1263 else if (inp->inp_flags & INP_LOWPORT)
1264 optval = IP_PORTRANGE_LOW;
1268 *mtod(m, int *) = optval;
1276 if (inp->inp_tdb == NULL) {
1279 tdbi.spi = inp->inp_tdb->tdb_spi;
1280 tdbi.dst = inp->inp_tdb->tdb_dst;
1281 tdbi.proto = inp->inp_tdb->tdb_sproto;
1282 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1283 m->m_len = sizeof(tdbi);
1284 bcopy((caddr_t)&tdbi, mtod(m, caddr_t),
1285 (unsigned)m->m_len);
1292 case IP_ESP_TRANS_LEVEL:
1293 case IP_ESP_NETWORK_LEVEL:
1295 *mtod(m, int *) = IPSEC_LEVEL_NONE;
1299 optval = inp->inp_seclevel[SL_AUTH];
1302 case IP_ESP_TRANS_LEVEL:
1303 optval = inp->inp_seclevel[SL_ESP_TRANS];
1306 case IP_ESP_NETWORK_LEVEL:
1307 optval = inp->inp_seclevel[SL_ESP_NETWORK];
1310 *mtod(m, int *) = optval;
1314 error = ENOPROTOOPT;
1323 * Set up IP options in pcb for insertion in output packets.
1324 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1325 * with destination address if source routed.
1329 ip_pcbopts(optname, pcbopt, m)
1332 ip_pcbopts(pcbopt, m)
1334 struct mbuf **pcbopt;
1335 register struct mbuf *m;
1337 register int cnt, optlen;
1338 register u_char *cp;
1341 /* turn off any old options */
1343 (void)m_free(*pcbopt);
1345 if (m == (struct mbuf *)0 || m->m_len == 0) {
1347 * Only turning off any previous options.
1355 if (m->m_len % sizeof(int32_t))
1359 * IP first-hop destination address will be stored before
1360 * actual options; move other options back
1361 * and clear it when none present.
1363 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1366 m->m_len += sizeof(struct in_addr);
1367 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1368 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1369 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1371 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1372 opt = cp[IPOPT_OPTVAL];
1373 if (opt == IPOPT_EOL)
1375 if (opt == IPOPT_NOP)
1378 optlen = cp[IPOPT_OLEN];
1379 if (optlen <= IPOPT_OLEN || optlen > cnt)
1390 * user process specifies route as:
1392 * D must be our final destination (but we can't
1393 * check that since we may not have connected yet).
1394 * A is first hop destination, which doesn't appear in
1395 * actual IP option, but is stored before the options.
1397 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1399 m->m_len -= sizeof(struct in_addr);
1400 cnt -= sizeof(struct in_addr);
1401 optlen -= sizeof(struct in_addr);
1402 cp[IPOPT_OLEN] = optlen;
1404 * Move first hop before start of options.
1406 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1407 sizeof(struct in_addr));
1409 * Then copy rest of options back
1410 * to close up the deleted entry.
1412 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1413 sizeof(struct in_addr)),
1414 (caddr_t)&cp[IPOPT_OFFSET+1],
1415 (unsigned)cnt + sizeof(struct in_addr));
1419 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1430 * Set the IP multicast options in response to user setsockopt().
1433 ip_setmoptions(optname, imop, m)
1435 struct ip_moptions **imop;
1438 register int error = 0;
1441 struct in_addr addr;
1442 register struct ip_mreq *mreq;
1443 register struct ifnet *ifp;
1444 register struct ip_moptions *imo = *imop;
1446 register struct sockaddr_in *dst;
1450 * No multicast option buffer attached to the pcb;
1451 * allocate one and initialize to default values.
1453 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1459 imo->imo_multicast_ifp = NULL;
1460 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1461 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1462 imo->imo_num_memberships = 0;
1467 case IP_MULTICAST_IF:
1469 * Select the interface for outgoing multicast packets.
1471 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1475 addr = *(mtod(m, struct in_addr *));
1477 * INADDR_ANY is used to remove a previous selection.
1478 * When no interface is selected, a default one is
1479 * chosen every time a multicast packet is sent.
1481 if (addr.s_addr == INADDR_ANY) {
1482 imo->imo_multicast_ifp = NULL;
1486 * The selected interface is identified by its local
1487 * IP address. Find the interface and confirm that
1488 * it supports multicasting.
1490 INADDR_TO_IFP(addr, ifp);
1491 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1492 error = EADDRNOTAVAIL;
1495 imo->imo_multicast_ifp = ifp;
1498 case IP_MULTICAST_TTL:
1500 * Set the IP time-to-live for outgoing multicast packets.
1502 if (m == NULL || m->m_len != 1) {
1506 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1509 case IP_MULTICAST_LOOP:
1511 * Set the loopback flag for outgoing multicast packets.
1512 * Must be zero or one.
1514 if (m == NULL || m->m_len != 1 ||
1515 (loop = *(mtod(m, u_char *))) > 1) {
1519 imo->imo_multicast_loop = loop;
1522 case IP_ADD_MEMBERSHIP:
1524 * Add a multicast group membership.
1525 * Group must be a valid IP multicast address.
1527 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1531 mreq = mtod(m, struct ip_mreq *);
1532 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1537 * If no interface address was provided, use the interface of
1538 * the route to the given multicast address.
1540 if (mreq->imr_interface.s_addr == INADDR_ANY) {
1542 dst = satosin(&ro.ro_dst);
1543 dst->sin_len = sizeof(*dst);
1544 dst->sin_family = AF_INET;
1545 dst->sin_addr = mreq->imr_multiaddr;
1547 if (ro.ro_rt == NULL) {
1548 error = EADDRNOTAVAIL;
1551 ifp = ro.ro_rt->rt_ifp;
1554 INADDR_TO_IFP(mreq->imr_interface, ifp);
1557 * See if we found an interface, and confirm that it
1558 * supports multicast.
1560 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1561 error = EADDRNOTAVAIL;
1565 * See if the membership already exists or if all the
1566 * membership slots are full.
1568 for (i = 0; i < imo->imo_num_memberships; ++i) {
1569 if (imo->imo_membership[i]->inm_ifp == ifp &&
1570 imo->imo_membership[i]->inm_addr.s_addr
1571 == mreq->imr_multiaddr.s_addr)
1574 if (i < imo->imo_num_memberships) {
1578 if (i == IP_MAX_MEMBERSHIPS) {
1579 error = ETOOMANYREFS;
1583 * Everything looks good; add a new record to the multicast
1584 * address list for the given interface.
1586 if ((imo->imo_membership[i] =
1587 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1591 ++imo->imo_num_memberships;
1594 case IP_DROP_MEMBERSHIP:
1596 * Drop a multicast group membership.
1597 * Group must be a valid IP multicast address.
1599 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1603 mreq = mtod(m, struct ip_mreq *);
1604 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1609 * If an interface address was specified, get a pointer
1610 * to its ifnet structure.
1612 if (mreq->imr_interface.s_addr == INADDR_ANY)
1615 INADDR_TO_IFP(mreq->imr_interface, ifp);
1617 error = EADDRNOTAVAIL;
1622 * Find the membership in the membership array.
1624 for (i = 0; i < imo->imo_num_memberships; ++i) {
1626 imo->imo_membership[i]->inm_ifp == ifp) &&
1627 imo->imo_membership[i]->inm_addr.s_addr ==
1628 mreq->imr_multiaddr.s_addr)
1631 if (i == imo->imo_num_memberships) {
1632 error = EADDRNOTAVAIL;
1636 * Give up the multicast address record to which the
1637 * membership points.
1639 in_delmulti(imo->imo_membership[i]);
1641 * Remove the gap in the membership array.
1643 for (++i; i < imo->imo_num_memberships; ++i)
1644 imo->imo_membership[i-1] = imo->imo_membership[i];
1645 --imo->imo_num_memberships;
1654 * If all options have default values, no need to keep the mbuf.
1656 if (imo->imo_multicast_ifp == NULL &&
1657 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1658 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1659 imo->imo_num_memberships == 0) {
1660 free(*imop, M_IPMOPTS);
1668 * Return the IP multicast options in response to user getsockopt().
1671 ip_getmoptions(optname, imo, mp)
1673 register struct ip_moptions *imo;
1674 register struct mbuf **mp;
1678 struct in_addr *addr;
1679 struct in_ifaddr *ia;
1681 *mp = m_get(M_WAIT, MT_SOOPTS);
1685 case IP_MULTICAST_IF:
1686 addr = mtod(*mp, struct in_addr *);
1687 (*mp)->m_len = sizeof(struct in_addr);
1688 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1689 addr->s_addr = INADDR_ANY;
1691 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1692 addr->s_addr = (ia == NULL) ? INADDR_ANY
1693 : ia->ia_addr.sin_addr.s_addr;
1697 case IP_MULTICAST_TTL:
1698 ttl = mtod(*mp, u_char *);
1700 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1701 : imo->imo_multicast_ttl;
1704 case IP_MULTICAST_LOOP:
1705 loop = mtod(*mp, u_char *);
1707 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1708 : imo->imo_multicast_loop;
1712 return (EOPNOTSUPP);
1717 * Discard the IP multicast options.
1720 ip_freemoptions(imo)
1721 register struct ip_moptions *imo;
1726 for (i = 0; i < imo->imo_num_memberships; ++i)
1727 in_delmulti(imo->imo_membership[i]);
1728 free(imo, M_IPMOPTS);
1733 * Routine called from ip_output() to loop back a copy of an IP multicast
1734 * packet to the input queue of a specified interface. Note that this
1735 * calls the output routine of the loopback "driver", but with an interface
1736 * pointer that might NOT be &loif -- easier than replicating that code here.
1739 ip_mloopback(ifp, m, dst)
1741 register struct mbuf *m;
1742 register struct sockaddr_in *dst;
1744 register struct ip *ip;
1747 copym = m_copy(m, 0, M_COPYALL);
1748 if (copym != NULL) {
1750 * We don't bother to fragment if the IP length is greater
1751 * than the interface's MTU. Can this possibly matter?
1753 ip = mtod(copym, struct ip *);
1754 ip->ip_len = htons((u_int16_t)ip->ip_len);
1755 ip->ip_off = htons((u_int16_t)ip->ip_off);
1757 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1758 (void) looutput(ifp, copym, sintosa(dst), NULL);