]> git.karo-electronics.de Git - karo-tx-redboot.git/blob - packages/net/tcpip/v2_0/src/sys/netinet/ip_output.c
Initial revision
[karo-tx-redboot.git] / packages / net / tcpip / v2_0 / src / sys / netinet / ip_output.c
1 //==========================================================================
2 //
3 //      sys/netinet/ip_output.c
4 //
5 //     
6 //
7 //==========================================================================
8 //####BSDCOPYRIGHTBEGIN####
9 //
10 // -------------------------------------------
11 //
12 // Portions of this software may have been derived from OpenBSD or other sources,
13 // and are covered by the appropriate copyright disclaimers included herein.
14 //
15 // -------------------------------------------
16 //
17 //####BSDCOPYRIGHTEND####
18 //==========================================================================
19 //#####DESCRIPTIONBEGIN####
20 //
21 // Author(s):    gthomas
22 // Contributors: gthomas
23 // Date:         2000-01-10
24 // Purpose:      
25 // Description:  
26 //              
27 //
28 //####DESCRIPTIONEND####
29 //
30 //==========================================================================
31
32
33 /*      $OpenBSD: ip_output.c,v 1.57 1999/12/10 08:55:23 angelos Exp $  */
34 /*      $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $  */
35
36 /*
37  * Copyright (c) 1982, 1986, 1988, 1990, 1993
38  *      The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by the University of
51  *      California, Berkeley and its contributors.
52  * 4. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  *
68  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
69  */
70
71 #include <sys/param.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/errno.h>
75 #include <sys/protosw.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #ifndef __ECOS
79 #include <sys/systm.h>
80 #endif
81 #include <sys/kernel.h>
82 #ifndef __ECOS
83 #include <sys/proc.h>
84
85 #include <vm/vm.h>
86 #include <sys/proc.h>
87 #endif
88
89 #include <net/if.h>
90 #include <net/route.h>
91
92 #include <netinet/in.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/ip.h>
95 #include <netinet/in_pcb.h>
96 #include <netinet/in_var.h>
97 #include <netinet/ip_var.h>
98
99 #ifdef vax
100 #include <machine/mtpr.h>
101 #endif
102
103 #include <machine/stdarg.h>
104
105 #ifdef IPSEC
106 #include <netinet/ip_ah.h>
107 #include <netinet/ip_esp.h>
108 #include <netinet/udp.h>
109 #include <netinet/tcp.h>
110 #include <net/pfkeyv2.h>
111
112 #ifdef ENCDEBUG
113 #define DPRINTF(x)    do { if (encdebug) printf x ; } while (0)
114 #else
115 #define DPRINTF(x)
116 #endif
117
118 #ifndef offsetof
119 #define offsetof(s, e) ((int)&((s *)0)->e)
120 #endif
121
122 extern u_int8_t get_sa_require  __P((struct inpcb *));
123
124 #endif /* IPSEC */
125
126 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
127 static void ip_mloopback
128         __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
129 #if defined(IPFILTER) || defined(IPFILTER_LKM)
130 int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
131 #endif
132
133 #ifdef IPSEC
134 extern int ipsec_auth_default_level;
135 extern int ipsec_esp_trans_default_level;
136 extern int ipsec_esp_network_default_level;
137
138 extern int pfkeyv2_acquire(struct tdb *, int);
139 #endif
140
141 #ifndef RANDOM_IP_ID    /* was misspelled RAMDOM_IP_ID, so this guard never matched the RANDOM_IP_ID test in ip_output() */
142 u_short ip_id;          /* sequential IP identification counter; ip_output() stores htons(ip_id++) when RANDOM_IP_ID is not defined */
143 #endif /* !RANDOM_IP_ID */
144
145 /*
146  * IP output.  The packet in mbuf chain m contains a skeletal IP
147  * header (with len, off, ttl, proto, tos, src, dst).
148  * The mbuf chain containing the packet will be freed.
149  * The mbuf opt, if present, will not be freed.
150  */
151 int
152 #if __STDC__
153 ip_output(struct mbuf *m0, ...)
154 #else
155 ip_output(m0, va_alist)
156         struct mbuf *m0;
157         va_dcl
158 #endif
159 {
160         register struct ip *ip, *mhip;
161         register struct ifnet *ifp;
162         register struct mbuf *m = m0;
163         register int hlen = sizeof (struct ip);
164         int len, off, error = 0;
165         struct route iproute;
166         struct sockaddr_in *dst;
167         struct in_ifaddr *ia;
168         struct mbuf *opt;
169         struct route *ro;
170         int flags;
171         struct ip_moptions *imo;
172         va_list ap;
173 #ifdef IPSEC
174         union sockaddr_union sunion;
175         struct mbuf *mp;
176         struct udphdr *udp;
177         struct tcphdr *tcp;
178         struct inpcb *inp;
179
180         struct route_enc re0, *re = &re0;
181         struct sockaddr_encap *ddst, *gw;
182         u_int8_t sa_require, sa_have = 0;
183         struct tdb *tdb, *t;
184         int s, ip6flag;
185
186 #ifdef INET6
187         struct ip6_hdr *ip6;
188 #endif /* INET6 */
189 #endif /* IPSEC */
190
191         va_start(ap, m0);
192         opt = va_arg(ap, struct mbuf *);
193         ro = va_arg(ap, struct route *);
194         flags = va_arg(ap, int);
195         imo = va_arg(ap, struct ip_moptions *);
196 #ifdef IPSEC
197         inp = va_arg(ap, struct inpcb *);
198 #endif /* IPSEC */
199         va_end(ap);
200
201 #ifdef  DIAGNOSTIC
202         if ((m->m_flags & M_PKTHDR) == 0)
203                 panic("ip_output no HDR");
204 #endif
205         if (opt) {
206                 m = ip_insertoptions(m, opt, &len);
207                 hlen = len;
208         }
209         ip = mtod(m, struct ip *);
210         /*
211          * Fill in IP header.
212          */
213         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
214                 ip->ip_v = IPVERSION;
215                 ip->ip_off &= IP_DF;
216 #ifdef RANDOM_IP_ID
217                 ip->ip_id = ip_randomid();
218 #else
219                 ip->ip_id = htons(ip_id++);
220 #endif
221                 ip->ip_hl = hlen >> 2;
222                 ipstat.ips_localout++;
223         } else {
224                 hlen = ip->ip_hl << 2;
225         }
226
227         /*
228          * Route packet.
229          */
230         if (ro == 0) {
231                 ro = &iproute;
232                 bzero((caddr_t)ro, sizeof (*ro));
233         }
234         dst = satosin(&ro->ro_dst);
235         /*
236          * If there is a cached route,
237          * check that it is to the same destination
238          * and is still up.  If not, free it and try again.
239          */
240         if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
241             dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
242                 RTFREE(ro->ro_rt);
243                 ro->ro_rt = (struct rtentry *)0;
244         }
245         if (ro->ro_rt == 0) {
246                 dst->sin_family = AF_INET;
247                 dst->sin_len = sizeof(*dst);
248                 dst->sin_addr = ip->ip_dst;
249         }
250         /*
251          * If routing to interface only,
252          * short circuit routing lookup.
253          */
254         if (flags & IP_ROUTETOIF) {
255                 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
256                     (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
257                         ipstat.ips_noroute++;
258                         error = ENETUNREACH;
259                         goto bad;
260                 }
261                 ifp = ia->ia_ifp;
262                 ip->ip_ttl = 1;
263         } else {
264                 if (ro->ro_rt == 0)
265                         rtalloc(ro);
266                 if (ro->ro_rt == 0) {
267                         ipstat.ips_noroute++;
268                         error = EHOSTUNREACH;
269                         goto bad;
270                 }
271                 ia = ifatoia(ro->ro_rt->rt_ifa);
272                 ifp = ro->ro_rt->rt_ifp;
273                 ro->ro_rt->rt_use++;
274                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
275                         dst = satosin(ro->ro_rt->rt_gateway);
276         }
277         if (IN_MULTICAST(ip->ip_dst.s_addr)) {
278                 struct in_multi *inm;
279
280                 m->m_flags |= M_MCAST;
281                 /*
282                  * IP destination address is multicast.  Make sure "dst"
283                  * still points to the address in "ro".  (It may have been
284                  * changed to point to a gateway address, above.)
285                  */
286                 dst = satosin(&ro->ro_dst);
287                 /*
288                  * See if the caller provided any multicast options
289                  */
290                 if (imo != NULL) {
291                         ip->ip_ttl = imo->imo_multicast_ttl;
292                         if (imo->imo_multicast_ifp != NULL)
293                                 ifp = imo->imo_multicast_ifp;
294                 } else
295                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
296                 /*
297                  * Confirm that the outgoing interface supports multicast.
298                  */
299                 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
300                         ipstat.ips_noroute++;
301                         error = ENETUNREACH;
302                         goto bad;
303                 }
304                 /*
305                  * If source address not specified yet, use address
306                  * of outgoing interface.
307                  */
308                 if (ip->ip_src.s_addr == INADDR_ANY) {
309                         register struct in_ifaddr *ia;
310
311                         for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next)
312                                 if (ia->ia_ifp == ifp) {
313                                         ip->ip_src = ia->ia_addr.sin_addr;
314                                         break;
315                                 }
316                 }
317
318                 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
319                 if (inm != NULL &&
320                    (imo == NULL || imo->imo_multicast_loop)) {
321                         /*
322                          * If we belong to the destination multicast group
323                          * on the outgoing interface, and the caller did not
324                          * forbid loopback, loop back a copy.
325                          */
326                         ip_mloopback(ifp, m, dst);
327                 }
328 #ifdef MROUTING
329                 else {
330                         /*
331                          * If we are acting as a multicast router, perform
332                          * multicast forwarding as if the packet had just
333                          * arrived on the interface to which we are about
334                          * to send.  The multicast forwarding function
335                          * recursively calls this function, using the
336                          * IP_FORWARDING flag to prevent infinite recursion.
337                          *
338                          * Multicasts that are looped back by ip_mloopback(),
339                          * above, will be forwarded by the ip_input() routine,
340                          * if necessary.
341                          */
342                         extern struct socket *ip_mrouter;
343
344                         if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
345                                 if (ip_mforward(m, ifp) != 0) {
346                                         m_freem(m);
347                                         goto done;
348                                 }
349                         }
350                 }
351 #endif
352                 /*
353                  * Multicasts with a time-to-live of zero may be looped-
354                  * back, above, but must not be transmitted on a network.
355                  * Also, multicasts addressed to the loopback interface
356                  * are not sent -- the above call to ip_mloopback() will
357                  * loop back a copy if this host actually belongs to the
358                  * destination group on the loopback interface.
359                  */
360                 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
361                         m_freem(m);
362                         goto done;
363                 }
364
365                 goto sendit;
366         }
367 #ifndef notdef
368         /*
369          * If source address not specified yet, use address
370          * of outgoing interface.
371          */
372         if (ip->ip_src.s_addr == INADDR_ANY)
373                 ip->ip_src = ia->ia_addr.sin_addr;
374 #endif
375         /*
376          * Look for broadcast address
377          * and verify user is allowed to send
378          * such a packet.
379          */
380         if (in_broadcast(dst->sin_addr, ifp)) {
381                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
382                         error = EADDRNOTAVAIL;
383                         goto bad;
384                 }
385                 if ((flags & IP_ALLOWBROADCAST) == 0) {
386                         error = EACCES;
387                         goto bad;
388                 }
389                 /* don't allow broadcast messages to be fragmented */
390                 if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
391                         error = EMSGSIZE;
392                         goto bad;
393                 }
394                 m->m_flags |= M_BCAST;
395         } else
396                 m->m_flags &= ~M_BCAST;
397
398 sendit:
399 #ifdef IPSEC
400         /*
401          * Check if the packet needs encapsulation.
402          */
403         if (!(flags & IP_ENCAPSULATED) &&
404             (inp == NULL || 
405              inp->inp_seclevel[SL_AUTH] != IPSEC_LEVEL_BYPASS ||
406              inp->inp_seclevel[SL_ESP_TRANS] != IPSEC_LEVEL_BYPASS ||
407              inp->inp_seclevel[SL_ESP_NETWORK] != IPSEC_LEVEL_BYPASS)) {
408                 if (inp == NULL)
409                         sa_require = get_sa_require(inp);
410                 else
411                         sa_require = inp->inp_secrequire;
412
413                 bzero((caddr_t) re, sizeof(*re));
414
415                 /*
416                  * splnet is chosen over spltdb because we are not allowed to
417                  * lower the level, and udp_output calls us in splnet().
418                  */
419                 s = splnet();
420
421                 /*
422                  * Check if there was an outgoing SA bound to the flow
423                  * from a transport protocol.
424                  */
425                 if (inp && inp->inp_tdb &&
426                     (inp->inp_tdb->tdb_dst.sin.sin_addr.s_addr == INADDR_ANY ||
427                      !bcmp(&inp->inp_tdb->tdb_dst.sin.sin_addr,
428                            &ip->ip_dst, sizeof(ip->ip_dst)))) {
429                         tdb = inp->inp_tdb;
430                         goto have_tdb;
431                 }
432
433                 if (!ipsec_in_use) {
434                         splx(s);
435                         goto no_encap;
436                 }
437
438                 ddst = (struct sockaddr_encap *) &re->re_dst;
439                 ddst->sen_family = PF_KEY;
440                 ddst->sen_len = SENT_IP4_LEN;
441                 ddst->sen_type = SENT_IP4;
442                 ddst->sen_ip_src = ip->ip_src;
443                 ddst->sen_ip_dst = ip->ip_dst;
444                 ddst->sen_proto = ip->ip_p;
445
446                 switch (ip->ip_p) {
447                 case IPPROTO_UDP:
448                         if (m->m_len < hlen + 2 * sizeof(u_int16_t)) {
449                                 if ((m = m_pullup(m, hlen + 2 *
450                                     sizeof(u_int16_t))) == 0)
451                                         return ENOBUFS;
452                                 ip = mtod(m, struct ip *);
453                         }
454                         udp = (struct udphdr *) (mtod(m, u_char *) + hlen);
455                         ddst->sen_sport = ntohs(udp->uh_sport);
456                         ddst->sen_dport = ntohs(udp->uh_dport);
457                         break;
458
459                 case IPPROTO_TCP:
460                         if (m->m_len < hlen + 2 * sizeof(u_int16_t)) {
461                                 if ((m = m_pullup(m, hlen + 2 *
462                                     sizeof(u_int16_t))) == 0)
463                                         return ENOBUFS;
464                                 ip = mtod(m, struct ip *);
465                         }
466                         tcp = (struct tcphdr *) (mtod(m, u_char *) + hlen);
467                         ddst->sen_sport = ntohs(tcp->th_sport);
468                         ddst->sen_dport = ntohs(tcp->th_dport);
469                         break;
470
471                 default:
472                         ddst->sen_sport = 0;
473                         ddst->sen_dport = 0;
474                 }
475
476                 rtalloc((struct route *) re);
477                 if (re->re_rt == NULL) {
478                         splx(s);
479                         goto no_encap;
480                 }
481
482                 gw = (struct sockaddr_encap *) (re->re_rt->rt_gateway);
483
484                 /* Sanity check */
485                 if (gw == NULL || ((gw->sen_type != SENT_IPSP) &&
486                                    (gw->sen_type != SENT_IPSP6))) {
487                         splx(s);
488                         DPRINTF(("ip_output(): no gw or gw data not IPSP\n"));
489
490                         if (re->re_rt)
491                                 RTFREE(re->re_rt);
492                         error = EHOSTUNREACH;
493                         m_freem(m);
494                         goto done;
495                 }
496
497                 /*
498                  * There might be a specific route, that tells us to avoid
499                  * doing IPsec; this is useful for specific routes that we
500                  * don't want to have IPsec applied on, like the key
501                  * management ports.
502                  */
503
504                 if ((gw != NULL) && (gw->sen_ipsp_sproto == 0) &&
505                     (gw->sen_ipsp_spi == 0)) {
506                     if ((gw->sen_family == AF_INET) &&
507                         (gw->sen_ipsp_dst.s_addr == 0)) {
508                         splx(s);
509                         goto no_encap;
510                     }
511
512 #ifdef INET6
513                     if ((gw->sen_family == AF_INET6) &&
514                         IN6_IS_ADDR_UNSPECIFIED(&gw->sen_ipsp6_dst)) {
515                         splx(s);
516                         goto no_encap;
517                     }
518 #endif /* INET6 */
519                 }
520
521                 /*
522                  * At this point we have an IPSP "gateway" (tunnel) spec.
523                  * Use the destination of the tunnel and the SPI to
524                  * look up the necessary Tunnel Control Block. Look it up,
525                  * and then pass it, along with the packet and the gw,
526                  * to the appropriate transformation.
527                  */
528                 bzero(&sunion, sizeof(sunion));
529
530                 if (gw->sen_type == SENT_IPSP) {
531                     sunion.sin.sin_family = AF_INET;
532                     sunion.sin.sin_len = sizeof(struct sockaddr_in);
533                     sunion.sin.sin_addr = gw->sen_ipsp_dst;
534                 }
535 #ifdef INET6
536                 if (gw->sen_type == SENT_IPSP6) {
537                     sunion.sin6.sin6_family = AF_INET6;
538                     sunion.sin6.sin6_len = sizeof(struct sockaddr_in6);
539                     sunion.sin6.sin6_addr = gw->sen_ipsp6_dst;
540                 }
541 #endif /* INET6 */
542
543                 tdb = (struct tdb *) gettdb(gw->sen_ipsp_spi, &sunion,
544                                             gw->sen_ipsp_sproto);
545
546                 /* 
547                  * For VPNs a route with a reserved SPI is used to
548                  * indicate the need for an SA when none is established.
549                  */
550                 if (((ntohl(gw->sen_ipsp_spi) == SPI_LOCAL_USE) &&
551                      (gw->sen_type == SENT_IPSP)) ||
552                     ((ntohl(gw->sen_ipsp6_spi) == SPI_LOCAL_USE) &&
553                      (gw->sen_type == SENT_IPSP6))) {
554                     if (tdb == NULL) {
555                         /*
556                          * XXX We should construct a TDB from system
557                          * default (which should be tunable via sysctl).
558                          * For now, drop packet and ignore SPD entry.
559                          */
560                         splx(s);
561                         goto no_encap;
562                     }
563                     else {
564                         if (tdb->tdb_authalgxform)
565                           sa_require = NOTIFY_SATYPE_AUTH;
566                         if (tdb->tdb_encalgxform)
567                           sa_require |= NOTIFY_SATYPE_CONF;
568                         if (tdb->tdb_flags & TDBF_TUNNELING)
569                           sa_require |= NOTIFY_SATYPE_TUNNEL;
570                     }
571
572                     /* PF_KEYv2 notification message */
573                     if (tdb && tdb->tdb_satype != SADB_X_SATYPE_BYPASS)
574                             if ((error = pfkeyv2_acquire(tdb, 0)) != 0)
575                                     return error;
576
577                     splx(s);
578
579                     /* 
580                      * When sa_require is set, the packet will be dropped
581                      * at no_encap.
582                      */
583                     goto no_encap;
584                 }
585
586              have_tdb:
587
588                 ip->ip_len = htons((u_short) ip->ip_len);
589                 ip->ip_off = htons((u_short) ip->ip_off);
590                 ip->ip_sum = 0;
591
592                 /*
593                  * Now we check if this tdb has all the transforms which
594                  * are required by the socket or our default policy.
595                  */
596                 SPI_CHAIN_ATTRIB(sa_have, tdb_onext, tdb);
597
598                 if (sa_require & ~sa_have)
599                         goto no_encap;
600
601                 if (tdb == NULL) {
602                         splx(s);
603                         if (gw->sen_type == SENT_IPSP)
604                           DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet_ntoa4(gw->sen_ipsp_dst), ntohl(gw->sen_ipsp_spi), gw->sen_ipsp_sproto));
605 #ifdef INET6
606                         else
607                           DPRINTF(("ip_output(): non-existant TDB for SA %s/%08x/%u\n", inet6_ntoa4(gw->sen_ipsp6_dst), ntohl(gw->sen_ipsp6_spi), gw->sen_ipsp6_sproto));
608 #endif /* INET6 */        
609
610                         if (re->re_rt)
611                                 RTFREE(re->re_rt);
612                         error = EHOSTUNREACH;
613                         m_freem(m);
614                         goto done;
615                 }
616
617                 for (t = tdb; t != NULL; t = t->tdb_onext)
618                     if ((t->tdb_sproto == IPPROTO_ESP && !esp_enable) ||
619                         (t->tdb_sproto == IPPROTO_AH && !ah_enable)) {
620                         DPRINTF(("ip_output(): IPSec outbound packet dropped due to policy\n"));
621
622                         if (re->re_rt)
623                                 RTFREE(re->re_rt);
624                         error = EHOSTUNREACH;
625                         m_freem(m);
626                         goto done;
627                     }
628
629                 while (tdb && tdb->tdb_xform) {
630                         /* Check if the SPI is invalid */
631                         if (tdb->tdb_flags & TDBF_INVALID) {
632                                 splx(s);
633                                 DPRINTF(("ip_output(): attempt to use invalid SA %s/%08x/%u\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto));
634                                 m_freem(m);
635                                 if (re->re_rt)
636                                         RTFREE(re->re_rt);
637                                 return ENXIO;
638                         }
639
640 #ifndef INET6
641                         /* Sanity check */
642                         if (tdb->tdb_dst.sa.sa_family != AF_INET) {
643                             splx(s);
644                                 DPRINTF(("ip_output(): attempt to use SA %s/%08x/%u for protocol family %d\n", ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi), tdb->tdb_sproto, tdb->tdb_dst.sa.sa_family));
645                                 m_freem(m);
646                                 if (re->re_rt)
647                                         RTFREE(re->re_rt);
648                                 return ENXIO;
649                         }
650 #endif /* INET6 */
651
652                         /* Register first use, setup expiration timer */
653                         if (tdb->tdb_first_use == 0) {
654                                 tdb->tdb_first_use = time.tv_sec;
655                                 tdb_expiration(tdb, TDBEXP_TIMEOUT);
656                         }
657
658                         /* Check for tunneling */
659                         if (((tdb->tdb_dst.sa.sa_family == AF_INET) &&
660                              (tdb->tdb_dst.sin.sin_addr.s_addr != 
661                               INADDR_ANY) &&
662                              (tdb->tdb_dst.sin.sin_addr.s_addr !=
663                               ip->ip_dst.s_addr)) ||
664                             (tdb->tdb_dst.sa.sa_family == AF_INET6) ||
665                             ((tdb->tdb_flags & TDBF_TUNNELING) &&
666                              (tdb->tdb_xform->xf_type != XF_IP4))) {
667                                 /* Fix length and checksum */
668                                 ip->ip_len = htons(m->m_pkthdr.len);
669                                 ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
670                                 error = ipe4_output(m, tdb, &mp,
671                                                     ip->ip_hl << 2,
672                                                     offsetof(struct ip, ip_p));
673                                 if (mp == NULL)
674                                         error = EFAULT;
675                                 if (error) {
676                                         splx(s);
677                                         if (re->re_rt)
678                                                 RTFREE(re->re_rt);
679                                         return error;
680                                 }
681                                 if (tdb->tdb_dst.sa.sa_family == AF_INET)
682                                         ip6flag = 0;
683 #ifdef INET6
684                                 if (tdb->tdb_dst.sa.sa_family == AF_INET6)
685                                         ip6flag = 1;
686 #endif /* INET6 */
687                                 m = mp;
688                                 mp = NULL;
689                         }
690
691                         if ((tdb->tdb_xform->xf_type == XF_IP4) &&
692                             (tdb->tdb_dst.sa.sa_family == AF_INET)) {
693                                 ip = mtod(m, struct ip *);
694                                 ip->ip_len = htons(m->m_pkthdr.len);
695                                 ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
696                         }
697
698 #ifdef INET6
699                         if ((tdb->tdb_xform->xf_type == XF_IP4) &&
700                             (tdb->tdb_dst.sa.sa_family == AF_INET6)) {
701                             ip6 = mtod(m, struct ip6_hdr *);
702                             ip6->ip6_plen = htons(m->m_pkthdr.len);
703                         }
704 #endif /* INET6 */
705
706 #ifdef INET6
707                         /*
708                          * This assumes that there is only just an IPv6
709                          * header prepended.
710                          */
711                         if (ip6flag)
712                           error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, sizeof(struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt));
713 #endif /* INET6 */
714
715                         if (!ip6flag)
716                           error = (*(tdb->tdb_xform->xf_output))(m, tdb, &mp, ip->ip_hl << 2, offsetof(struct ip, ip_p));
717                         if (!error && mp == NULL)
718                                 error = EFAULT;
719                         if (error) {
720                                 splx(s);
721                                 if (mp != NULL)
722                                         m_freem(mp);
723                                 if (re->re_rt)
724                                         RTFREE(re->re_rt);
725                                 return error;
726                         }
727
728                         m = mp;
729                         mp = NULL;
730
731                         if (!ip6flag) {
732                             ip = mtod(m, struct ip *);
733                             ip->ip_len = htons(m->m_pkthdr.len);
734                         }
735
736 #ifdef INET6
737                         if (ip6flag) {
738                             ip6 = mtod(m, struct ip6_hdr *);
739                             ip6->ip6_plen = htons(m->m_pkthdr.len);
740                         }
741 #endif /* INET6 */
742                         tdb = tdb->tdb_onext;
743                 }
744                 splx(s);
745
746                 if (!ip6flag)
747                   ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
748
749                 /*
750                  * At this point, m is pointing to an mbuf chain with the
751                  * processed packet. Call ourselves recursively, but
752                  * bypass the encap code.
753                  */
754                 if (re->re_rt)
755                         RTFREE(re->re_rt);
756
757                 if (!ip6flag) {
758                     ip = mtod(m, struct ip *);
759                     NTOHS(ip->ip_len);
760                     NTOHS(ip->ip_off);
761
762                     return ip_output(m, NULL, NULL,
763                                      IP_ENCAPSULATED | IP_RAWOUTPUT,
764                                      NULL, NULL);
765                 }
766
767 #ifdef INET6
768                 if (ip6flag) {
769                     ip6 = mtod(m, struct ip6_hdr *);
770                     NTOHS(ip6->ip6_plen);
771
772                     /* Naturally, ip6_output() has to honor those two flags */
773                     return ip6_output(m, NULL, NULL,
774                                      IP_ENCAPSULATED | IP_RAWOUTPUT,
775                                      NULL, NULL);
776                 }
777 #endif /* INET6 */
778
779 no_encap:
780                 /* This is for possible future use, don't move or delete */
781                 if (re->re_rt)
782                         RTFREE(re->re_rt);
783                 /* No IPSec processing though it was required, drop packet */
784                 if (sa_require) {
785                         error = EHOSTUNREACH;
786                         m_freem(m);
787                         goto done;
788                 }
789         }
790 #endif /* IPSEC */
791
792 #if defined(IPFILTER) || defined(IPFILTER_LKM)
793         /*
794          * looks like most checking has been done now...do a filter check
795          */
796         {
797                 struct mbuf *m0 = m;
798                 if (fr_checkp && (*fr_checkp)(ip, hlen, ifp, 1, &m0)) {
799                         error = EHOSTUNREACH;
800                         goto done;
801                 } else
802                         ip = mtod(m = m0, struct ip *);
803         }
804 #endif
805         /*
806          * If small enough for interface, can just send directly.
807          */
808         if ((u_int16_t)ip->ip_len <= ifp->if_mtu) {
809                 ip->ip_len = htons((u_int16_t)ip->ip_len);
810                 ip->ip_off = htons((u_int16_t)ip->ip_off);
811                 ip->ip_sum = 0;
812                 ip->ip_sum = in_cksum(m, hlen);
813                 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
814                 goto done;
815         }
816
817         /*
818          * Too large for interface; fragment if possible.
819          * Must be able to put at least 8 bytes per fragment.
820          */
821 #if 0
822         /*
823          * If IPsec packet is too big for the interface, try fragment it.
824          * XXX This really is a quickhack.  May be inappropriate.
825          * XXX fails if somebody is sending AH'ed packet, with:
826          *      sizeof(packet without AH) < mtu < sizeof(packet with AH)
827          */
828         if (sab && ip->ip_p != IPPROTO_AH && (flags & IP_FORWARDING) == 0)
829                 ip->ip_off &= ~IP_DF;
830 #endif /*IPSEC*/
831         if (ip->ip_off & IP_DF) {
832                 error = EMSGSIZE;
833                 ipstat.ips_cantfrag++;
834                 goto bad;
835         }
836         len = (ifp->if_mtu - hlen) &~ 7;
837         if (len < 8) {
838                 error = EMSGSIZE;
839                 goto bad;
840         }
841
842     {
843         int mhlen, firstlen = len;
844         struct mbuf **mnext = &m->m_nextpkt;
845
846         /*
847          * Loop through length of segment after first fragment,
848          * make new header and copy data of each part and link onto chain.
849          */
850         m0 = m;
851         mhlen = sizeof (struct ip);
852         for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
853                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
854                 if (m == 0) {
855                         error = ENOBUFS;
856                         ipstat.ips_odropped++;
857                         goto sendorfree;
858                 }
859                 *mnext = m;
860                 mnext = &m->m_nextpkt;
861                 m->m_data += max_linkhdr;
862                 mhip = mtod(m, struct ip *);
863                 *mhip = *ip;
864                 if (hlen > sizeof (struct ip)) {
865                         mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
866                         mhip->ip_hl = mhlen >> 2;
867                 }
868                 m->m_len = mhlen;
869                 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
870                 if (ip->ip_off & IP_MF)
871                         mhip->ip_off |= IP_MF;
872                 if (off + len >= (u_int16_t)ip->ip_len)
873                         len = (u_int16_t)ip->ip_len - off;
874                 else
875                         mhip->ip_off |= IP_MF;
876                 mhip->ip_len = htons((u_int16_t)(len + mhlen));
877                 m->m_next = m_copy(m0, off, len);
878                 if (m->m_next == 0) {
879                         error = ENOBUFS;        /* ??? */
880                         ipstat.ips_odropped++;
881                         goto sendorfree;
882                 }
883                 m->m_pkthdr.len = mhlen + len;
884                 m->m_pkthdr.rcvif = (struct ifnet *)0;
885                 mhip->ip_off = htons((u_int16_t)mhip->ip_off);
886                 mhip->ip_sum = 0;
887                 mhip->ip_sum = in_cksum(m, mhlen);
888                 ipstat.ips_ofragments++;
889         }
890         /*
891          * Update first fragment by trimming what's been copied out
892          * and updating header, then send each fragment (in order).
893          */
894         m = m0;
895         m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
896         m->m_pkthdr.len = hlen + firstlen;
897         ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
898         ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF));
899         ip->ip_sum = 0;
900         ip->ip_sum = in_cksum(m, hlen);
901 sendorfree:
902         for (m = m0; m; m = m0) {
903                 m0 = m->m_nextpkt;
904                 m->m_nextpkt = 0;
905                 if (error == 0)
906                         error = (*ifp->if_output)(ifp, m, sintosa(dst),
907                             ro->ro_rt);
908                 else
909                         m_freem(m);
910         }
911
912         if (error == 0)
913                 ipstat.ips_fragmented++;
914     }
915 done:
916         if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
917                 RTFREE(ro->ro_rt);
918         return (error);
919 bad:
920         m_freem(m0);
921         goto done;
922 }
923
924 /*
925  * Insert IP options into preformed packet.
926  * Adjust IP destination as required for IP source routing,
927  * as indicated by a non-zero in_addr at the start of the options.
928  */
929 static struct mbuf *
930 ip_insertoptions(m, opt, phlen)
931         register struct mbuf *m;
932         struct mbuf *opt;
933         int *phlen;
934 {
935         register struct ipoption *p = mtod(opt, struct ipoption *);
936         struct mbuf *n;
937         register struct ip *ip = mtod(m, struct ip *);
938         unsigned optlen;
939
940         optlen = opt->m_len - sizeof(p->ipopt_dst);
941         if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
942                 return (m);             /* XXX should fail */
943         if (p->ipopt_dst.s_addr)
944                 ip->ip_dst = p->ipopt_dst;
945         if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
946                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
947                 if (n == 0)
948                         return (m);
949                 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
950                 m->m_len -= sizeof(struct ip);
951                 m->m_data += sizeof(struct ip);
952                 n->m_next = m;
953                 m = n;
954                 m->m_len = optlen + sizeof(struct ip);
955                 m->m_data += max_linkhdr;
956                 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
957         } else {
958                 m->m_data -= optlen;
959                 m->m_len += optlen;
960                 m->m_pkthdr.len += optlen;
961                 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
962         }
963         ip = mtod(m, struct ip *);
964         bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
965         *phlen = sizeof(struct ip) + optlen;
966         ip->ip_len += optlen;
967         return (m);
968 }
969
970 /*
971  * Copy options from ip to jp,
972  * omitting those not copied during fragmentation.
973  */
974 int
975 ip_optcopy(ip, jp)
976         struct ip *ip, *jp;
977 {
978         register u_char *cp, *dp;
979         int opt, optlen, cnt;
980
981         cp = (u_char *)(ip + 1);
982         dp = (u_char *)(jp + 1);
983         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
984         for (; cnt > 0; cnt -= optlen, cp += optlen) {
985                 opt = cp[0];
986                 if (opt == IPOPT_EOL)
987                         break;
988                 if (opt == IPOPT_NOP) {
989                         /* Preserve for IP mcast tunnel's LSRR alignment. */
990                         *dp++ = IPOPT_NOP;
991                         optlen = 1;
992                         continue;
993                 } else
994                         optlen = cp[IPOPT_OLEN];
995                 /* bogus lengths should have been caught by ip_dooptions */
996                 if (optlen > cnt)
997                         optlen = cnt;
998                 if (IPOPT_COPIED(opt)) {
999                         bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
1000                         dp += optlen;
1001                 }
1002         }
1003         for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1004                 *dp++ = IPOPT_EOL;
1005         return (optlen);
1006 }
1007
1008 /*
1009  * IP socket option processing.
1010  */
1011 int
1012 ip_ctloutput(op, so, level, optname, mp)
1013         int op;
1014         struct socket *so;
1015         int level, optname;
1016         struct mbuf **mp;
1017 {
1018         register struct inpcb *inp = sotoinpcb(so);
1019         register struct mbuf *m = *mp;
1020         register int optval = 0;
1021 #ifdef IPSEC
1022         struct proc *p = curproc; /* XXX */
1023         struct tdb *tdb;
1024         struct tdb_ident *tdbip, tdbi;
1025         int s;
1026 #endif
1027         int error = 0;
1028
1029         if (level != IPPROTO_IP) {
1030                 error = EINVAL;
1031                 if (op == PRCO_SETOPT && *mp)
1032                         (void) m_free(*mp);
1033         } else switch (op) {
1034
1035         case PRCO_SETOPT:
1036                 switch (optname) {
1037                 case IP_OPTIONS:
1038 #ifdef notyet
1039                 case IP_RETOPTS:
1040                         return (ip_pcbopts(optname, &inp->inp_options, m));
1041 #else
1042                         return (ip_pcbopts(&inp->inp_options, m));
1043 #endif
1044
1045                 case IP_TOS:
1046                 case IP_TTL:
1047                 case IP_RECVOPTS:
1048                 case IP_RECVRETOPTS:
1049                 case IP_RECVDSTADDR:
1050                         if (m == NULL || m->m_len != sizeof(int))
1051                                 error = EINVAL;
1052                         else {
1053                                 optval = *mtod(m, int *);
1054                                 switch (optname) {
1055
1056                                 case IP_TOS:
1057                                         inp->inp_ip.ip_tos = optval;
1058                                         break;
1059
1060                                 case IP_TTL:
1061                                         inp->inp_ip.ip_ttl = optval;
1062                                         break;
1063 #define OPTSET(bit) \
1064         if (optval) \
1065                 inp->inp_flags |= bit; \
1066         else \
1067                 inp->inp_flags &= ~bit;
1068
1069                                 case IP_RECVOPTS:
1070                                         OPTSET(INP_RECVOPTS);
1071                                         break;
1072
1073                                 case IP_RECVRETOPTS:
1074                                         OPTSET(INP_RECVRETOPTS);
1075                                         break;
1076
1077                                 case IP_RECVDSTADDR:
1078                                         OPTSET(INP_RECVDSTADDR);
1079                                         break;
1080                                 }
1081                         }
1082                         break;
1083 #undef OPTSET
1084
1085                 case IP_MULTICAST_IF:
1086                 case IP_MULTICAST_TTL:
1087                 case IP_MULTICAST_LOOP:
1088                 case IP_ADD_MEMBERSHIP:
1089                 case IP_DROP_MEMBERSHIP:
1090                         error = ip_setmoptions(optname, &inp->inp_moptions, m);
1091                         break;
1092
1093                 case IP_PORTRANGE:
1094                         if (m == 0 || m->m_len != sizeof(int))
1095                                 error = EINVAL;
1096                         else {
1097                                 optval = *mtod(m, int *);
1098
1099                                 switch (optval) {
1100
1101                                 case IP_PORTRANGE_DEFAULT:
1102                                         inp->inp_flags &= ~(INP_LOWPORT);
1103                                         inp->inp_flags &= ~(INP_HIGHPORT);
1104                                         break;
1105
1106                                 case IP_PORTRANGE_HIGH:
1107                                         inp->inp_flags &= ~(INP_LOWPORT);
1108                                         inp->inp_flags |= INP_HIGHPORT;
1109                                         break;
1110
1111                                 case IP_PORTRANGE_LOW:
1112                                         inp->inp_flags &= ~(INP_HIGHPORT);
1113                                         inp->inp_flags |= INP_LOWPORT;
1114                                         break;
1115
1116                                 default:
1117
1118                                         error = EINVAL;
1119                                         break;
1120                                 }
1121                         }
1122                         break;
1123                 case IPSEC_OUTSA:
1124 #ifndef IPSEC
1125                         error = EINVAL;
1126 #else
1127                         s = spltdb();
1128                         if (m == 0 || m->m_len != sizeof(struct tdb_ident)) {
1129                                 error = EINVAL;
1130                         } else {
1131                                 tdbip = mtod(m, struct tdb_ident *);
1132                                 tdb = gettdb(tdbip->spi, &tdbip->dst,
1133                                     tdbip->proto);
1134                                 if (tdb == NULL)
1135                                         error = ESRCH;
1136                                 else
1137                                         tdb_add_inp(tdb, inp);
1138                         }
1139                         splx(s);
1140 #endif /* IPSEC */
1141                         break;
1142
1143                 case IP_AUTH_LEVEL:
1144                 case IP_ESP_TRANS_LEVEL:
1145                 case IP_ESP_NETWORK_LEVEL:
1146 #ifndef IPSEC
1147                         error = EINVAL;
1148 #else
1149                         if (m == 0 || m->m_len != sizeof(int)) {
1150                                 error = EINVAL;
1151                                 break;
1152                         }
1153                         optval = *mtod(m, u_char *);
1154
1155                         if (optval < IPSEC_LEVEL_BYPASS || 
1156                             optval > IPSEC_LEVEL_UNIQUE) {
1157                                 error = EINVAL;
1158                                 break;
1159                         }
1160                                 
1161                         switch (optname) {
1162                         case IP_AUTH_LEVEL:
1163                                 if (optval < ipsec_auth_default_level &&
1164                                     suser(p->p_ucred, &p->p_acflag)) {
1165                                         error = EACCES;
1166                                         break;
1167                                 }
1168                                 inp->inp_seclevel[SL_AUTH] = optval;
1169                                 break;
1170
1171                         case IP_ESP_TRANS_LEVEL:
1172                                 if (optval < ipsec_esp_trans_default_level &&
1173                                     suser(p->p_ucred, &p->p_acflag)) {
1174                                         error = EACCES;
1175                                         break;
1176                                 }
1177                                 inp->inp_seclevel[SL_ESP_TRANS] = optval;
1178                                 break;
1179
1180                         case IP_ESP_NETWORK_LEVEL:
1181                                 if (optval < ipsec_esp_network_default_level &&
1182                                     suser(p->p_ucred, &p->p_acflag)) {
1183                                         error = EACCES;
1184                                         break;
1185                                 }
1186                                 inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1187                                 break;
1188                         }
1189                         if (!error)
1190                                 inp->inp_secrequire = get_sa_require(inp);
1191 #endif
1192                         break;
1193
1194                 default:
1195                         error = ENOPROTOOPT;
1196                         break;
1197                 }
1198                 if (m)
1199                         (void)m_free(m);
1200                 break;
1201
1202         case PRCO_GETOPT:
1203                 switch (optname) {
1204                 case IP_OPTIONS:
1205                 case IP_RETOPTS:
1206                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
1207                         if (inp->inp_options) {
1208                                 m->m_len = inp->inp_options->m_len;
1209                                 bcopy(mtod(inp->inp_options, caddr_t),
1210                                     mtod(m, caddr_t), (unsigned)m->m_len);
1211                         } else
1212                                 m->m_len = 0;
1213                         break;
1214
1215                 case IP_TOS:
1216                 case IP_TTL:
1217                 case IP_RECVOPTS:
1218                 case IP_RECVRETOPTS:
1219                 case IP_RECVDSTADDR:
1220                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
1221                         m->m_len = sizeof(int);
1222                         switch (optname) {
1223
1224                         case IP_TOS:
1225                                 optval = inp->inp_ip.ip_tos;
1226                                 break;
1227
1228                         case IP_TTL:
1229                                 optval = inp->inp_ip.ip_ttl;
1230                                 break;
1231
1232 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
1233
1234                         case IP_RECVOPTS:
1235                                 optval = OPTBIT(INP_RECVOPTS);
1236                                 break;
1237
1238                         case IP_RECVRETOPTS:
1239                                 optval = OPTBIT(INP_RECVRETOPTS);
1240                                 break;
1241
1242                         case IP_RECVDSTADDR:
1243                                 optval = OPTBIT(INP_RECVDSTADDR);
1244                                 break;
1245                         }
1246                         *mtod(m, int *) = optval;
1247                         break;
1248
1249                 case IP_MULTICAST_IF:
1250                 case IP_MULTICAST_TTL:
1251                 case IP_MULTICAST_LOOP:
1252                 case IP_ADD_MEMBERSHIP:
1253                 case IP_DROP_MEMBERSHIP:
1254                         error = ip_getmoptions(optname, inp->inp_moptions, mp);
1255                         break;
1256
1257                 case IP_PORTRANGE:
1258                         *mp = m = m_get(M_WAIT, MT_SOOPTS);
1259                         m->m_len = sizeof(int);
1260
1261                         if (inp->inp_flags & INP_HIGHPORT)
1262                                 optval = IP_PORTRANGE_HIGH;
1263                         else if (inp->inp_flags & INP_LOWPORT)
1264                                 optval = IP_PORTRANGE_LOW;
1265                         else
1266                                 optval = 0;
1267
1268                         *mtod(m, int *) = optval;
1269                         break;
1270
1271                 case IPSEC_OUTSA:
1272 #ifndef IPSEC
1273                         error = EINVAL;
1274 #else
1275                         s = spltdb();
1276                         if (inp->inp_tdb == NULL) {
1277                                 error = ENOENT;
1278                         } else {
1279                                 tdbi.spi = inp->inp_tdb->tdb_spi;
1280                                 tdbi.dst = inp->inp_tdb->tdb_dst;
1281                                 tdbi.proto = inp->inp_tdb->tdb_sproto;
1282                                 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1283                                 m->m_len = sizeof(tdbi);
1284                                 bcopy((caddr_t)&tdbi, mtod(m, caddr_t),
1285                                     (unsigned)m->m_len);
1286                         }
1287                         splx(s);
1288 #endif /* IPSEC */
1289                         break;
1290
1291                 case IP_AUTH_LEVEL:
1292                 case IP_ESP_TRANS_LEVEL:
1293                 case IP_ESP_NETWORK_LEVEL:
1294 #ifndef IPSEC
1295                         *mtod(m, int *) = IPSEC_LEVEL_NONE;
1296 #else
1297                         switch (optname) {
1298                         case IP_AUTH_LEVEL:
1299                                     optval = inp->inp_seclevel[SL_AUTH];
1300                                     break;
1301
1302                         case IP_ESP_TRANS_LEVEL:
1303                                     optval = inp->inp_seclevel[SL_ESP_TRANS];
1304                                     break;
1305
1306                         case IP_ESP_NETWORK_LEVEL:
1307                                     optval = inp->inp_seclevel[SL_ESP_NETWORK];
1308                                     break;
1309                         }
1310                         *mtod(m, int *) = optval;
1311 #endif
1312                         break;
1313                 default:
1314                         error = ENOPROTOOPT;
1315                         break;
1316                 }
1317                 break;
1318         }
1319         return (error);
1320 }
1321
1322 /*
1323  * Set up IP options in pcb for insertion in output packets.
1324  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1325  * with destination address if source routed.
1326  */
1327 int
1328 #ifdef notyet
1329 ip_pcbopts(optname, pcbopt, m)
1330         int optname;
1331 #else
1332 ip_pcbopts(pcbopt, m)
1333 #endif
1334         struct mbuf **pcbopt;
1335         register struct mbuf *m;
1336 {
1337         register int cnt, optlen;
1338         register u_char *cp;
1339         u_char opt;
1340
1341         /* turn off any old options */
1342         if (*pcbopt)
1343                 (void)m_free(*pcbopt);
1344         *pcbopt = 0;
1345         if (m == (struct mbuf *)0 || m->m_len == 0) {
1346                 /*
1347                  * Only turning off any previous options.
1348                  */
1349                 if (m)
1350                         (void)m_free(m);
1351                 return (0);
1352         }
1353
1354 #ifndef vax
1355         if (m->m_len % sizeof(int32_t))
1356                 goto bad;
1357 #endif
1358         /*
1359          * IP first-hop destination address will be stored before
1360          * actual options; move other options back
1361          * and clear it when none present.
1362          */
1363         if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1364                 goto bad;
1365         cnt = m->m_len;
1366         m->m_len += sizeof(struct in_addr);
1367         cp = mtod(m, u_char *) + sizeof(struct in_addr);
1368         ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1369         bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1370
1371         for (; cnt > 0; cnt -= optlen, cp += optlen) {
1372                 opt = cp[IPOPT_OPTVAL];
1373                 if (opt == IPOPT_EOL)
1374                         break;
1375                 if (opt == IPOPT_NOP)
1376                         optlen = 1;
1377                 else {
1378                         optlen = cp[IPOPT_OLEN];
1379                         if (optlen <= IPOPT_OLEN || optlen > cnt)
1380                                 goto bad;
1381                 }
1382                 switch (opt) {
1383
1384                 default:
1385                         break;
1386
1387                 case IPOPT_LSRR:
1388                 case IPOPT_SSRR:
1389                         /*
1390                          * user process specifies route as:
1391                          *      ->A->B->C->D
1392                          * D must be our final destination (but we can't
1393                          * check that since we may not have connected yet).
1394                          * A is first hop destination, which doesn't appear in
1395                          * actual IP option, but is stored before the options.
1396                          */
1397                         if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1398                                 goto bad;
1399                         m->m_len -= sizeof(struct in_addr);
1400                         cnt -= sizeof(struct in_addr);
1401                         optlen -= sizeof(struct in_addr);
1402                         cp[IPOPT_OLEN] = optlen;
1403                         /*
1404                          * Move first hop before start of options.
1405                          */
1406                         bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1407                             sizeof(struct in_addr));
1408                         /*
1409                          * Then copy rest of options back
1410                          * to close up the deleted entry.
1411                          */
1412                         ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1413                             sizeof(struct in_addr)),
1414                             (caddr_t)&cp[IPOPT_OFFSET+1],
1415                             (unsigned)cnt + sizeof(struct in_addr));
1416                         break;
1417                 }
1418         }
1419         if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1420                 goto bad;
1421         *pcbopt = m;
1422         return (0);
1423
1424 bad:
1425         (void)m_free(m);
1426         return (EINVAL);
1427 }
1428
1429 /*
1430  * Set the IP multicast options in response to user setsockopt().
1431  */
1432 int
1433 ip_setmoptions(optname, imop, m)
1434         int optname;
1435         struct ip_moptions **imop;
1436         struct mbuf *m;
1437 {
1438         register int error = 0;
1439         u_char loop;
1440         register int i;
1441         struct in_addr addr;
1442         register struct ip_mreq *mreq;
1443         register struct ifnet *ifp;
1444         register struct ip_moptions *imo = *imop;
1445         struct route ro;
1446         register struct sockaddr_in *dst;
1447
1448         if (imo == NULL) {
1449                 /*
1450                  * No multicast option buffer attached to the pcb;
1451                  * allocate one and initialize to default values.
1452                  */
1453                 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1454                     M_WAITOK);
1455
1456                 if (imo == NULL)
1457                         return (ENOBUFS);
1458                 *imop = imo;
1459                 imo->imo_multicast_ifp = NULL;
1460                 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1461                 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1462                 imo->imo_num_memberships = 0;
1463         }
1464
1465         switch (optname) {
1466
1467         case IP_MULTICAST_IF:
1468                 /*
1469                  * Select the interface for outgoing multicast packets.
1470                  */
1471                 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1472                         error = EINVAL;
1473                         break;
1474                 }
1475                 addr = *(mtod(m, struct in_addr *));
1476                 /*
1477                  * INADDR_ANY is used to remove a previous selection.
1478                  * When no interface is selected, a default one is
1479                  * chosen every time a multicast packet is sent.
1480                  */
1481                 if (addr.s_addr == INADDR_ANY) {
1482                         imo->imo_multicast_ifp = NULL;
1483                         break;
1484                 }
1485                 /*
1486                  * The selected interface is identified by its local
1487                  * IP address.  Find the interface and confirm that
1488                  * it supports multicasting.
1489                  */
1490                 INADDR_TO_IFP(addr, ifp);
1491                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1492                         error = EADDRNOTAVAIL;
1493                         break;
1494                 }
1495                 imo->imo_multicast_ifp = ifp;
1496                 break;
1497
1498         case IP_MULTICAST_TTL:
1499                 /*
1500                  * Set the IP time-to-live for outgoing multicast packets.
1501                  */
1502                 if (m == NULL || m->m_len != 1) {
1503                         error = EINVAL;
1504                         break;
1505                 }
1506                 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1507                 break;
1508
1509         case IP_MULTICAST_LOOP:
1510                 /*
1511                  * Set the loopback flag for outgoing multicast packets.
1512                  * Must be zero or one.
1513                  */
1514                 if (m == NULL || m->m_len != 1 ||
1515                    (loop = *(mtod(m, u_char *))) > 1) {
1516                         error = EINVAL;
1517                         break;
1518                 }
1519                 imo->imo_multicast_loop = loop;
1520                 break;
1521
1522         case IP_ADD_MEMBERSHIP:
1523                 /*
1524                  * Add a multicast group membership.
1525                  * Group must be a valid IP multicast address.
1526                  */
1527                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1528                         error = EINVAL;
1529                         break;
1530                 }
1531                 mreq = mtod(m, struct ip_mreq *);
1532                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1533                         error = EINVAL;
1534                         break;
1535                 }
1536                 /*
1537                  * If no interface address was provided, use the interface of
1538                  * the route to the given multicast address.
1539                  */
1540                 if (mreq->imr_interface.s_addr == INADDR_ANY) {
1541                         ro.ro_rt = NULL;
1542                         dst = satosin(&ro.ro_dst);
1543                         dst->sin_len = sizeof(*dst);
1544                         dst->sin_family = AF_INET;
1545                         dst->sin_addr = mreq->imr_multiaddr;
1546                         rtalloc(&ro);
1547                         if (ro.ro_rt == NULL) {
1548                                 error = EADDRNOTAVAIL;
1549                                 break;
1550                         }
1551                         ifp = ro.ro_rt->rt_ifp;
1552                         rtfree(ro.ro_rt);
1553                 } else {
1554                         INADDR_TO_IFP(mreq->imr_interface, ifp);
1555                 }
1556                 /*
1557                  * See if we found an interface, and confirm that it
1558                  * supports multicast.
1559                  */
1560                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1561                         error = EADDRNOTAVAIL;
1562                         break;
1563                 }
1564                 /*
1565                  * See if the membership already exists or if all the
1566                  * membership slots are full.
1567                  */
1568                 for (i = 0; i < imo->imo_num_memberships; ++i) {
1569                         if (imo->imo_membership[i]->inm_ifp == ifp &&
1570                             imo->imo_membership[i]->inm_addr.s_addr
1571                                                 == mreq->imr_multiaddr.s_addr)
1572                                 break;
1573                 }
1574                 if (i < imo->imo_num_memberships) {
1575                         error = EADDRINUSE;
1576                         break;
1577                 }
1578                 if (i == IP_MAX_MEMBERSHIPS) {
1579                         error = ETOOMANYREFS;
1580                         break;
1581                 }
1582                 /*
1583                  * Everything looks good; add a new record to the multicast
1584                  * address list for the given interface.
1585                  */
1586                 if ((imo->imo_membership[i] =
1587                     in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1588                         error = ENOBUFS;
1589                         break;
1590                 }
1591                 ++imo->imo_num_memberships;
1592                 break;
1593
1594         case IP_DROP_MEMBERSHIP:
1595                 /*
1596                  * Drop a multicast group membership.
1597                  * Group must be a valid IP multicast address.
1598                  */
1599                 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1600                         error = EINVAL;
1601                         break;
1602                 }
1603                 mreq = mtod(m, struct ip_mreq *);
1604                 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1605                         error = EINVAL;
1606                         break;
1607                 }
1608                 /*
1609                  * If an interface address was specified, get a pointer
1610                  * to its ifnet structure.
1611                  */
1612                 if (mreq->imr_interface.s_addr == INADDR_ANY)
1613                         ifp = NULL;
1614                 else {
1615                         INADDR_TO_IFP(mreq->imr_interface, ifp);
1616                         if (ifp == NULL) {
1617                                 error = EADDRNOTAVAIL;
1618                                 break;
1619                         }
1620                 }
1621                 /*
1622                  * Find the membership in the membership array.
1623                  */
1624                 for (i = 0; i < imo->imo_num_memberships; ++i) {
1625                         if ((ifp == NULL ||
1626                              imo->imo_membership[i]->inm_ifp == ifp) &&
1627                              imo->imo_membership[i]->inm_addr.s_addr ==
1628                              mreq->imr_multiaddr.s_addr)
1629                                 break;
1630                 }
1631                 if (i == imo->imo_num_memberships) {
1632                         error = EADDRNOTAVAIL;
1633                         break;
1634                 }
1635                 /*
1636                  * Give up the multicast address record to which the
1637                  * membership points.
1638                  */
1639                 in_delmulti(imo->imo_membership[i]);
1640                 /*
1641                  * Remove the gap in the membership array.
1642                  */
1643                 for (++i; i < imo->imo_num_memberships; ++i)
1644                         imo->imo_membership[i-1] = imo->imo_membership[i];
1645                 --imo->imo_num_memberships;
1646                 break;
1647
1648         default:
1649                 error = EOPNOTSUPP;
1650                 break;
1651         }
1652
1653         /*
1654          * If all options have default values, no need to keep the mbuf.
1655          */
1656         if (imo->imo_multicast_ifp == NULL &&
1657             imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1658             imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1659             imo->imo_num_memberships == 0) {
1660                 free(*imop, M_IPMOPTS);
1661                 *imop = NULL;
1662         }
1663
1664         return (error);
1665 }
1666
1667 /*
1668  * Return the IP multicast options in response to user getsockopt().
1669  */
1670 int
1671 ip_getmoptions(optname, imo, mp)
1672         int optname;
1673         register struct ip_moptions *imo;
1674         register struct mbuf **mp;
1675 {
1676         u_char *ttl;
1677         u_char *loop;
1678         struct in_addr *addr;
1679         struct in_ifaddr *ia;
1680
1681         *mp = m_get(M_WAIT, MT_SOOPTS);
1682
1683         switch (optname) {
1684
1685         case IP_MULTICAST_IF:
1686                 addr = mtod(*mp, struct in_addr *);
1687                 (*mp)->m_len = sizeof(struct in_addr);
1688                 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1689                         addr->s_addr = INADDR_ANY;
1690                 else {
1691                         IFP_TO_IA(imo->imo_multicast_ifp, ia);
1692                         addr->s_addr = (ia == NULL) ? INADDR_ANY
1693                                         : ia->ia_addr.sin_addr.s_addr;
1694                 }
1695                 return (0);
1696
1697         case IP_MULTICAST_TTL:
1698                 ttl = mtod(*mp, u_char *);
1699                 (*mp)->m_len = 1;
1700                 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1701                                      : imo->imo_multicast_ttl;
1702                 return (0);
1703
1704         case IP_MULTICAST_LOOP:
1705                 loop = mtod(*mp, u_char *);
1706                 (*mp)->m_len = 1;
1707                 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1708                                       : imo->imo_multicast_loop;
1709                 return (0);
1710
1711         default:
1712                 return (EOPNOTSUPP);
1713         }
1714 }
1715
1716 /*
1717  * Discard the IP multicast options.
1718  */
1719 void
1720 ip_freemoptions(imo)
1721         register struct ip_moptions *imo;
1722 {
1723         register int i;
1724
1725         if (imo != NULL) {
1726                 for (i = 0; i < imo->imo_num_memberships; ++i)
1727                         in_delmulti(imo->imo_membership[i]);
1728                 free(imo, M_IPMOPTS);
1729         }
1730 }
1731
1732 /*
1733  * Routine called from ip_output() to loop back a copy of an IP multicast
1734  * packet to the input queue of a specified interface.  Note that this
1735  * calls the output routine of the loopback "driver", but with an interface
1736  * pointer that might NOT be &loif -- easier than replicating that code here.
1737  */
1738 static void
1739 ip_mloopback(ifp, m, dst)
1740         struct ifnet *ifp;
1741         register struct mbuf *m;
1742         register struct sockaddr_in *dst;
1743 {
1744         register struct ip *ip;
1745         struct mbuf *copym;
1746
1747         copym = m_copy(m, 0, M_COPYALL);
1748         if (copym != NULL) {
1749                 /*
1750                  * We don't bother to fragment if the IP length is greater
1751                  * than the interface's MTU.  Can this possibly matter?
1752                  */
1753                 ip = mtod(copym, struct ip *);
1754                 ip->ip_len = htons((u_int16_t)ip->ip_len);
1755                 ip->ip_off = htons((u_int16_t)ip->ip_off);
1756                 ip->ip_sum = 0;
1757                 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1758                 (void) looutput(ifp, copym, sintosa(dst), NULL);
1759         }
1760 }