]> git.karo-electronics.de Git - karo-tx-redboot.git/blob - packages/net/tcpip/v2_0/src/sys/netinet/ip_input.c
Initial revision
[karo-tx-redboot.git] / packages / net / tcpip / v2_0 / src / sys / netinet / ip_input.c
1 //==========================================================================
2 //
3 //      sys/netinet/ip_input.c
4 //
5 //     
6 //
7 //==========================================================================
8 //####BSDCOPYRIGHTBEGIN####
9 //
10 // -------------------------------------------
11 //
12 // Portions of this software may have been derived from OpenBSD or other sources,
13 // and are covered by the appropriate copyright disclaimers included herein.
14 //
15 // -------------------------------------------
16 //
17 //####BSDCOPYRIGHTEND####
18 //==========================================================================
19 //#####DESCRIPTIONBEGIN####
20 //
21 // Author(s):    gthomas
22 // Contributors: gthomas
23 // Date:         2000-01-10
24 // Purpose:      
25 // Description:  
26 //              
27 //
28 //####DESCRIPTIONEND####
29 //
30 //==========================================================================
31
32
33 /*      $OpenBSD: ip_input.c,v 1.44 1999/12/08 06:50:20 itojun Exp $    */
34 /*      $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $   */
35
36 /*
37  * Copyright (c) 1982, 1986, 1988, 1993
38  *      The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by the University of
51  *      California, Berkeley and its contributors.
52  * 4. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  *
68  *      @(#)ip_input.c  8.2 (Berkeley) 1/4/94
69  */
70
71 #include <sys/param.h>
72 #ifndef __ECOS
73 #include <sys/systm.h>
74 #endif
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/domain.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/errno.h>
81 #include <sys/time.h>
82 #include <sys/kernel.h>
83 #ifndef __ECOS
84 #include <sys/syslog.h>
85 #include <sys/proc.h>
86
87 #include <vm/vm.h>
88 #include <sys/sysctl.h>
89 #endif
90
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/route.h>
94
95 #include <netinet/in.h>
96 #include <netinet/in_systm.h>
97 #include <netinet/if_ether.h>
98 #include <netinet/ip.h>
99 #include <netinet/in_pcb.h>
100 #include <netinet/in_var.h>
101 #include <netinet/ip_var.h>
102 #include <netinet/ip_icmp.h>
103 #include <netinet/ip_ipsp.h>
104
105 #ifdef __ECOS
106 #include <stdio.h>    // for 'sprintf()'
107 #endif
108
109 #ifndef IPFORWARDING
110 #ifdef GATEWAY
111 #define IPFORWARDING    1       /* forward IP packets not for us */
112 #else /* GATEWAY */
113 #define IPFORWARDING    0       /* don't forward IP packets not for us */
114 #endif /* GATEWAY */
115 #endif /* IPFORWARDING */
116 #ifndef IPSENDREDIRECTS
117 #define IPSENDREDIRECTS 1
118 #endif
119
120 int encdebug = 0;
121
122 /*
123  * Note: DIRECTED_BROADCAST is handled this way so that previous
124  * configuration using this option will Just Work.
125  */
126 #ifndef IPDIRECTEDBCAST
127 #ifdef DIRECTED_BROADCAST
128 #define IPDIRECTEDBCAST 1
129 #else
130 #define IPDIRECTEDBCAST 0
131 #endif /* DIRECTED_BROADCAST */
132 #endif /* IPDIRECTEDBCAST */
/*
 * Run-time tunables, initialised from the compile-time defaults
 * selected by the preprocessor blocks above.
 */
int     ipforwarding = IPFORWARDING;    /* 0: drop packets that are not for us instead of forwarding */
int     ipsendredirects = IPSENDREDIRECTS;
int     ip_dosourceroute = 0;   /* no src-routing unless sysctl'd to enable */
int     ip_defttl = IPDEFTTL;
/*
 * When non-zero, in_iawithaddr() only accepts a broadcast-address match
 * on the interface the packet arrived on; when zero any broadcast-capable
 * interface address may match.
 */
int     ip_directedbcast = IPDIRECTEDBCAST;
#ifdef DIAGNOSTIC
int     ipprintfs = 0;
#endif
141
142 int     ipsec_auth_default_level = IPSEC_AUTH_LEVEL_DEFAULT;
143 int     ipsec_esp_trans_default_level = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
144 int     ipsec_esp_network_default_level = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
145
146 /* Keep track of memory used for reassembly */
147 int     ip_maxqueue = 300;
148 int     ip_frags = 0;
149
150 /* from in_pcb.c */
151 extern int ipport_firstauto;
152 extern int ipport_lastauto;
153 extern int ipport_hifirstauto;
154 extern int ipport_hilastauto;
155 extern struct baddynamicports baddynamicports;
156
157 extern  struct domain inetdomain;
158 extern  struct protosw inetsw[];
159 u_char  ip_protox[IPPROTO_MAX];
160 int     ipqmaxlen = IFQ_MAXLEN;
161 struct  in_ifaddrhead in_ifaddr;
162 struct  ifqueue ipintrq;
163 struct ipstat ipstat;
164 #if defined(IPFILTER) || defined(IPFILTER_LKM)
165 int     (*fr_checkp) __P((struct ip *, int, struct ifnet *, int,
166                           struct mbuf **));
167 #endif
168
169 int     ipq_locked;
170 static __inline int ipq_lock_try __P((void));
171 static __inline void ipq_unlock __P((void));
172
173 static __inline int
174 ipq_lock_try()
175 {
176         int s;
177
178         s = splimp();
179         if (ipq_locked) {
180                 splx(s);
181                 return (0);
182         }
183         ipq_locked = 1;
184         splx(s);
185         return (1);
186 }
187
188 #define ipq_lock() ipq_lock_try()
189
190 static __inline void
191 ipq_unlock()
192 {
193         int s;
194
195         s = splimp();
196         ipq_locked = 0;
197         splx(s);
198 }
199
200 #if 0 // Now in common layer
201
202 static char *ui8tod( cyg_uint8 n, char *p )
203 {
204     if( n > 99 ) *p++ = (n/100) + '0';
205     if( n >  9 ) *p++ = ((n/10)%10) + '0';
206     *p++ = (n%10) + '0';
207     return p;
208 }
209
210 char *
211 inet_ntoa(ina)
212         struct in_addr ina;
213 {
214         static char buf[4*sizeof "123"];
215         char *p = buf;
216         unsigned char *ucp = (unsigned char *)&ina;
217
218 //      sprintf(buf, "%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff,
219 //          ucp[2] & 0xff, ucp[3] & 0xff);
220
221         p = ui8tod( ucp[0] & 0xFF, p);
222         *p++ = '.';
223         p = ui8tod( ucp[1] & 0xFF, p);
224         *p++ = '.';
225         p = ui8tod( ucp[2] & 0xFF, p);
226         *p++ = '.';
227         p = ui8tod( ucp[3] & 0xFF, p);
228         *p++ = '\0';
229                     
230         return (buf);
231 }
232 #endif
233
234 /*
235  * We need to save the IP options in case a protocol wants to respond
236  * to an incoming packet over the same route if the packet got here
237  * using IP source routing.  This allows connection establishment and
238  * maintenance when the remote end is on a network that is not known
239  * to us.
240  */
241 int     ip_nhops = 0;
/*
 * Single static record holding the saved source-route option.
 * NOTE(review): presumably filled in by save_rte(), whose body is not
 * visible here -- confirm.
 */
static  struct ip_srcrt {
        struct  in_addr dst;                    /* final destination */
        char    nop;                            /* one NOP to align */
        char    srcopt[IPOPT_OFFSET + 1];       /* OPTVAL, OLEN and OFFSET */
        struct  in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];    /* recorded hop addresses */
} ip_srcrt;
248
249 static void save_rte __P((u_char *, struct in_addr));
250 static int ip_weadvertise(u_int32_t);
251
252 /*
253  * IP initialization: fill in IP protocol switch table.
254  * All protocols not implemented in kernel go to raw IP protocol handler.
255  */
256 void
257 ip_init()
258 {
259         register struct protosw *pr;
260         register int i;
261         const u_int16_t defbaddynamicports_tcp[] = DEFBADDYNAMICPORTS_TCP;
262         const u_int16_t defbaddynamicports_udp[] = DEFBADDYNAMICPORTS_UDP;
263
264         pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
265         if (pr == 0)
266                 panic("ip_init");
267         for (i = 0; i < IPPROTO_MAX; i++)
268                 ip_protox[i] = pr - inetsw;
269         for (pr = inetdomain.dom_protosw;
270             pr < inetdomain.dom_protoswNPROTOSW; pr++)
271                 if (pr->pr_domain->dom_family == PF_INET &&
272                     pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
273                         ip_protox[pr->pr_protocol] = pr - inetsw;
274         LIST_INIT(&ipq);
275         ipintrq.ifq_maxlen = ipqmaxlen;
276         TAILQ_INIT(&in_ifaddr);
277
278         /* Fill in list of ports not to allocate dynamically. */
279         bzero((void *)&baddynamicports, sizeof(baddynamicports));
280         for (i = 0; defbaddynamicports_tcp[i] != 0; i++)
281                 DP_SET(baddynamicports.tcp, defbaddynamicports_tcp[i]);
282         for (i = 0; defbaddynamicports_udp[i] != 0; i++)
283                 DP_SET(baddynamicports.udp, defbaddynamicports_tcp[i]);
284 }
285
286 struct  sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
287 struct  route ipforward_rt;
288
289 void
290 ipintr()
291 {
292         register struct mbuf *m;
293         int s;
294
295         if (needqueuedrain)
296                 m_reclaim();
297         
298         while (1) {
299                 /*
300                  * Get next datagram off input queue and get IP header
301                  * in first mbuf.
302                  */
303                 s = splimp();
304                 IF_DEQUEUE(&ipintrq, m);
305                 splx(s);
306                 if (m == 0)
307                         return;
308 #ifdef  DIAGNOSTIC
309                 if ((m->m_flags & M_PKTHDR) == 0)
310                         panic("ipintr no HDR");
311 #endif
312                 ipv4_input(m, 0, NULL, 0);
313         }
314 }
315
/*
 * IPv4 input routine.
 *
 * Validates the IP header (version, header length, checksum), converts
 * ip_len/ip_id/ip_off to host byte order, checks the mbuf chain length
 * against ip_len, processes options, and then decides what to do with
 * the datagram:
 *   - addressed to us (unicast, accepted broadcast, joined multicast
 *     group, INADDR_BROADCAST or INADDR_ANY): reassemble if fragmented
 *     and pass to the protocol's pr_input routine;
 *   - otherwise: forward via ip_forward() when ipforwarding is set,
 *     else count it in ips_cantforward and free it.
 *
 * Arguments:
 *   m    received mbuf chain.
 *   ...  the first variadic argument is read as an int ("extra"); when
 *        non-zero the chain is split with m_split() at that offset and
 *        only the part returned by m_split() is processed.  No further
 *        variadic arguments are read.
 *
 * Packets rejected through the "bad" label are released with m_freem().
 */
void
ipv4_input(struct mbuf *m, ...)
{
        register struct ip *ip;
        register struct ipq *fp;
        struct in_ifaddr *ia;
        struct ipqent *ipqe;
        int hlen, mff;
        va_list ap;
        int extra;

        va_start(ap, m);
        extra = va_arg(ap, int);
        va_end(ap);

        if (extra) {
                struct mbuf *newpacket;

                /*
                 * NOTE(review): presumably m_split() returns the chain
                 * starting at byte offset "extra", so this skips a
                 * leading header -- confirm against m_split().
                 */
                if (!(newpacket = m_split(m, extra, M_NOWAIT))) {
                        m_freem(m);
                        return;
                }

                /* Carry the original flags over, then drop the front part. */
                newpacket->m_flags |= m->m_flags;
                m_freem(m);
                m = newpacket;
                extra = 0;
        }

        /*
         * If no IP addresses have been set yet but the interfaces
         * are receiving, can't do anything with incoming packets yet.
         */
        if (in_ifaddr.tqh_first == 0)
                goto bad;
        ipstat.ips_total++;
        /*
         * Make sure the fixed part of the header is contiguous in the
         * first mbuf.  On failure we return without m_freem();
         * presumably m_pullup() has already freed the chain -- confirm.
         */
        if (m->m_len < sizeof (struct ip) &&
            (m = m_pullup(m, sizeof (struct ip))) == 0) {
                ipstat.ips_toosmall++;
                return;
        }
        ip = mtod(m, struct ip *);
        if (ip->ip_v != IPVERSION) {
                ipstat.ips_badvers++;
                goto bad;
        }
        /* ip_hl counts 32-bit words. */
        hlen = ip->ip_hl << 2;
        if (hlen < sizeof(struct ip)) { /* minimum header length */
                ipstat.ips_badhlen++;
                goto bad;
        }
        if (hlen > m->m_len) {
                /* Options spill past the first mbuf: pull them up too. */
                if ((m = m_pullup(m, hlen)) == 0) {
                        ipstat.ips_badhlen++;
                        return;
                }
                /* m may have changed, so re-derive the header pointer. */
                ip = mtod(m, struct ip *);
        }
        /*
         * A non-zero in_cksum() result is treated as a bad checksum.
         * Note that the result is also stored back into ip_sum.
         */
        if ((ip->ip_sum = in_cksum(m, hlen)) != 0) {
                ipstat.ips_badsum++;
                goto bad;
        }

        /*
         * Convert fields to host representation.
         */
        NTOHS(ip->ip_len);
        if (ip->ip_len < hlen) {
                ipstat.ips_badlen++;
                goto bad;
        }
        NTOHS(ip->ip_id);
        NTOHS(ip->ip_off);

        /*
         * Check that the amount of data in the buffers
         * is at least as much as the IP header would have us expect.
         * Trim mbufs if longer than we expect.
         * Drop packet if shorter than we expect.
         */
        if (m->m_pkthdr.len < ip->ip_len) {
                ipstat.ips_tooshort++;
                goto bad;
        }
        if (m->m_pkthdr.len > ip->ip_len) {
                if (m->m_len == m->m_pkthdr.len) {
                        /* Single-mbuf packet: just shorten the lengths. */
                        m->m_len = ip->ip_len;
                        m->m_pkthdr.len = ip->ip_len;
                } else
                        /*
                         * The count passed here is negative; presumably
                         * m_adj() trims from the tail in that case --
                         * confirm against m_adj().
                         */
                        m_adj(m, ip->ip_len - m->m_pkthdr.len);
        }

#if defined(IPFILTER) || defined(IPFILTER_LKM)
        /*
         * Check if we want to allow this packet to be processed.
         * Consider it to be bad if not.
         */
        {
                struct mbuf *m0 = m;
                /*
                 * A non-zero return from the filter hook ends processing
                 * here; nothing is freed by this function on that path.
                 */
                if (fr_checkp && (*fr_checkp)(ip, hlen, m->m_pkthdr.rcvif, 0, &m0))
                        return;
                /* The hook may have replaced the chain through m0. */
                ip = mtod(m = m0, struct ip *);
        }
#endif

        /*
         * Process options and, if not destined for us,
         * ship it on.  ip_dooptions returns 1 when an
         * error was detected (causing an icmp message
         * to be sent and the original packet to be freed).
         */
        ip_nhops = 0;           /* for source routed packets */
        if (hlen > sizeof (struct ip) && ip_dooptions(m))
                return;

        /*
         * Check our list of addresses, to see if the packet is for us.
         * A matching address only counts if its interface is up.
         */
        if ((ia = in_iawithaddr(ip->ip_dst, m)) != NULL &&
            (ia->ia_ifp->if_flags & IFF_UP))
                goto ours;

        if (IN_MULTICAST(ip->ip_dst.s_addr)) {
                struct in_multi *inm;
#ifdef MROUTING
                extern struct socket *ip_mrouter;

                if (m->m_flags & M_EXT) {
                        if ((m = m_pullup(m, hlen)) == 0) {
                                ipstat.ips_toosmall++;
                                return;
                        }
                        ip = mtod(m, struct ip *);
                }

                if (ip_mrouter) {
                        /*
                         * If we are acting as a multicast router, all
                         * incoming multicast packets are passed to the
                         * kernel-level multicast forwarding function.
                         * The packet is returned (relatively) intact; if
                         * ip_mforward() returns a non-zero value, the packet
                         * must be discarded, else it may be accepted below.
                         *
                         * (The IP ident field is put in the same byte order
                         * as expected when ip_mforward() is called from
                         * ip_output().)
                         */
                        ip->ip_id = htons(ip->ip_id);
                        if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
                                ipstat.ips_cantforward++;
                                m_freem(m);
                                return;
                        }
                        ip->ip_id = ntohs(ip->ip_id);

                        /*
                         * The process-level routing demon needs to receive
                         * all multicast IGMP packets, whether or not this
                         * host belongs to their destination groups.
                         */
                        if (ip->ip_p == IPPROTO_IGMP)
                                goto ours;
                        ipstat.ips_forward++;
                }
#endif
                /*
                 * See if we belong to the destination multicast group on the
                 * arrival interface.
                 */
                IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
                if (inm == NULL) {
                        /* Not a member: drop. */
                        ipstat.ips_cantforward++;
                        m_freem(m);
                        return;
                }
                goto ours;
        }
        if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
            ip->ip_dst.s_addr == INADDR_ANY)
                goto ours;

        /*
         * Not for us; forward if possible and desirable.
         */
        if (ipforwarding == 0) {
                ipstat.ips_cantforward++;
                m_freem(m);
        } else
                ip_forward(m, 0);
        return;

ours:
        /*
         * If offset or IP_MF are set, must reassemble.
         * Otherwise, nothing need be done.
         * (We could look in the reassembly queue to see
         * if the packet was previously fragmented,
         * but it's not worth the time; just let them time out.)
         */
        if (ip->ip_off &~ (IP_DF | IP_RF)) {
                if (m->m_flags & M_EXT) {               /* XXX */
                        if ((m = m_pullup(m, hlen)) == 0) {
                                ipstat.ips_toosmall++;
                                return;
                        }
                        ip = mtod(m, struct ip *);
                }

                /*
                 * Look for queue of fragments
                 * of this datagram.
                 * Note: ipq_lock() expands to ipq_lock_try() and its
                 * result is ignored here.  Every exit from this branch
                 * below calls ipq_unlock().
                 */
                ipq_lock();
                for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next)
                        if (ip->ip_id == fp->ipq_id &&
                            ip->ip_src.s_addr == fp->ipq_src.s_addr &&
                            ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
                            ip->ip_p == fp->ipq_p)
                                goto found;
                fp = 0;         /* no existing queue for this datagram */
found:

                /*
                 * Adjust ip_len to not reflect header,
                 * set ipqe_mff if more fragments are expected,
                 * convert offset of this to bytes.
                 */
                ip->ip_len -= hlen;
                mff = (ip->ip_off & IP_MF) != 0;
                if (mff) {
                        /*
                         * Make sure that fragments have a data length
                         * that's a non-zero multiple of 8 bytes.
                         */
                        if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
                                ipstat.ips_badfrags++;
                                ipq_unlock();
                                goto bad;
                        }
                }
                ip->ip_off <<= 3;

                /*
                 * If datagram marked as having more fragments
                 * or if this is not the first fragment,
                 * attempt reassembly; if it succeeds, proceed.
                 */
                if (mff || ip->ip_off) {
                        ipstat.ips_fragments++;
                        /*
                         * Cap the number of queued fragments: when the
                         * limit would be exceeded, flush old queues and
                         * drop this fragment.
                         */
                        if (ip_frags + 1 > ip_maxqueue) {
                                ip_flush();
                                ipstat.ips_rcvmemdrop++;
                                ipq_unlock();
                                goto bad;
                        }

                        MALLOC(ipqe, struct ipqent *, sizeof (struct ipqent),
                            M_IPQ, M_NOWAIT);
                        if (ipqe == NULL) {
                                ipstat.ips_rcvmemdrop++;
                                ipq_unlock();
                                goto bad;
                        }
                        /*
                         * Counted here; ip_reass()/ip_freef() decrement
                         * ip_frags whenever they free an ipqent.
                         */
                        ip_frags++;
                        ipqe->ipqe_mff = mff;
                        ipqe->ipqe_ip = ip;
                        ip = ip_reass(ipqe, fp);
                        if (ip == 0) {
                                /*
                                 * Datagram not complete yet (or fragment
                                 * dropped); ip_reass() now owns the data.
                                 */
                                ipq_unlock();
                                return;
                        }
                        ipstat.ips_reassembled++;
                        /* Continue with the reassembled datagram. */
                        m = dtom(ip);
                        hlen = ip->ip_hl << 2;
                } else
                        /*
                         * Whole datagram in one piece: discard any stale
                         * reassembly queue with the same identification.
                         */
                        if (fp)
                                ip_freef(fp);
                ipq_unlock();
        } else
                ip->ip_len -= hlen;

        /*
         * Switch out to protocol's input routine.
         * At this point ip_len no longer includes the header length.
         */
        ipstat.ips_delivered++;
        (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen, NULL, 0);
        return;
bad:
        m_freem(m);
}
611
612 struct in_ifaddr *
613 in_iawithaddr(ina, m)
614         struct in_addr ina;
615         register struct mbuf *m;
616 {
617         register struct in_ifaddr *ia;
618
619         for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) {
620                 if ((ina.s_addr == ia->ia_addr.sin_addr.s_addr) ||
621                     ((ia->ia_ifp->if_flags & (IFF_LOOPBACK|IFF_LINK1)) ==
622                         (IFF_LOOPBACK|IFF_LINK1) &&
623                      ia->ia_subnet == (ina.s_addr & ia->ia_subnetmask)))
624                         return ia;
625                 if (m && ((ip_directedbcast == 0) || (ip_directedbcast &&
626                     ia->ia_ifp == m->m_pkthdr.rcvif)) &&
627                     (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
628                         if (ina.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
629                             ina.s_addr == ia->ia_netbroadcast.s_addr ||
630                             /*
631                              * Look for all-0's host part (old broadcast addr),
632                              * either for subnet or net.
633                              */
634                             ina.s_addr == ia->ia_subnet ||
635                             ina.s_addr == ia->ia_net) {
636                                 /* Make sure M_BCAST is set */
637                                 m->m_flags |= M_BCAST;
638                                 return ia;
639                             }
640                 }
641         }
642
643         return NULL;
644 }
645
/*
 * Take incoming datagram fragment and try to
 * reassemble it into whole datagram.  If a chain for
 * reassembly of this datagram already exists, then it
 * is given as fp; otherwise have to make a chain.
 *
 * ipqe  fragment entry; ipqe_ip points at the fragment's IP header
 *       inside its mbuf.  As prepared by ipv4_input(), ip_off is a byte
 *       offset and ip_len excludes the header.
 * fp    existing reassembly queue for this datagram, or 0.
 *
 * Returns the IP header of the completed datagram, or 0 when the
 * datagram is still incomplete, the fragment was dropped, or the
 * reassembled size would exceed IP_MAXPACKET (the whole queue is then
 * freed).  ip_frags is decremented for every ipqent freed here.
 */
struct ip *
ip_reass(ipqe, fp)
        register struct ipqent *ipqe;
        register struct ipq *fp;
{
        register struct mbuf *m = dtom(ipqe->ipqe_ip);
        register struct ipqent *nq, *p, *q;
        struct ip *ip;
        struct mbuf *t;
        int hlen = ipqe->ipqe_ip->ip_hl << 2;
        int i, next;

        /*
         * Presence of header sizes in mbufs
         * would confuse code below.
         * (The header bytes stay in the mbuf; only the data pointer and
         * length are moved past them.)
         */
        m->m_data += hlen;
        m->m_len -= hlen;

        /*
         * If first fragment to arrive, create a reassembly queue.
         * The queue header itself is stored in a freshly allocated mbuf.
         */
        if (fp == 0) {
                if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
                        goto dropfrag;
                fp = mtod(t, struct ipq *);
                LIST_INSERT_HEAD(&ipq, fp, ipq_q);
                fp->ipq_ttl = IPFRAGTTL;
                fp->ipq_p = ipqe->ipqe_ip->ip_p;
                fp->ipq_id = ipqe->ipqe_ip->ip_id;
                LIST_INIT(&fp->ipq_fragq);
                fp->ipq_src = ipqe->ipqe_ip->ip_src;
                fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
                p = NULL;       /* insert at head of the empty fragment list */
                goto insert;
        }

        /*
         * Find a segment which begins after this one does.
         * On exit p is the predecessor (or NULL) and q the successor
         * (or NULL) of the new fragment.
         */
        for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
            p = q, q = q->ipqe_q.le_next)
                if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off)
                        break;

        /*
         * If there is a preceding segment, it may provide some of
         * our data already.  If so, drop the data from the incoming
         * segment.  If it provides all of our data, drop us.
         */
        if (p != NULL) {
                /* i = number of leading bytes already covered by p */
                i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len -
                    ipqe->ipqe_ip->ip_off;
                if (i > 0) {
                        if (i >= ipqe->ipqe_ip->ip_len)
                                goto dropfrag;
                        m_adj(dtom(ipqe->ipqe_ip), i);
                        ipqe->ipqe_ip->ip_off += i;
                        ipqe->ipqe_ip->ip_len -= i;
                }
        }

        /*
         * While we overlap succeeding segments trim them or,
         * if they are completely covered, dequeue them.
         */
        for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len >
            q->ipqe_ip->ip_off; q = nq) {
                /* i = number of leading bytes of q that we overlap */
                i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) -
                    q->ipqe_ip->ip_off;
                if (i < q->ipqe_ip->ip_len) {
                        /* Partial overlap: trim the front of q and stop. */
                        q->ipqe_ip->ip_len -= i;
                        q->ipqe_ip->ip_off += i;
                        m_adj(dtom(q->ipqe_ip), i);
                        break;
                }
                /* q is entirely covered: remove and free it. */
                nq = q->ipqe_q.le_next;
                m_freem(dtom(q->ipqe_ip));
                LIST_REMOVE(q, ipqe_q);
                FREE(q, M_IPQ);
                ip_frags--;
        }

insert:
        /*
         * Stick new segment in its place;
         * check for complete reassembly.
         */
        if (p == NULL) {
                LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
        } else {
                LIST_INSERT_AFTER(p, ipqe, ipqe_q);
        }
        /*
         * Walk the list checking that the fragments are contiguous from
         * offset 0; "next" accumulates the total data length.  After the
         * loop p is the last fragment (the list is never empty here
         * because ipqe was just inserted).
         */
        next = 0;
        for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
            p = q, q = q->ipqe_q.le_next) {
                if (q->ipqe_ip->ip_off != next)
                        return (0);     /* hole: still incomplete */
                next += q->ipqe_ip->ip_len;
        }
        /* Last fragment still says "more fragments": not complete yet. */
        if (p->ipqe_mff)
                return (0);

        /*
         * Reassembly is complete.  Check for a bogus message size and
         * concatenate fragments.
         */
        q = fp->ipq_fragq.lh_first;
        ip = q->ipqe_ip;
        if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
                ipstat.ips_toolong++;
                ip_freef(fp);
                return (0);
        }
        /*
         * Start with the first fragment's mbuf chain, then append the
         * chain of every following fragment, freeing each ipqent as we go.
         */
        m = dtom(q->ipqe_ip);
        t = m->m_next;
        m->m_next = 0;
        m_cat(m, t);
        nq = q->ipqe_q.le_next;
        FREE(q, M_IPQ);
        ip_frags--;
        for (q = nq; q != NULL; q = nq) {
                t = dtom(q->ipqe_ip);
                nq = q->ipqe_q.le_next;
                FREE(q, M_IPQ);
                ip_frags--;
                m_cat(m, t);
        }

        /*
         * Create header for new ip packet by
         * modifying header of first packet;
         * dequeue and discard fragment reassembly header.
         * Make header visible.
         */
        ip->ip_len = next;      /* total data length, header not included */
        ip->ip_ttl = 0; /* xxx */
        ip->ip_sum = 0;
        ip->ip_src = fp->ipq_src;
        ip->ip_dst = fp->ipq_dst;
        LIST_REMOVE(fp, ipq_q);
        (void) m_free(dtom(fp));
        /* Undo the m_data/m_len adjustment made on entry for this mbuf. */
        m->m_len += (ip->ip_hl << 2);
        m->m_data -= (ip->ip_hl << 2);
        /* some debugging cruft by sklower, below, will go away soon */
        if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
                register int plen = 0;
                /*
                 * Recompute the packet-header length over the whole chain.
                 * The loop walks m to NULL; t keeps the head mbuf.
                 */
                for (t = m; m; m = m->m_next)
                        plen += m->m_len;
                t->m_pkthdr.len = plen;
        }
        return (ip);

dropfrag:
        /*
         * Discard the incoming fragment.  The ip_frags decrement balances
         * the increment done by ipv4_input() before calling us.
         */
        ipstat.ips_fragdropped++;
        m_freem(m);
        FREE(ipqe, M_IPQ);
        ip_frags--;
        return (0);
}
812
813 /*
814  * Free a fragment reassembly header and all
815  * associated datagrams.
816  */
817 void
818 ip_freef(fp)
819         struct ipq *fp;
820 {
821         register struct ipqent *q, *p;
822
823         for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) {
824                 p = q->ipqe_q.le_next;
825                 m_freem(dtom(q->ipqe_ip));
826                 LIST_REMOVE(q, ipqe_q);
827                 FREE(q, M_IPQ);
828                 ip_frags--;
829         }
830         LIST_REMOVE(fp, ipq_q);
831         (void) m_free(dtom(fp));
832 }
833
834 /*
835  * IP timer processing;
836  * if a timer expires on a reassembly
837  * queue, discard it.
838  */
839 void
840 ip_slowtimo()
841 {
842         register struct ipq *fp, *nfp;
843         int s = splsoftnet();
844
845         ipq_lock();
846         for (fp = ipq.lh_first; fp != NULL; fp = nfp) {
847                 nfp = fp->ipq_q.le_next;
848                 if (--fp->ipq_ttl == 0) {
849                         ipstat.ips_fragtimeout++;
850                         ip_freef(fp);
851                 }
852         }
853         ipq_unlock();
854         splx(s);
855 }
856
857 /*
858  * Drain off all datagram fragments.
859  */
860 void
861 ip_drain()
862 {
863
864         if (ipq_lock_try() == 0)
865                 return;
866         while (ipq.lh_first != NULL) {
867                 ipstat.ips_fragdropped++;
868                 ip_freef(ipq.lh_first);
869         }
870         ipq_unlock();
871 }
872
873 /*
874  * Flush a bunch of datagram fragments, till we are down to 75%.
875  */
876 void
877 ip_flush()
878 {
879         int max = 50;
880
881         /* ipq already locked */
882         while (ipq.lh_first != NULL && ip_frags > ip_maxqueue * 3 / 4 && --max) {
883                 ipstat.ips_fragdropped++;
884                 ip_freef(ipq.lh_first);
885         }
886 }
887
888 /*
889  * Do option processing on a datagram,
890  * possibly discarding it if bad options are encountered,
891  * or forwarding it if source-routed.
892  * Returns 1 if packet has been forwarded/freed,
893  * 0 if the packet should be processed further.
894  */
895 int
896 ip_dooptions(m)
897         struct mbuf *m;
898 {
899         register struct ip *ip = mtod(m, struct ip *);
900         register u_char *cp;
901         register struct ip_timestamp *ipt;
902         register struct in_ifaddr *ia;
903         int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
904         struct in_addr *sin, dst;
905         n_time ntime;
906
907         dst = ip->ip_dst;
908         cp = (u_char *)(ip + 1);
909         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
910         for (; cnt > 0; cnt -= optlen, cp += optlen) {
911                 opt = cp[IPOPT_OPTVAL];
912                 if (opt == IPOPT_EOL)
913                         break;
914                 if (opt == IPOPT_NOP)
915                         optlen = 1;
916                 else {
917                         optlen = cp[IPOPT_OLEN];
918                         if (optlen <= 0 || optlen > cnt) {
919                                 code = &cp[IPOPT_OLEN] - (u_char *)ip;
920                                 goto bad;
921                         }
922                 }
923                 switch (opt) {
924
925                 default:
926                         break;
927
928                 /*
929                  * Source routing with record.
930                  * Find interface with current destination address.
931                  * If none on this machine then drop if strictly routed,
932                  * or do nothing if loosely routed.
933                  * Record interface address and bring up next address
934                  * component.  If strictly routed make sure next
935                  * address is on directly accessible net.
936                  */
937                 case IPOPT_LSRR:
938                 case IPOPT_SSRR:
939                         if (!ip_dosourceroute) {
940 #ifndef __ECOS
941                                 char buf[4*sizeof "123"];
942
943                                 strcpy(buf, inet_ntoa(ip->ip_dst));
944                                 log(LOG_WARNING,
945                                     "attempted source route from %s to %s\n",
946                                     inet_ntoa(ip->ip_src), buf);
947 #endif
948                                 type = ICMP_UNREACH;
949                                 code = ICMP_UNREACH_SRCFAIL;
950                                 goto bad;
951                         }
952                         if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
953                                 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
954                                 goto bad;
955                         }
956                         ipaddr.sin_addr = ip->ip_dst;
957                         ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
958                         if (ia == 0) {
959                                 if (opt == IPOPT_SSRR) {
960                                         type = ICMP_UNREACH;
961                                         code = ICMP_UNREACH_SRCFAIL;
962                                         goto bad;
963                                 }
964                                 /*
965                                  * Loose routing, and not at next destination
966                                  * yet; nothing to do except forward.
967                                  */
968                                 break;
969                         }
970                         off--;                  /* 0 origin */
971                         if (off > optlen - sizeof(struct in_addr)) {
972                                 /*
973                                  * End of source route.  Should be for us.
974                                  */
975                                 save_rte(cp, ip->ip_src);
976                                 break;
977                         }
978
979                         /*
980                          * locate outgoing interface
981                          */
982                         bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
983                             sizeof(ipaddr.sin_addr));
984                         if (opt == IPOPT_SSRR) {
985 #define INA     struct in_ifaddr *
986 #define SA      struct sockaddr *
987                             if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
988                                 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
989                         } else
990                                 ia = ip_rtaddr(ipaddr.sin_addr);
991                         if (ia == 0) {
992                                 type = ICMP_UNREACH;
993                                 code = ICMP_UNREACH_SRCFAIL;
994                                 goto bad;
995                         }
996                         ip->ip_dst = ipaddr.sin_addr;
997                         bcopy((caddr_t)&ia->ia_addr.sin_addr,
998                             (caddr_t)(cp + off), sizeof(struct in_addr));
999                         cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1000                         /*
1001                          * Let ip_intr's mcast routing check handle mcast pkts
1002                          */
1003                         forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1004                         break;
1005
1006                 case IPOPT_RR:
1007                         if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1008                                 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1009                                 goto bad;
1010                         }
1011
1012                         /*
1013                          * If no space remains, ignore.
1014                          */
1015                         off--;                  /* 0 origin */
1016                         if (off > optlen - sizeof(struct in_addr))
1017                                 break;
1018                         bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
1019                             sizeof(ipaddr.sin_addr));
1020                         /*
1021                          * locate outgoing interface; if we're the destination,
1022                          * use the incoming interface (should be same).
1023                          */
1024                         if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1025                             (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1026                                 type = ICMP_UNREACH;
1027                                 code = ICMP_UNREACH_HOST;
1028                                 goto bad;
1029                         }
1030                         bcopy((caddr_t)&ia->ia_addr.sin_addr,
1031                             (caddr_t)(cp + off), sizeof(struct in_addr));
1032                         cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1033                         break;
1034
1035                 case IPOPT_TS:
1036                         code = cp - (u_char *)ip;
1037                         ipt = (struct ip_timestamp *)cp;
1038                         if (ipt->ipt_ptr < 5 || ipt->ipt_len < 5)
1039                                 goto bad;
1040                         if (ipt->ipt_ptr - 1 + sizeof(n_time) > ipt->ipt_len) {
1041                                 if (++ipt->ipt_oflw == 0)
1042                                         goto bad;
1043                                 break;
1044                         }
1045                         sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
1046                         switch (ipt->ipt_flg) {
1047
1048                         case IPOPT_TS_TSONLY:
1049                                 break;
1050
1051                         case IPOPT_TS_TSANDADDR:
1052                                 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1053                                     sizeof(struct in_addr) > ipt->ipt_len)
1054                                         goto bad;
1055                                 ipaddr.sin_addr = dst;
1056                                 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1057                                                             m->m_pkthdr.rcvif);
1058                                 if (ia == 0)
1059                                         continue;
1060                                 bcopy((caddr_t)&ia->ia_addr.sin_addr,
1061                                     (caddr_t)sin, sizeof(struct in_addr));
1062                                 ipt->ipt_ptr += sizeof(struct in_addr);
1063                                 break;
1064
1065                         case IPOPT_TS_PRESPEC:
1066                                 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1067                                     sizeof(struct in_addr) > ipt->ipt_len)
1068                                         goto bad;
1069                                 bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
1070                                     sizeof(struct in_addr));
1071                                 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1072                                         continue;
1073                                 ipt->ipt_ptr += sizeof(struct in_addr);
1074                                 break;
1075
1076                         default:
1077                                 goto bad;
1078                         }
1079                         ntime = iptime();
1080                         bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
1081                             sizeof(n_time));
1082                         ipt->ipt_ptr += sizeof(n_time);
1083                 }
1084         }
1085         if (forward && ipforwarding) {
1086                 ip_forward(m, 1);
1087                 return (1);
1088         }
1089         return (0);
1090 bad:
1091         ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
1092         HTONS(ip->ip_len);      /* XXX because ip_input changed these three */
1093         HTONS(ip->ip_id);
1094         HTONS(ip->ip_off);
1095         icmp_error(m, type, code, 0, 0);
1096         ipstat.ips_badoptions++;
1097         return (1);
1098 }
1099
1100 /*
1101  * Given address of next destination (final or next hop),
1102  * return internet address info of interface to be used to get there.
1103  */
1104 struct in_ifaddr *
1105 ip_rtaddr(dst)
1106          struct in_addr dst;
1107 {
1108         register struct sockaddr_in *sin;
1109
1110         sin = satosin(&ipforward_rt.ro_dst);
1111
1112         if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
1113                 if (ipforward_rt.ro_rt) {
1114                         RTFREE(ipforward_rt.ro_rt);
1115                         ipforward_rt.ro_rt = 0;
1116                 }
1117                 sin->sin_family = AF_INET;
1118                 sin->sin_len = sizeof(*sin);
1119                 sin->sin_addr = dst;
1120
1121                 rtalloc(&ipforward_rt);
1122         }
1123         if (ipforward_rt.ro_rt == 0)
1124                 return ((struct in_ifaddr *)0);
1125         return (ifatoia(ipforward_rt.ro_rt->rt_ifa));
1126 }
1127
1128 /*
1129  * Save incoming source route for use in replies,
1130  * to be picked up later by ip_srcroute if the receiver is interested.
1131  */
1132 void
1133 save_rte(option, dst)
1134         u_char *option;
1135         struct in_addr dst;
1136 {
1137         unsigned olen;
1138
1139         olen = option[IPOPT_OLEN];
1140 #ifdef DIAGNOSTIC
1141         if (ipprintfs)
1142                 printf("save_rte: olen %d\n", olen);
1143 #endif /* 0 */
1144         if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1145                 return;
1146         bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
1147         ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1148         ip_srcrt.dst = dst;
1149 }
1150
1151 /*
1152  * Check whether we do proxy ARP for this address and we point to ourselves.
1153  * Code shamelessly copied from arplookup().
1154  */
1155 static int
1156 ip_weadvertise(addr)
1157         u_int32_t addr;
1158 {
1159         register struct rtentry *rt;
1160         register struct ifnet *ifp;
1161         register struct ifaddr *ifa;
1162         struct sockaddr_inarp sin;
1163
1164         sin.sin_len = sizeof(sin);
1165         sin.sin_family = AF_INET;
1166         sin.sin_addr.s_addr = addr;
1167         sin.sin_other = SIN_PROXY;
1168         rt = rtalloc1(sintosa(&sin), 0);
1169         if (rt == 0)
1170           return 0;
1171         
1172         RTFREE(rt);
1173         
1174         if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
1175             rt->rt_gateway->sa_family != AF_LINK)
1176           return 0;
1177
1178         for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next)
1179                 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
1180                     ifa = ifa->ifa_list.tqe_next) {
1181                         if (ifa->ifa_addr->sa_family != rt->rt_gateway->sa_family)
1182                                 continue;
1183
1184                         if (!bcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 
1185                             LLADDR((struct sockaddr_dl *)rt->rt_gateway),
1186                             ETHER_ADDR_LEN))
1187                                 return 1;
1188                 }
1189
1190         return 0;
1191 }
1192
1193 /*
1194  * Retrieve incoming source route for use in replies,
1195  * in the same form used by setsockopt.
1196  * The first hop is placed before the options, will be removed later.
1197  */
1198 struct mbuf *
1199 ip_srcroute()
1200 {
1201         register struct in_addr *p, *q;
1202         register struct mbuf *m;
1203
1204         if (ip_nhops == 0)
1205                 return ((struct mbuf *)0);
1206         m = m_get(M_DONTWAIT, MT_SOOPTS);
1207         if (m == 0)
1208                 return ((struct mbuf *)0);
1209
1210 #define OPTSIZ  (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1211
1212         /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1213         m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1214             OPTSIZ;
1215 #ifdef DIAGNOSTIC
1216         if (ipprintfs)
1217                 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1218 #endif
1219
1220         /*
1221          * First save first hop for return route
1222          */
1223         p = &ip_srcrt.route[ip_nhops - 1];
1224         *(mtod(m, struct in_addr *)) = *p--;
1225 #ifdef DIAGNOSTIC
1226         if (ipprintfs)
1227                 printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
1228 #endif
1229
1230         /*
1231          * Copy option fields and padding (nop) to mbuf.
1232          */
1233         ip_srcrt.nop = IPOPT_NOP;
1234         ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1235         bcopy((caddr_t)&ip_srcrt.nop,
1236             mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
1237         q = (struct in_addr *)(mtod(m, caddr_t) +
1238             sizeof(struct in_addr) + OPTSIZ);
1239 #undef OPTSIZ
1240         /*
1241          * Record return path as an IP source route,
1242          * reversing the path (pointers are now aligned).
1243          */
1244         while (p >= ip_srcrt.route) {
1245 #ifdef DIAGNOSTIC
1246                 if (ipprintfs)
1247                         printf(" %x", ntohl(q->s_addr));
1248 #endif
1249                 *q++ = *p--;
1250         }
1251         /*
1252          * Last hop goes to final destination.
1253          */
1254         *q = ip_srcrt.dst;
1255 #ifdef DIAGNOSTIC
1256         if (ipprintfs)
1257                 printf(" %x\n", ntohl(q->s_addr));
1258 #endif
1259         return (m);
1260 }
1261
1262 /*
1263  * Strip out IP options, at higher
1264  * level protocol in the kernel.
1265  * Second argument is buffer to which options
1266  * will be moved, and return value is their length.
1267  * XXX should be deleted; last arg currently ignored.
1268  */
1269 void
1270 ip_stripoptions(m, mopt)
1271         register struct mbuf *m;
1272         struct mbuf *mopt;
1273 {
1274         register int i;
1275         struct ip *ip = mtod(m, struct ip *);
1276         register caddr_t opts;
1277         int olen;
1278
1279         olen = (ip->ip_hl<<2) - sizeof (struct ip);
1280         opts = (caddr_t)(ip + 1);
1281         i = m->m_len - (sizeof (struct ip) + olen);
1282         bcopy(opts  + olen, opts, (unsigned)i);
1283         m->m_len -= olen;
1284         if (m->m_flags & M_PKTHDR)
1285                 m->m_pkthdr.len -= olen;
1286         ip->ip_hl = sizeof(struct ip) >> 2;
1287 }
1288
/*
 * Table of errno values with PRC_NCMDS slots; a zero entry means
 * "no error".  Presumably indexed by the PRC_* control command
 * codes -- TODO confirm against the header that defines PRC_NCMDS.
 */
int inetctlerrmap[PRC_NCMDS] = {
        /*  0 */ 0,
        /*  1 */ 0,
        /*  2 */ 0,
        /*  3 */ 0,
        /*  4 */ 0,
        /*  5 */ EMSGSIZE,
        /*  6 */ EHOSTDOWN,
        /*  7 */ EHOSTUNREACH,
        /*  8 */ EHOSTUNREACH,
        /*  9 */ EHOSTUNREACH,
        /* 10 */ ECONNREFUSED,
        /* 11 */ ECONNREFUSED,
        /* 12 */ EMSGSIZE,
        /* 13 */ EHOSTUNREACH,
        /* 14 */ 0,
        /* 15 */ 0,
        /* 16 */ 0,
        /* 17 */ 0,
        /* 18 */ 0,
        /* 19 */ 0,
        /* 20 */ ENOPROTOOPT
};
1297
1298 /*
1299  * Forward a packet.  If some error occurs return the sender
1300  * an icmp packet.  Note we can't always generate a meaningful
1301  * icmp message because icmp doesn't have a large enough repertoire
1302  * of codes and types.
1303  *
1304  * If not forwarding, just drop the packet.  This could be confusing
1305  * if ipforwarding was zero but some routing protocol was advancing
1306  * us as a gateway to somewhere.  However, we must let the routing
1307  * protocol deal with that.
1308  *
1309  * The srcrt parameter indicates whether the packet is being forwarded
1310  * via a source route.
1311  */
1312 void
1313 ip_forward(m, srcrt)
1314         struct mbuf *m;
1315         int srcrt;
1316 {
1317         register struct ip *ip = mtod(m, struct ip *);
1318         register struct sockaddr_in *sin;
1319         register struct rtentry *rt;
1320         int error, type = 0, code = 0;
1321         struct mbuf *mcopy;
1322         n_long dest;
1323         struct ifnet *destifp;
1324 #if 0 /*KAME IPSEC*/
1325         struct ifnet dummyifp;
1326 #endif
1327
1328         dest = 0;
1329 #ifdef DIAGNOSTIC
1330         if (ipprintfs)
1331                 printf("forward: src %x dst %x ttl %x\n", ip->ip_src.s_addr,
1332                     ip->ip_dst.s_addr, ip->ip_ttl);
1333 #endif
1334         if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
1335                 ipstat.ips_cantforward++;
1336                 m_freem(m);
1337                 return;
1338         }
1339         HTONS(ip->ip_id);
1340         if (ip->ip_ttl <= IPTTLDEC) {
1341                 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1342                 return;
1343         }
1344         ip->ip_ttl -= IPTTLDEC;
1345
1346         sin = satosin(&ipforward_rt.ro_dst);
1347         if ((rt = ipforward_rt.ro_rt) == 0 ||
1348             ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1349                 if (ipforward_rt.ro_rt) {
1350                         RTFREE(ipforward_rt.ro_rt);
1351                         ipforward_rt.ro_rt = 0;
1352                 }
1353                 sin->sin_family = AF_INET;
1354                 sin->sin_len = sizeof(*sin);
1355                 sin->sin_addr = ip->ip_dst;
1356
1357                 rtalloc(&ipforward_rt);
1358                 if (ipforward_rt.ro_rt == 0) {
1359                         icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1360                         return;
1361                 }
1362                 rt = ipforward_rt.ro_rt;
1363         }
1364
1365         /*
1366          * Save at most 68 bytes of the packet in case
1367          * we need to generate an ICMP message to the src.
1368          */
1369         mcopy = m_copy(m, 0, imin((int)ip->ip_len, 68));
1370
1371         /*
1372          * If forwarding packet using same interface that it came in on,
1373          * perhaps should send a redirect to sender to shortcut a hop.
1374          * Only send redirect if source is sending directly to us,
1375          * and if packet was not source routed (or has any options).
1376          * Also, don't send redirect if forwarding using a default route
1377          * or a route modified by a redirect.
1378          * Don't send redirect if we advertise destination's arp address
1379          * as ours (proxy arp).
1380          */
1381         if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1382             (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1383             satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1384             ipsendredirects && !srcrt &&
1385             !ip_weadvertise(satosin(rt_key(rt))->sin_addr.s_addr)) {
1386                 if (rt->rt_ifa &&
1387                     (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1388                     ifatoia(rt->rt_ifa)->ia_subnet) {
1389                     if (rt->rt_flags & RTF_GATEWAY)
1390                         dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1391                     else
1392                         dest = ip->ip_dst.s_addr;
1393                     /* Router requirements says to only send host redirects */
1394                     type = ICMP_REDIRECT;
1395                     code = ICMP_REDIRECT_HOST;
1396 #ifdef DIAGNOSTIC
1397                     if (ipprintfs)
1398                         printf("redirect (%d) to %x\n", code, (u_int32_t)dest);
1399 #endif
1400                 }
1401         }
1402
1403 #if 0 /*KAME IPSEC*/
1404         m->m_pkthdr.rcvif = NULL;
1405 #endif /*IPSEC*/
1406         error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1407             (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), 
1408             0, NULL, NULL);
1409         if (error)
1410                 ipstat.ips_cantforward++;
1411         else {
1412                 ipstat.ips_forward++;
1413                 if (type)
1414                         ipstat.ips_redirectsent++;
1415                 else {
1416                         if (mcopy)
1417                                 m_freem(mcopy);
1418                         return;
1419                 }
1420         }
1421         if (mcopy == NULL)
1422                 return;
1423         destifp = NULL;
1424
1425         switch (error) {
1426
1427         case 0:                         /* forwarded, but need redirect */
1428                 /* type, code set above */
1429                 break;
1430
1431         case ENETUNREACH:               /* shouldn't happen, checked above */
1432         case EHOSTUNREACH:
1433         case ENETDOWN:
1434         case EHOSTDOWN:
1435         default:
1436                 type = ICMP_UNREACH;
1437                 code = ICMP_UNREACH_HOST;
1438                 break;
1439
1440         case EMSGSIZE:
1441                 type = ICMP_UNREACH;
1442                 code = ICMP_UNREACH_NEEDFRAG;
1443 #if 1 /*KAME IPSEC*/
1444                 if (ipforward_rt.ro_rt)
1445                         destifp = ipforward_rt.ro_rt->rt_ifp;
1446 #else
1447                 /*
1448                  * If the packet is routed over IPsec tunnel, tell the
1449                  * originator the tunnel MTU.
1450                  *      tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1451                  * XXX quickhack!!!
1452                  */
1453                 if (ipforward_rt.ro_rt) {
1454                         struct secpolicy *sp;
1455                         int ipsecerror;
1456                         int ipsechdr;
1457                         struct route *ro;
1458
1459                         sp = ipsec4_getpolicybyaddr(mcopy,
1460                                                     IP_FORWARDING,
1461                                                     &ipsecerror);
1462
1463                         if (sp == NULL)
1464                                 destifp = ipforward_rt.ro_rt->rt_ifp;
1465                         else {
1466                                 /* count IPsec header size */
1467                                 ipsechdr = ipsec4_hdrsiz(mcopy, NULL);
1468
1469                                 /*
1470                                  * find the correct route for outer IPv4
1471                                  * header, compute tunnel MTU.
1472                                  *
1473                                  * XXX BUG ALERT
1474                                  * The "dummyifp" code relies upon the fact
1475                                  * that icmp_error() touches only ifp->if_mtu.
1476                                  */
1477                                 /*XXX*/
1478                                 destifp = NULL;
1479                                 if (sp->req != NULL
1480                                  && sp->req->sa != NULL) {
1481                                         ro = &sp->req->sa->saidx->sa_route;
1482                                         if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1483                                                 dummyifp.if_mtu =
1484                                                     ro->ro_rt->rt_ifp->if_mtu;
1485                                                 dummyifp.if_mtu -= ipsechdr;
1486                                                 destifp = &dummyifp;
1487                                         }
1488                                 }
1489
1490                                 key_freesp(sp);
1491                         }
1492                 }
1493 #endif /*IPSEC*/
1494                 ipstat.ips_cantfrag++;
1495                 break;
1496
1497         case ENOBUFS:
1498                 type = ICMP_SOURCEQUENCH;
1499                 code = 0;
1500                 break;
1501         }
1502
1503         icmp_error(mcopy, type, code, dest, destifp);
1504 }
1505
#ifdef CYGPKG_NET_SYSCTL
/*
 * sysctl handler for the IP level.
 * Only single-component names are accepted (ENOTDIR otherwise).
 * name[0] selects one integer variable, which is then read and/or
 * written through sysctl_int(); unknown names yield EOPNOTSUPP.
 * IPCTL_SOURCEROUTE refuses writes with EPERM while securelevel > 0.
 */
int
ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
        int *name;
        u_int namelen;
        void *oldp;
        size_t *oldlenp;
        void *newp;
        size_t newlen;
{
        int *var;

        /* All sysctl names at this level are terminal. */
        if (namelen != 1)
                return (ENOTDIR);

        /* Pick the variable behind the name, then hand it to sysctl_int(). */
        switch (name[0]) {
        case IPCTL_FORWARDING:
                var = &ipforwarding;
                break;
        case IPCTL_SENDREDIRECTS:
                var = &ipsendredirects;
                break;
        case IPCTL_DEFTTL:
                var = &ip_defttl;
                break;
#ifdef notyet
        case IPCTL_DEFMTU:
                var = &ip_mtu;
                break;
#endif
        case IPCTL_SOURCEROUTE:
                /*
                 * Don't allow this to change in a secure environment.
                 */
                if (newp && securelevel > 0)
                        return (EPERM);
                var = &ip_dosourceroute;
                break;
        case IPCTL_DIRECTEDBCAST:
                var = &ip_directedbcast;
                break;
        case IPCTL_IPPORT_FIRSTAUTO:
                var = &ipport_firstauto;
                break;
        case IPCTL_IPPORT_LASTAUTO:
                var = &ipport_lastauto;
                break;
        case IPCTL_IPPORT_HIFIRSTAUTO:
                var = &ipport_hifirstauto;
                break;
        case IPCTL_IPPORT_HILASTAUTO:
                var = &ipport_hilastauto;
                break;
        case IPCTL_IPPORT_MAXQUEUE:
                var = &ip_maxqueue;
                break;
        case IPCTL_ENCDEBUG:
                var = &encdebug;
                break;
        default:
                return (EOPNOTSUPP);
        }

        return (sysctl_int(oldp, oldlenp, newp, newlen, var));
}
#endif // CYGPKG_NET_SYSCTL