1 //==========================================================================
3 // sys/kern/uipc_socket2.c
7 //==========================================================================
8 //####BSDCOPYRIGHTBEGIN####
10 // -------------------------------------------
12 // Portions of this software may have been derived from OpenBSD or other sources,
13 // and are covered by the appropriate copyright disclaimers included herein.
15 // -------------------------------------------
17 //####BSDCOPYRIGHTEND####
18 //==========================================================================
19 //#####DESCRIPTIONBEGIN####
22 // Contributors: gthomas
28 //####DESCRIPTIONEND####
30 //==========================================================================
33 /* $OpenBSD: uipc_socket2.c,v 1.11 1999/12/08 06:50:17 itojun Exp $ */
34 /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
37 * Copyright (c) 1982, 1986, 1988, 1990, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the University of
51 * California, Berkeley and its contributors.
52 * 4. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
71 #include <sys/param.h>
73 #include <sys/systm.h>
78 #include <sys/malloc.h>
80 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
84 #include <sys/signalvar.h>
88 #include <cyg/infra/diag.h>
92 * Primitive routines for operating on sockets and socket buffers
95 /* strings for sleep message: */
96 char netio[] = "netio";
97 char netcon[] = "netcon";
98 char netcls[] = "netcls";
100 u_long sb_max = SB_MAX; /* patchable */
103 * Procedures to manipulate state flags of socket
104 * and do appropriate wakeups. Normal sequence from the
105 * active (originating) side is that soisconnecting() is
106 * called during processing of connect() call,
107 * resulting in an eventual call to soisconnected() if/when the
108 * connection is established. When the connection is torn down
109 * soisdisconnecting() is called during processing of disconnect() call,
110 * and soisdisconnected() is called when the connection to the peer
111 * is totally severed. The semantics of these routines are such that
112 * connectionless protocols can call soisconnected() and soisdisconnected()
113 * only, bypassing the in-progress calls when setting up a ``connection''
116 * From the passive side, a socket is created with
117 * two queues of sockets: so_q0 for connections in progress
118 * and so_q for connections already made and awaiting user acceptance.
119 * As a protocol is preparing incoming connections, it creates a socket
120 * structure queued on so_q0 by calling sonewconn(). When the connection
121 * is established, soisconnected() is called, and transfers the
122 * socket structure to so_q, making it available to accept().
124 * If a socket is closed with sockets on either
125 * so_q0 or so_q, these sockets are dropped.
127 * If higher level protocols are implemented in
128 * the kernel, the wakeups done here will sometimes
129 * cause software-interrupt process scheduling.
134 register struct socket *so;
137 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
138 so->so_state |= SS_ISCONNECTING;
143 register struct socket *so;
145 register struct socket *head = so->so_head;
147 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
148 so->so_state |= SS_ISCONNECTED;
149 if (head && soqremque(so, 0)) {
150 soqinsque(head, so, 1);
152 wakeup((caddr_t)&head->so_timeo);
154 wakeup((caddr_t)&so->so_timeo);
/*
 * Mark socket `so` as being torn down: clear SS_ISCONNECTING, set
 * SS_ISDISCONNECTING together with SS_CANTRCVMORE and SS_CANTSENDMORE,
 * then wake anything sleeping on &so->so_timeo.
 */
161 soisdisconnecting(so)
162 register struct socket *so;
165 so->so_state &= ~SS_ISCONNECTING;
/* Both directions are flagged as closed as soon as disconnection starts. */
166 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
167 wakeup((caddr_t)&so->so_timeo);
174 register struct socket *so;
177 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
178 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
179 wakeup((caddr_t)&so->so_timeo);
185 * When an attempt at a new connection is noted on a socket
186 * which accepts connections, sonewconn is called. If the
187 * connection is possible (subject to space constraints, etc.)
188 * then we allocate a new structure, properly linked into the
189 * data structure of the original socket, and return this.
190 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
192 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
193 * to catch calls that are missing the (new) second parameter.
/*
 * Create a socket for a new connection arriving on `head`.
 * `connstatus` is OR-ed into the new socket's so_state at the end and also
 * picks the queue index handed to soqinsque()/soqremque(): 1 when it is
 * non-zero, 0 otherwise.  Every failure path visible here returns a null
 * socket pointer.
 */
196 sonewconn1(head, connstatus)
197 register struct socket *head;
200 register struct socket *so;
/* Queue selector: 1 if a connection status was supplied, else 0. */
201 int soqueue = connstatus ? 1 : 0;
/* Refuse when both queues together exceed three times so_qlimit. */
203 if (head->so_qlen + head->so_q0len > head->so_qlimit * 3)
204 return ((struct socket *)0);
205 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
/*
 * NOTE(review): the condition guarding the next return is not visible in
 * this excerpt -- presumably a check that MALLOC produced a socket; confirm.
 */
207 return ((struct socket *)0);
/* Start from a zeroed structure, then copy settings from `head`. */
208 bzero((caddr_t)so, sizeof(*so));
209 so->so_type = head->so_type;
/* Same options as `head`, minus SO_ACCEPTCONN. */
210 so->so_options = head->so_options &~ SO_ACCEPTCONN;
211 so->so_linger = head->so_linger;
/* Same state bits as `head`, plus SS_NOFDREF. */
212 so->so_state = head->so_state | SS_NOFDREF;
213 so->so_proto = head->so_proto;
214 so->so_timeo = head->so_timeo;
215 so->so_pgid = head->so_pgid;
216 so->so_euid = head->so_euid;
217 so->so_ruid = head->so_ruid;
/* Size both buffers from head's high-water marks; the result is ignored. */
218 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
/* The socket is queued on `head` before the protocol is asked to attach. */
219 soqinsque(head, so, soqueue);
220 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
221 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
/* PRU_ATTACH returned non-zero: unqueue the socket and free it. */
222 (void) soqremque(so, soqueue);
223 (void) free((caddr_t)so, M_SOCKET);
224 return ((struct socket *)0);
228 wakeup((caddr_t)&head->so_timeo);
/* Record the caller-supplied connection status on the new socket. */
229 so->so_state |= connstatus;
/*
 * Queue-insertion helper for `head`'s connection queues.
 * The two visible loops walk the so_q0 chain and the so_q chain of `head`
 * until they reach a null link, leaving `prev` pointing at that link.
 * NOTE(review): the code that chooses a chain based on `q` and stores `so`
 * through `prev` is not visible in this excerpt -- confirm in the full file.
 */
235 soqinsque(head, so, q)
236 register struct socket *head, *so;
/* Pointer-to-link cursor: lets the walk end on the terminating null link. */
240 register struct socket **prev;
245 for (prev = &(head->so_q0); *prev; )
246 prev = &((*prev)->so_q0);
/* Same walk, following the so_q links instead of so_q0. */
250 for (prev = &(head->so_q); *prev; )
251 prev = &((*prev)->so_q);
258 register struct socket *so;
261 register struct socket *head, *prev, *next;
266 next = q ? prev->so_q : prev->so_q0;
274 prev->so_q0 = next->so_q0;
277 prev->so_q = next->so_q;
280 next->so_q0 = next->so_q = 0;
286 * Socantsendmore indicates that no more data will be sent on the
287 * socket; it would normally be applied to a socket when the user
288 * informs the system that no more data is to be sent, by the protocol
289 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
290 * will be received, and will normally be applied to the socket by a
291 * protocol when it detects that the peer will send no more data.
292 * Data queued for reading in the socket may yet be read.
300 so->so_state |= SS_CANTSENDMORE;
309 so->so_state |= SS_CANTRCVMORE;
314 * Wait for data to arrive at/drain from a socket buffer.
321 sb->sb_flags |= SB_WAIT;
322 return (tsleep((caddr_t)&sb->sb_cc,
323 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
328 * Lock a sockbuf whose lock is currently held by someone else;
329 * return any error returned from sleep (EINTR).
333 register struct sockbuf *sb;
337 while (sb->sb_flags & SB_LOCK) {
338 sb->sb_flags |= SB_WANT;
339 error = tsleep((caddr_t)&sb->sb_flags,
340 (sb->sb_flags & SB_NOINTR) ?
341 PSOCK : PSOCK|PCATCH, netio, 0);
345 sb->sb_flags |= SB_LOCK;
351 * Set lock on sockbuf sb; sleep if lock is already held.
352 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
353 * Returns error without lock if sleep is interrupted.
/*
 * Take SB_LOCK on `sb`; `wf` is compared against M_WAITOK when the lock is
 * already held.
 * NOTE(review): the return statements and the call made on the M_WAITOK
 * path are not visible in this excerpt (the inline note below names
 * 'sb_lock()') -- confirm against the full source.
 */
356 sblock(struct sockbuf *sb, int wf)
/* The flag test below runs after cyg_scheduler_safe_lock(). */
359 cyg_scheduler_safe_lock();
360 if (sb->sb_flags & SB_LOCK) {
361 // Already locked by another thread
362 if (wf == M_WAITOK) {
364 // Note: scheduler unlocked by 'sb_lock()'
/* Presumably the non-waiting path: unlock without taking SB_LOCK -- confirm. */
367 cyg_scheduler_unlock();
/* Presumably reached only when SB_LOCK was clear: claim it, then unlock. */
370 sb->sb_flags |= SB_LOCK;
372 cyg_scheduler_unlock();
377 /* release lock on sockbuf sb */
/*
 * Clear SB_LOCK on `sb` and, if SB_WANT is set, clear it too and wake
 * sleepers on &sb->sb_flags.  The flag updates run between
 * cyg_scheduler_lock() and cyg_scheduler_unlock().
 */
379 sbunlock(struct sockbuf *sb)
381 cyg_scheduler_lock();
382 sb->sb_flags &= ~SB_LOCK;
/* SB_WANT is set elsewhere in this file just before sleeping on &sb->sb_flags. */
383 if (sb->sb_flags & SB_WANT) {
384 sb->sb_flags &= ~SB_WANT;
385 wakeup((caddr_t)&sb->sb_flags);
387 cyg_scheduler_unlock();
392 * Wakeup processes waiting on a socket buffer.
393 * Do asynchronous notification via SIGIO
394 * if the socket has the SS_ASYNC flag set.
398 register struct socket *so;
399 register struct sockbuf *sb;
401 selwakeup(&sb->sb_sel);
402 sb->sb_flags &= ~SB_SEL;
403 if (sb->sb_flags & SB_WAIT) {
404 sb->sb_flags &= ~SB_WAIT;
405 wakeup((caddr_t)&sb->sb_cc);
408 if (so->so_state & SS_ASYNC)
409 csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
414 * Socket buffer (struct sockbuf) utility routines.
416 * Each socket contains two socket buffers: one for sending data and
417 * one for receiving data. Each buffer contains a queue of mbufs,
418 * information about the number of mbufs and amount of data in the
419 * queue, and other fields allowing select() statements and notification
420 * on data availability to be implemented.
422 * Data stored in a socket buffer is maintained as a list of records.
423 * Each record is a list of mbufs chained together with the m_next
424 * field. Records are chained together with the m_nextpkt field. The upper
425 * level routine soreceive() expects the following conventions to be
426 * observed when placing information in the receive buffer:
428 * 1. If the protocol requires each message be preceded by the sender's
429 * name, then a record containing that name must be present before
430 * any associated data (mbuf's must be of type MT_SONAME).
431 * 2. If the protocol supports the exchange of ``access rights'' (really
432 * just additional data associated with the message), and there are
433 * ``rights'' to be received, then a record containing this data
434 * should be present (mbuf's must be of type MT_CONTROL).
435 * 3. If a name or rights record exists, then it must be followed by
436 * a data record, perhaps of zero length.
438 * Before using a new socket structure it is first necessary to reserve
439 * buffer space to the socket, by calling sbreserve(). This should commit
440 * some of the available buffer space in the system buffer pool for the
441 * socket (currently, it does nothing but enforce limits). The space
442 * should be released by calling sbrelease() when the socket is destroyed.
/*
 * Reserve buffer space for socket `so`: `sndcc` is passed to sbreserve()
 * for the send buffer and `rcvcc` for the receive buffer, after which the
 * low-water marks are given defaults and clamped.
 * NOTE(review): the statements taken when either sbreserve() call returns 0,
 * and the return values, are not visible in this excerpt.
 */
446 soreserve(so, sndcc, rcvcc)
447 register struct socket *so;
451 if (sbreserve(&so->so_snd, sndcc) == 0)
453 if (sbreserve(&so->so_rcv, rcvcc) == 0)
/* An unset (zero) receive low-water mark becomes 1. */
455 if (so->so_rcv.sb_lowat == 0)
456 so->so_rcv.sb_lowat = 1;
/* An unset (zero) send low-water mark becomes MCLBYTES... */
457 if (so->so_snd.sb_lowat == 0)
458 so->so_snd.sb_lowat = MCLBYTES;
/* ...but is never left above the send high-water mark. */
459 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
460 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
/* Presumably the cleanup path after a failed receive reservation -- confirm. */
463 sbrelease(&so->so_snd);
469 * Allot mbufs to a sockbuf.
470 * Attempt to scale mbmax so that mbcnt doesn't become limiting
471 * if buffering efficiency is near the normal case.
479 if (cc == 0 || cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
482 sb->sb_mbmax = min(cc * 2, sb_max);
483 if (sb->sb_lowat > sb->sb_hiwat)
484 sb->sb_lowat = sb->sb_hiwat;
489 * Free mbufs held by a socket, and reserved mbuf space.
497 sb->sb_hiwat = sb->sb_mbmax = 0;
501 * Routines to add and remove
502 * data from an mbuf queue.
504 * The routines sbappend() or sbappendrecord() are normally called to
505 * append new mbufs to a socket buffer, after checking that adequate
506 * space is available, comparing the function sbspace() with the amount
507 * of data to be added. sbappendrecord() differs from sbappend() in
508 * that data supplied is treated as the beginning of a new record.
509 * To place a sender's address, optional access rights, and data in a
510 * socket receive buffer, sbappendaddr() should be used. To place
511 * access rights and data in a socket receive buffer, sbappendrights()
512 * should be used. In either case, the new data begins a new record.
513 * Note that unlike sbappend() and sbappendrecord(), these routines check
514 * for the caller that there will be enough space to store the data.
515 * Each fails if there is not enough space, or if it cannot find mbufs
516 * to store additional information in.
518 * Reliable protocols may use the socket send buffer to hold data
519 * awaiting acknowledgement. Data is normally copied from a socket
520 * send buffer in a protocol with m_copy for output to a peer,
521 * and then removed from the socket buffer with sbdrop()
522 * or sbdroprecord() when the data is acknowledged by the peer.
526 * Append mbuf chain m to the last record in the
527 * socket buffer sb. The additional space associated
528 * with the mbuf chain is recorded in sb. Empty mbufs are
529 * discarded and mbufs are compacted where possible.
536 register struct mbuf *n;
540 if ((n = sb->sb_mb) != NULL) {
544 if (n->m_flags & M_EOR) {
545 sbappendrecord(sb, m); /* XXXXXX!!!! */
548 } while (n->m_next && (n = n->m_next));
550 sbcompress(sb, m, n);
556 register struct sockbuf *sb;
558 register struct mbuf *m;
559 register int len = 0, mbcnt = 0;
561 for (m = sb->sb_mb; m; m = m->m_next) {
564 if (m->m_flags & M_EXT)
565 mbcnt += m->m_ext.ext_size;
567 panic("sbcheck nextpkt");
569 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
570 printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
571 mbcnt, sb->sb_mbcnt);
578 * As above, except the mbuf chain
579 * begins a new record.
/*
 * Append the chain starting at `m0` to `sb`, then pass the buffer to
 * sbcompress().
 * NOTE(review): several statements between the visible lines (queue walk,
 * accounting, linking of m0) are missing from this excerpt.
 */
582 sbappendrecord(sb, m0)
583 register struct sockbuf *sb;
584 register struct mbuf *m0;
586 register struct mbuf *m;
590 if ((m = sb->sb_mb) != NULL)
594 * Put the first mbuf on the queue.
595 * Note this permits zero length records.
604 if (m && (m0->m_flags & M_EOR)) {
/* M_EOR is cleared on m0 when `m` is non-null. */
605 m0->m_flags &= ~M_EOR;
608 sbcompress(sb, m, m0);
612 * As above except that OOB data
613 * is inserted at the beginning of the sockbuf,
614 * but after any other OOB data.
618 register struct sockbuf *sb;
619 register struct mbuf *m0;
621 register struct mbuf *m;
622 register struct mbuf **mp;
626 for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
631 continue; /* WANT next train */
634 if ((m = m->m_next) != NULL)
635 goto again; /* inspect THIS train further */
640 * Put the first mbuf on the queue.
641 * Note this permits zero length records.
648 if (m && (m0->m_flags & M_EOR)) {
649 m0->m_flags &= ~M_EOR;
652 sbcompress(sb, m, m0);
656 * Append address and data, and optionally, control (ancillary) data
657 * to the receive queue of a socket. If present,
658 * m0 must include a packet header with total length.
659 * Returns 0 if no space in sockbuf or insufficient mbufs.
/*
 * Queue address `asa`, optional `control` mbufs and data `m0` on `sb`.
 * NOTE(review): the return statements and the final linking onto
 * sb->sb_mb are not visible in this excerpt.
 */
662 sbappendaddr(sb, asa, m0, control)
663 register struct sockbuf *sb;
664 struct sockaddr *asa;
665 struct mbuf *m0, *control;
667 register struct mbuf *m, *n;
/* Running total of bytes needed, starting with the address length. */
668 int space = asa->sa_len;
/* A data chain without M_PKTHDR on its first mbuf triggers a panic. */
670 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
671 panic("sbappendaddr");
673 space += m0->m_pkthdr.len;
/* Walk the control chain; presumably its lengths are added to `space` -- confirm. */
674 for (n = control; n; n = n->m_next) {
676 if (n->m_next == 0) /* keep pointer to last control buf */
/* Capacity check against the free space reported by sbspace(). */
679 if (space > sbspace(sb))
/* The address is also checked against MLEN before the mbuf is fetched. */
681 if (asa->sa_len > MLEN)
683 MGET(m, M_DONTWAIT, MT_SONAME);
/* Copy the sockaddr into the MT_SONAME mbuf. */
686 m->m_len = asa->sa_len;
687 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
689 n->m_next = m0; /* concatenate data to control */
693 for (n = m; n; n = n->m_next)
695 if ((n = sb->sb_mb) != NULL) {
/*
 * Queue `control` followed by data chain `m0` on `sb`.
 * NOTE(review): the space computation, the panic condition and the return
 * statements are not visible in this excerpt.
 */
705 sbappendcontrol(sb, m0, control)
707 struct mbuf *m0, *control;
709 register struct mbuf *m, *n;
713 panic("sbappendcontrol");
/* Walk the control chain; the loop header itself has no exit condition. */
714 for (m = control; ; m = m->m_next) {
719 n = m; /* save pointer to last control buffer */
720 for (m = m0; m; m = m->m_next)
/* Capacity check against the free space reported by sbspace(). */
722 if (space > sbspace(sb))
724 n->m_next = m0; /* concatenate data to control */
725 for (m = control; m; m = m->m_next)
727 if ((n = sb->sb_mb) != NULL) {
/* Link `control` in through an existing record's m_nextpkt pointer. */
730 n->m_nextpkt = control;
737 * Compress mbuf chain m into the socket
738 * buffer sb following mbuf n. If n
739 * is null, the buffer is presumed empty.
743 register struct sockbuf *sb;
744 register struct mbuf *m, *n;
746 register int eor = 0;
747 register struct mbuf *o;
750 eor |= m->m_flags & M_EOR;
753 (((o = m->m_next) || (o = n)) &&
754 o->m_type == m->m_type))) {
758 if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
759 (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
760 n->m_type == m->m_type) {
761 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
763 n->m_len += m->m_len;
764 sb->sb_cc += m->m_len;
774 m->m_flags &= ~M_EOR;
783 diag_printf("semi-panic: sbcompress\n");
785 printf("semi-panic: sbcompress\n");
791 * Free all mbufs in a sockbuf.
792 * Check that all resources are reclaimed.
796 register struct sockbuf *sb;
799 if (sb->sb_flags & SB_LOCK)
802 sbdrop(sb, (int)sb->sb_cc);
803 if (sb->sb_cc || sb->sb_mb)
808 * Drop data from (the front of) a sockbuf.
812 register struct sockbuf *sb;
815 register struct mbuf *m, *mn;
818 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
827 if (m->m_len > len) {
838 while (m && m->m_len == 0) {
851 * Drop a record off the front of a sockbuf
852 * and move the next record to the front.
856 register struct sockbuf *sb;
858 register struct mbuf *m, *mn;
862 sb->sb_mb = m->m_nextpkt;
866 } while ((m = mn) != NULL);
871 * Create a "control" mbuf containing the specified data
872 * with the specified type for presentation on a socket buffer.
875 sbcreatecontrol(p, size, type, level)
880 register struct cmsghdr *cp;
883 if (size + sizeof(*cp) > MCLBYTES) {
885 diag_printf("sbcreatecontrol: message too large %d\n", size);
887 printf("sbcreatecontrol: message too large %d\n", size);
892 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
893 return ((struct mbuf *) NULL);
894 if (size + sizeof(*cp) > MLEN) {
895 MCLGET(m, M_DONTWAIT);
896 if ((m->m_flags & M_EXT) == 0) {
901 cp = mtod(m, struct cmsghdr *);
902 bcopy(p, CMSG_DATA(cp), size);
906 cp->cmsg_level = level;
907 cp->cmsg_type = type;