uipc_socket.c revision 1.42 1 /* $NetBSD: uipc_socket.c,v 1.42 1999/01/20 20:24:12 mycroft Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/kernel.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/resourcevar.h>
53 #include <sys/pool.h>
54
55 struct pool socket_pool;
56
57 void
58 soinit()
59 {
60
61 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
62 "sockpl", 0, NULL, NULL, M_SOCKET);
63 }
64
65 /*
66 * Socket operation routines.
67 * These routines are called by the routines in
68 * sys_socket.c or from a system process, and
69 * implement the semantics of socket operations by
70 * switching out to the protocol specific routines.
71 */
72 /*ARGSUSED*/
73 int
74 socreate(dom, aso, type, proto)
75 int dom;
76 struct socket **aso;
77 register int type;
78 int proto;
79 {
80 struct proc *p = curproc; /* XXX */
81 register struct protosw *prp;
82 register struct socket *so;
83 register int error;
84 int s;
85
86 if (proto)
87 prp = pffindproto(dom, proto, type);
88 else
89 prp = pffindtype(dom, type);
90 if (prp == 0 || prp->pr_usrreq == 0)
91 return (EPROTONOSUPPORT);
92 if (prp->pr_type != type)
93 return (EPROTOTYPE);
94 s = splsoftnet();
95 so = pool_get(&socket_pool, PR_WAITOK);
96 memset((caddr_t)so, 0, sizeof(*so));
97 TAILQ_INIT(&so->so_q0);
98 TAILQ_INIT(&so->so_q);
99 so->so_type = type;
100 so->so_proto = prp;
101 so->so_send = sosend;
102 so->so_receive = soreceive;
103 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
104 (struct mbuf *)(long)proto, (struct mbuf *)0, p);
105 if (error) {
106 so->so_state |= SS_NOFDREF;
107 sofree(so);
108 splx(s);
109 return (error);
110 }
111 #ifdef COMPAT_SUNOS
112 {
113 extern struct emul emul_sunos;
114 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
115 so->so_options |= SO_BROADCAST;
116 }
117 #endif
118 splx(s);
119 *aso = so;
120 return (0);
121 }
122
123 int
124 sobind(so, nam)
125 struct socket *so;
126 struct mbuf *nam;
127 {
128 struct proc *p = curproc; /* XXX */
129 int s = splsoftnet();
130 int error;
131
132 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
133 nam, (struct mbuf *)0, p);
134 splx(s);
135 return (error);
136 }
137
138 int
139 solisten(so, backlog)
140 register struct socket *so;
141 int backlog;
142 {
143 int s = splsoftnet(), error;
144
145 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
146 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
147 if (error) {
148 splx(s);
149 return (error);
150 }
151 if (so->so_q.tqh_first == NULL)
152 so->so_options |= SO_ACCEPTCONN;
153 if (backlog < 0)
154 backlog = 0;
155 so->so_qlimit = min(backlog, SOMAXCONN);
156 splx(s);
157 return (0);
158 }
159
160 void
161 sofree(so)
162 register struct socket *so;
163 {
164
165 /*
166 * We must not decommission a socket that's on the accept(2) queue.
167 * If we do, then accept(2) may hang even after select(2) indicated
168 * that the listening socket was ready.
169 */
170 if (so->so_pcb || so->so_head || (so->so_state & SS_NOFDREF) == 0)
171 return;
172 sbrelease(&so->so_snd);
173 sorflush(so);
174 pool_put(&socket_pool, so);
175 }
176
177 /*
178 * Close a socket on last file table reference removal.
179 * Initiate disconnect if connected.
180 * Free socket when disconnect complete.
181 */
182 int
183 soclose(so)
184 register struct socket *so;
185 {
186 struct socket *so2;
187 int s = splsoftnet(); /* conservative */
188 int error = 0;
189
190 if (so->so_options & SO_ACCEPTCONN) {
191 while ((so2 = so->so_q0.tqh_first) != 0) {
192 (void) soqremque(so2, 0);
193 (void) soabort(so2);
194 }
195 while ((so2 = so->so_q.tqh_first) != 0) {
196 (void) soqremque(so2, 1);
197 (void) soabort(so2);
198 }
199 }
200 if (so->so_pcb == 0)
201 goto discard;
202 if (so->so_state & SS_ISCONNECTED) {
203 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
204 error = sodisconnect(so);
205 if (error)
206 goto drop;
207 }
208 if (so->so_options & SO_LINGER) {
209 if ((so->so_state & SS_ISDISCONNECTING) &&
210 (so->so_state & SS_NBIO))
211 goto drop;
212 while (so->so_state & SS_ISCONNECTED) {
213 error = tsleep((caddr_t)&so->so_timeo,
214 PSOCK | PCATCH, netcls,
215 so->so_linger * hz);
216 if (error)
217 break;
218 }
219 }
220 }
221 drop:
222 if (so->so_pcb) {
223 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
224 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
225 (struct proc *)0);
226 if (error == 0)
227 error = error2;
228 }
229 discard:
230 if (so->so_state & SS_NOFDREF)
231 panic("soclose: NOFDREF");
232 so->so_state |= SS_NOFDREF;
233 sofree(so);
234 splx(s);
235 return (error);
236 }
237
238 /*
239 * Must be called at splsoftnet...
240 */
241 int
242 soabort(so)
243 struct socket *so;
244 {
245
246 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
247 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
248 }
249
250 int
251 soaccept(so, nam)
252 register struct socket *so;
253 struct mbuf *nam;
254 {
255 int s = splsoftnet();
256 int error;
257
258 if ((so->so_state & SS_NOFDREF) == 0)
259 panic("soaccept: !NOFDREF");
260 so->so_state &= ~SS_NOFDREF;
261 if ((so->so_state & SS_ISDISCONNECTED) == 0)
262 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
263 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
264 else
265 error = 0;
266 splx(s);
267 return (error);
268 }
269
270 int
271 soconnect(so, nam)
272 register struct socket *so;
273 struct mbuf *nam;
274 {
275 struct proc *p = curproc; /* XXX */
276 int s;
277 int error;
278
279 if (so->so_options & SO_ACCEPTCONN)
280 return (EOPNOTSUPP);
281 s = splsoftnet();
282 /*
283 * If protocol is connection-based, can only connect once.
284 * Otherwise, if connected, try to disconnect first.
285 * This allows user to disconnect by connecting to, e.g.,
286 * a null address.
287 */
288 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
289 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
290 (error = sodisconnect(so))))
291 error = EISCONN;
292 else
293 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
294 (struct mbuf *)0, nam, (struct mbuf *)0, p);
295 splx(s);
296 return (error);
297 }
298
299 int
300 soconnect2(so1, so2)
301 register struct socket *so1;
302 struct socket *so2;
303 {
304 int s = splsoftnet();
305 int error;
306
307 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
308 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
309 (struct proc *)0);
310 splx(s);
311 return (error);
312 }
313
314 int
315 sodisconnect(so)
316 register struct socket *so;
317 {
318 int s = splsoftnet();
319 int error;
320
321 if ((so->so_state & SS_ISCONNECTED) == 0) {
322 error = ENOTCONN;
323 goto bad;
324 }
325 if (so->so_state & SS_ISDISCONNECTING) {
326 error = EALREADY;
327 goto bad;
328 }
329 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
330 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
331 (struct proc *)0);
332 bad:
333 splx(s);
334 return (error);
335 }
336
337 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
338 /*
339 * Send on a socket.
340 * If send must go all at once and message is larger than
341 * send buffering, then hard error.
342 * Lock against other senders.
343 * If must go all at once and not enough room now, then
344 * inform user that this would block and do nothing.
345 * Otherwise, if nonblocking, send as much as possible.
346 * The data to be sent is described by "uio" if nonzero,
347 * otherwise by the mbuf chain "top" (which must be null
348 * if uio is not). Data provided in mbuf chain must be small
349 * enough to send all at once.
350 *
351 * Returns nonzero on error, timeout or signal; callers
352 * must check for short counts if EINTR/ERESTART are returned.
353 * Data and control buffers are freed on return.
354 */
355 int
356 sosend(so, addr, uio, top, control, flags)
357 register struct socket *so;
358 struct mbuf *addr;
359 struct uio *uio;
360 struct mbuf *top;
361 struct mbuf *control;
362 int flags;
363 {
364 struct proc *p = curproc; /* XXX */
365 struct mbuf **mp;
366 register struct mbuf *m;
367 register long space, len, resid;
368 int clen = 0, error, s, dontroute, mlen;
369 int atomic = sosendallatonce(so) || top;
370
371 if (uio)
372 resid = uio->uio_resid;
373 else
374 resid = top->m_pkthdr.len;
375 /*
376 * In theory resid should be unsigned.
377 * However, space must be signed, as it might be less than 0
378 * if we over-committed, and we must use a signed comparison
379 * of space and resid. On the other hand, a negative resid
380 * causes us to loop sending 0-length segments to the protocol.
381 */
382 if (resid < 0) {
383 error = EINVAL;
384 goto out;
385 }
386 dontroute =
387 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
388 (so->so_proto->pr_flags & PR_ATOMIC);
389 p->p_stats->p_ru.ru_msgsnd++;
390 if (control)
391 clen = control->m_len;
392 #define snderr(errno) { error = errno; splx(s); goto release; }
393
394 restart:
395 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
396 goto out;
397 do {
398 s = splsoftnet();
399 if (so->so_state & SS_CANTSENDMORE)
400 snderr(EPIPE);
401 if (so->so_error)
402 snderr(so->so_error);
403 if ((so->so_state & SS_ISCONNECTED) == 0) {
404 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
405 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
406 !(resid == 0 && clen != 0))
407 snderr(ENOTCONN);
408 } else if (addr == 0)
409 snderr(EDESTADDRREQ);
410 }
411 space = sbspace(&so->so_snd);
412 if (flags & MSG_OOB)
413 space += 1024;
414 if ((atomic && resid > so->so_snd.sb_hiwat) ||
415 clen > so->so_snd.sb_hiwat)
416 snderr(EMSGSIZE);
417 if (space < resid + clen && uio &&
418 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
419 if (so->so_state & SS_NBIO)
420 snderr(EWOULDBLOCK);
421 sbunlock(&so->so_snd);
422 error = sbwait(&so->so_snd);
423 splx(s);
424 if (error)
425 goto out;
426 goto restart;
427 }
428 splx(s);
429 mp = ⊤
430 space -= clen;
431 do {
432 if (uio == NULL) {
433 /*
434 * Data is prepackaged in "top".
435 */
436 resid = 0;
437 if (flags & MSG_EOR)
438 top->m_flags |= M_EOR;
439 } else do {
440 if (top == 0) {
441 MGETHDR(m, M_WAIT, MT_DATA);
442 mlen = MHLEN;
443 m->m_pkthdr.len = 0;
444 m->m_pkthdr.rcvif = (struct ifnet *)0;
445 } else {
446 MGET(m, M_WAIT, MT_DATA);
447 mlen = MLEN;
448 }
449 if (resid >= MINCLSIZE && space >= MCLBYTES) {
450 MCLGET(m, M_WAIT);
451 if ((m->m_flags & M_EXT) == 0)
452 goto nopages;
453 mlen = MCLBYTES;
454 #ifdef MAPPED_MBUFS
455 len = min(MCLBYTES, resid);
456 #else
457 if (atomic && top == 0) {
458 len = min(MCLBYTES - max_hdr, resid);
459 m->m_data += max_hdr;
460 } else
461 len = min(MCLBYTES, resid);
462 #endif
463 space -= len;
464 } else {
465 nopages:
466 len = min(min(mlen, resid), space);
467 space -= len;
468 /*
469 * For datagram protocols, leave room
470 * for protocol headers in first mbuf.
471 */
472 if (atomic && top == 0 && len < mlen)
473 MH_ALIGN(m, len);
474 }
475 error = uiomove(mtod(m, caddr_t), (int)len, uio);
476 resid = uio->uio_resid;
477 m->m_len = len;
478 *mp = m;
479 top->m_pkthdr.len += len;
480 if (error)
481 goto release;
482 mp = &m->m_next;
483 if (resid <= 0) {
484 if (flags & MSG_EOR)
485 top->m_flags |= M_EOR;
486 break;
487 }
488 } while (space > 0 && atomic);
489 if (dontroute)
490 so->so_options |= SO_DONTROUTE;
491 if (resid > 0)
492 so->so_state |= SS_MORETOCOME;
493 s = splsoftnet(); /* XXX */
494 error = (*so->so_proto->pr_usrreq)(so,
495 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
496 top, addr, control, p);
497 splx(s);
498 if (dontroute)
499 so->so_options &= ~SO_DONTROUTE;
500 if (resid > 0)
501 so->so_state &= ~SS_MORETOCOME;
502 clen = 0;
503 control = 0;
504 top = 0;
505 mp = ⊤
506 if (error)
507 goto release;
508 } while (resid && space > 0);
509 } while (resid);
510
511 release:
512 sbunlock(&so->so_snd);
513 out:
514 if (top)
515 m_freem(top);
516 if (control)
517 m_freem(control);
518 return (error);
519 }
520
521 /*
522 * Implement receive operations on a socket.
523 * We depend on the way that records are added to the sockbuf
524 * by sbappend*. In particular, each record (mbufs linked through m_next)
525 * must begin with an address if the protocol so specifies,
526 * followed by an optional mbuf or mbufs containing ancillary data,
527 * and then zero or more mbufs of data.
528 * In order to avoid blocking network interrupts for the entire time here,
529 * we splx() while doing the actual copy to user space.
530 * Although the sockbuf is locked, new data may still be appended,
531 * and thus we must maintain consistency of the sockbuf during that time.
532 *
533 * The caller may receive the data as a single mbuf chain by supplying
534 * an mbuf **mp0 for use in returning the chain. The uio is then used
535 * only for the count in uio_resid.
536 */
/*
 * Arguments:
 *   so       - socket to receive from.
 *   paddr    - if non-null, cleared on entry; for PR_ADDR protocols it
 *              receives the record's address mbuf (a copy under MSG_PEEK).
 *   uio      - uio_resid is the number of bytes wanted; data is copied
 *              out through it unless mp0 is supplied.
 *   mp0      - if non-null, data mbufs are returned chained through it
 *              and only uio_resid is decremented.
 *   controlp - if non-null, cleared on entry and then receives the
 *              record's MT_CONTROL mbufs.
 *   flagsp   - if non-null, supplies the MSG_* input flags (MSG_EOR is
 *              masked off) and has the resulting flags OR'ed in on the
 *              normal exit path.
 *
 * Returns 0 or an error number.
 */
537 int
538 soreceive(so, paddr, uio, mp0, controlp, flagsp)
539 register struct socket *so;
540 struct mbuf **paddr;
541 struct uio *uio;
542 struct mbuf **mp0;
543 struct mbuf **controlp;
544 int *flagsp;
545 {
546 register struct mbuf *m, **mp;
547 register int flags, len, error, s, offset;
548 struct protosw *pr = so->so_proto;
549 struct mbuf *nextrecord;
550 int moff, type = 0;
/* Remember the request size so we can tell later whether anything moved. */
551 int orig_resid = uio->uio_resid;
552
553 mp = mp0;
554 if (paddr)
555 *paddr = 0;
556 if (controlp)
557 *controlp = 0;
558 if (flagsp)
559 flags = *flagsp &~ MSG_EOR;
560 else
561 flags = 0;
/*
 * MSG_OOB: fetch out-of-band data from the protocol with PRU_RCVOOB into
 * a fresh mbuf and copy it out; the receive sockbuf is not touched and
 * the function returns from inside this branch.
 */
562 if (flags & MSG_OOB) {
563 m = m_get(M_WAIT, MT_DATA);
564 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
565 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
566 (struct proc *)0);
567 if (error)
568 goto bad;
569 do {
570 error = uiomove(mtod(m, caddr_t),
571 (int) min(uio->uio_resid, m->m_len), uio);
572 m = m_free(m);
573 } while (uio->uio_resid && error == 0 && m);
574 bad:
575 if (m)
576 m_freem(m);
577 return (error);
578 }
579 if (mp)
580 *mp = (struct mbuf *)0;
/* While the socket is SS_ISCONFIRMING, a non-empty read sends PRU_RCVD. */
581 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
582 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
583 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
584
/*
 * From here on the receive sockbuf is locked (sblock) and we run at
 * splsoftnet, except around the uiomove() in the data loop below.
 */
585 restart:
586 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
587 return (error);
588 s = splsoftnet();
589
590 m = so->so_rcv.sb_mb;
591 /*
592 * If we have less data than requested, block awaiting more
593 * (subject to any timeout) if:
594 * 1. the current count is less than the low water mark,
595 * 2. MSG_WAITALL is set, and it is possible to do the entire
596 * receive operation at once if we block (resid <= hiwat), or
597 * 3. MSG_DONTWAIT is not set.
598 * If MSG_WAITALL is set but resid is larger than the receive buffer,
599 * we have to do the receive in sections, and thus risk returning
600 * a short count if a timeout or signal occurs after we start.
601 */
602 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
603 so->so_rcv.sb_cc < uio->uio_resid) &&
604 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
605 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
606 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
607 #ifdef DIAGNOSTIC
608 if (m == 0 && so->so_rcv.sb_cc)
609 panic("receive 1");
610 #endif
/*
 * Pending socket error: queued data is delivered first; otherwise the
 * error is returned, and cleared unless the caller is only peeking.
 */
611 if (so->so_error) {
612 if (m)
613 goto dontblock;
614 error = so->so_error;
615 if ((flags & MSG_PEEK) == 0)
616 so->so_error = 0;
617 goto release;
618 }
/* No more data can arrive: deliver what is queued, else return 0. */
619 if (so->so_state & SS_CANTRCVMORE) {
620 if (m)
621 goto dontblock;
622 else
623 goto release;
624 }
/* An OOB mbuf or an end-of-record mark in the first record: do not wait. */
625 for (; m; m = m->m_next)
626 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
627 m = so->so_rcv.sb_mb;
628 goto dontblock;
629 }
630 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
631 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
632 error = ENOTCONN;
633 goto release;
634 }
635 if (uio->uio_resid == 0)
636 goto release;
637 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
638 error = EWOULDBLOCK;
639 goto release;
640 }
/* Nothing deliverable yet: drop the lock, sleep in sbwait(), start over. */
641 sbunlock(&so->so_rcv);
642 error = sbwait(&so->so_rcv);
643 splx(s);
644 if (error)
645 return (error);
646 goto restart;
647 }
648 dontblock:
649 #ifdef notyet /* XXXX */
650 if (uio->uio_procp)
651 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
652 #endif
653 nextrecord = m->m_nextpkt;
/*
 * PR_ADDR protocols: the record starts with an address mbuf.  Under
 * MSG_PEEK it is copied for the caller and left queued; otherwise it is
 * unlinked and either handed to the caller or freed.
 */
654 if (pr->pr_flags & PR_ADDR) {
655 #ifdef DIAGNOSTIC
656 if (m->m_type != MT_SONAME)
657 panic("receive 1a");
658 #endif
659 orig_resid = 0;
660 if (flags & MSG_PEEK) {
661 if (paddr)
662 *paddr = m_copy(m, 0, m->m_len);
663 m = m->m_next;
664 } else {
665 sbfree(&so->so_rcv, m);
666 if (paddr) {
667 *paddr = m;
668 so->so_rcv.sb_mb = m->m_next;
669 m->m_next = 0;
670 m = so->so_rcv.sb_mb;
671 } else {
672 MFREE(m, so->so_rcv.sb_mb);
673 m = so->so_rcv.sb_mb;
674 }
675 }
676 }
/*
 * Control mbufs follow.  Handled like the address mbuf, except that an
 * SCM_RIGHTS message being handed to the caller is first passed through
 * the domain's dom_externalize hook, if the domain has one.
 */
677 while (m && m->m_type == MT_CONTROL && error == 0) {
678 if (flags & MSG_PEEK) {
679 if (controlp)
680 *controlp = m_copy(m, 0, m->m_len);
681 m = m->m_next;
682 } else {
683 sbfree(&so->so_rcv, m);
684 if (controlp) {
685 if (pr->pr_domain->dom_externalize &&
686 mtod(m, struct cmsghdr *)->cmsg_type ==
687 SCM_RIGHTS)
688 error = (*pr->pr_domain->dom_externalize)(m);
689 *controlp = m;
690 so->so_rcv.sb_mb = m->m_next;
691 m->m_next = 0;
692 m = so->so_rcv.sb_mb;
693 } else {
694 MFREE(m, so->so_rcv.sb_mb);
695 m = so->so_rcv.sb_mb;
696 }
697 }
698 if (controlp) {
699 orig_resid = 0;
700 controlp = &(*controlp)->m_next;
701 }
702 }
/*
 * m is now the first data mbuf, if any.  Re-attach the record link to it
 * (the original head may have been removed above) and note whether this
 * record holds out-of-band data.
 */
703 if (m) {
704 if ((flags & MSG_PEEK) == 0)
705 m->m_nextpkt = nextrecord;
706 type = m->m_type;
707 if (type == MT_OOBDATA)
708 flags |= MSG_OOB;
709 }
/* moff: offset into the current mbuf; offset: bytes peeked so far. */
710 moff = 0;
711 offset = 0;
/* Main data loop: one mbuf, or part of one, per iteration. */
712 while (m && uio->uio_resid > 0 && error == 0) {
/* Never mix OOB and normal data in a single receive. */
713 if (m->m_type == MT_OOBDATA) {
714 if (type != MT_OOBDATA)
715 break;
716 } else if (type == MT_OOBDATA)
717 break;
718 #ifdef DIAGNOSTIC
719 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
720 panic("receive 3");
721 #endif
722 so->so_state &= ~SS_RCVATMARK;
/* Limit len to the request, the distance to the OOB mark, and this mbuf. */
723 len = uio->uio_resid;
724 if (so->so_oobmark && len > so->so_oobmark - offset)
725 len = so->so_oobmark - offset;
726 if (len > m->m_len - moff)
727 len = m->m_len - moff;
728 /*
729 * If mp is set, just pass back the mbufs.
730 * Otherwise copy them out via the uio, then free.
731 * Sockbuf must be consistent here (points to current mbuf,
732 * it points to next record) when we drop priority;
733 * we must note any additions to the sockbuf when we
734 * block interrupts again.
735 */
736 if (mp == 0) {
737 splx(s);
738 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
739 s = splsoftnet();
740 } else
741 uio->uio_resid -= len;
/* Whole (remaining) mbuf consumed: advance, unlinking it unless peeking. */
742 if (len == m->m_len - moff) {
743 if (m->m_flags & M_EOR)
744 flags |= MSG_EOR;
745 if (flags & MSG_PEEK) {
746 m = m->m_next;
747 moff = 0;
748 } else {
749 nextrecord = m->m_nextpkt;
750 sbfree(&so->so_rcv, m);
751 if (mp) {
752 *mp = m;
753 mp = &m->m_next;
754 so->so_rcv.sb_mb = m = m->m_next;
755 *mp = (struct mbuf *)0;
756 } else {
757 MFREE(m, so->so_rcv.sb_mb);
758 m = so->so_rcv.sb_mb;
759 }
760 if (m)
761 m->m_nextpkt = nextrecord;
762 }
763 } else {
/* Partial mbuf: step the peek offset, or trim the front of the mbuf. */
764 if (flags & MSG_PEEK)
765 moff += len;
766 else {
767 if (mp)
768 *mp = m_copym(m, 0, len, M_WAIT);
769 m->m_data += len;
770 m->m_len -= len;
771 so->so_rcv.sb_cc -= len;
772 }
773 }
/* Track the OOB mark; stop exactly at it. */
774 if (so->so_oobmark) {
775 if ((flags & MSG_PEEK) == 0) {
776 so->so_oobmark -= len;
777 if (so->so_oobmark == 0) {
778 so->so_state |= SS_RCVATMARK;
779 break;
780 }
781 } else {
782 offset += len;
783 if (offset == so->so_oobmark)
784 break;
785 }
786 }
787 if (flags & MSG_EOR)
788 break;
789 /*
790 * If the MSG_WAITALL flag is set (for non-atomic socket),
791 * we must not quit until "uio->uio_resid == 0" or an error
792 * termination. If a signal/timeout occurs, return
793 * with a short count but without error.
794 * Keep sockbuf locked against other readers.
795 */
796 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
797 !sosendallatonce(so) && !nextrecord) {
798 if (so->so_error || so->so_state & SS_CANTRCVMORE)
799 break;
800 error = sbwait(&so->so_rcv);
/* An sbwait() failure here is deliberately reported as success (0). */
801 if (error) {
802 sbunlock(&so->so_rcv);
803 splx(s);
804 return (0);
805 }
806 if ((m = so->so_rcv.sb_mb) != NULL)
807 nextrecord = m->m_nextpkt;
808 }
809 }
810
/*
 * Atomic protocol with part of the record still unread: flag MSG_TRUNC
 * and, unless peeking, drop the remainder of the record.
 */
811 if (m && pr->pr_flags & PR_ATOMIC) {
812 flags |= MSG_TRUNC;
813 if ((flags & MSG_PEEK) == 0)
814 (void) sbdroprecord(&so->so_rcv);
815 }
/*
 * Data was consumed: if the record is exhausted, advance the sockbuf to
 * the next record; then notify PR_WANTRCVD protocols via PRU_RCVD.
 */
816 if ((flags & MSG_PEEK) == 0) {
817 if (m == 0)
818 so->so_rcv.sb_mb = nextrecord;
819 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
820 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
821 (struct mbuf *)(long)flags, (struct mbuf *)0,
822 (struct proc *)0);
823 }
/*
 * Bytes were requested but none were transferred, no end-of-record was
 * seen and more data may still arrive: unlock and try again.
 */
824 if (orig_resid == uio->uio_resid && orig_resid &&
825 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
826 sbunlock(&so->so_rcv);
827 splx(s);
828 goto restart;
829 }
830
831 if (flagsp)
832 *flagsp |= flags;
833 release:
834 sbunlock(&so->so_rcv);
835 splx(s);
836 return (error);
837 }
838
839 int
840 soshutdown(so, how)
841 struct socket *so;
842 int how;
843 {
844 struct protosw *pr = so->so_proto;
845
846 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
847 return (EINVAL);
848
849 if (how == SHUT_RD || how == SHUT_RDWR)
850 sorflush(so);
851 if (how == SHUT_WR || how == SHUT_RDWR)
852 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
853 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
854 return (0);
855 }
856
857 void
858 sorflush(so)
859 register struct socket *so;
860 {
861 register struct sockbuf *sb = &so->so_rcv;
862 register struct protosw *pr = so->so_proto;
863 register int s;
864 struct sockbuf asb;
865
866 sb->sb_flags |= SB_NOINTR;
867 (void) sblock(sb, M_WAITOK);
868 s = splimp();
869 socantrcvmore(so);
870 sbunlock(sb);
871 asb = *sb;
872 memset((caddr_t)sb, 0, sizeof(*sb));
873 splx(s);
874 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
875 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
876 sbrelease(&asb);
877 }
878
879 int
880 sosetopt(so, level, optname, m0)
881 register struct socket *so;
882 int level, optname;
883 struct mbuf *m0;
884 {
885 int error = 0;
886 register struct mbuf *m = m0;
887
888 if (level != SOL_SOCKET) {
889 if (so->so_proto && so->so_proto->pr_ctloutput)
890 return ((*so->so_proto->pr_ctloutput)
891 (PRCO_SETOPT, so, level, optname, &m0));
892 error = ENOPROTOOPT;
893 } else {
894 switch (optname) {
895
896 case SO_LINGER:
897 if (m == NULL || m->m_len != sizeof(struct linger)) {
898 error = EINVAL;
899 goto bad;
900 }
901 so->so_linger = mtod(m, struct linger *)->l_linger;
902 /* fall thru... */
903
904 case SO_DEBUG:
905 case SO_KEEPALIVE:
906 case SO_DONTROUTE:
907 case SO_USELOOPBACK:
908 case SO_BROADCAST:
909 case SO_REUSEADDR:
910 case SO_REUSEPORT:
911 case SO_OOBINLINE:
912 case SO_TIMESTAMP:
913 if (m == NULL || m->m_len < sizeof(int)) {
914 error = EINVAL;
915 goto bad;
916 }
917 if (*mtod(m, int *))
918 so->so_options |= optname;
919 else
920 so->so_options &= ~optname;
921 break;
922
923 case SO_SNDBUF:
924 case SO_RCVBUF:
925 case SO_SNDLOWAT:
926 case SO_RCVLOWAT:
927 {
928 int optval;
929
930 if (m == NULL || m->m_len < sizeof(int)) {
931 error = EINVAL;
932 goto bad;
933 }
934
935 /*
936 * Values < 1 make no sense for any of these
937 * options, so disallow them.
938 */
939 optval = *mtod(m, int *);
940 if (optval < 1) {
941 error = EINVAL;
942 goto bad;
943 }
944
945 switch (optname) {
946
947 case SO_SNDBUF:
948 case SO_RCVBUF:
949 if (sbreserve(optname == SO_SNDBUF ?
950 &so->so_snd : &so->so_rcv,
951 (u_long) optval) == 0) {
952 error = ENOBUFS;
953 goto bad;
954 }
955 break;
956
957 /*
958 * Make sure the low-water is never greater than
959 * the high-water.
960 */
961 case SO_SNDLOWAT:
962 so->so_snd.sb_lowat =
963 (optval > so->so_snd.sb_hiwat) ?
964 so->so_snd.sb_hiwat : optval;
965 break;
966 case SO_RCVLOWAT:
967 so->so_rcv.sb_lowat =
968 (optval > so->so_rcv.sb_hiwat) ?
969 so->so_rcv.sb_hiwat : optval;
970 break;
971 }
972 break;
973 }
974
975 case SO_SNDTIMEO:
976 case SO_RCVTIMEO:
977 {
978 struct timeval *tv;
979 short val;
980
981 if (m == NULL || m->m_len < sizeof(*tv)) {
982 error = EINVAL;
983 goto bad;
984 }
985 tv = mtod(m, struct timeval *);
986 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
987 error = EDOM;
988 goto bad;
989 }
990 val = tv->tv_sec * hz + tv->tv_usec / tick;
991
992 switch (optname) {
993
994 case SO_SNDTIMEO:
995 so->so_snd.sb_timeo = val;
996 break;
997 case SO_RCVTIMEO:
998 so->so_rcv.sb_timeo = val;
999 break;
1000 }
1001 break;
1002 }
1003
1004 default:
1005 error = ENOPROTOOPT;
1006 break;
1007 }
1008 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1009 (void) ((*so->so_proto->pr_ctloutput)
1010 (PRCO_SETOPT, so, level, optname, &m0));
1011 m = NULL; /* freed by protocol */
1012 }
1013 }
1014 bad:
1015 if (m)
1016 (void) m_free(m);
1017 return (error);
1018 }
1019
1020 int
1021 sogetopt(so, level, optname, mp)
1022 register struct socket *so;
1023 int level, optname;
1024 struct mbuf **mp;
1025 {
1026 register struct mbuf *m;
1027
1028 if (level != SOL_SOCKET) {
1029 if (so->so_proto && so->so_proto->pr_ctloutput) {
1030 return ((*so->so_proto->pr_ctloutput)
1031 (PRCO_GETOPT, so, level, optname, mp));
1032 } else
1033 return (ENOPROTOOPT);
1034 } else {
1035 m = m_get(M_WAIT, MT_SOOPTS);
1036 m->m_len = sizeof(int);
1037
1038 switch (optname) {
1039
1040 case SO_LINGER:
1041 m->m_len = sizeof(struct linger);
1042 mtod(m, struct linger *)->l_onoff =
1043 so->so_options & SO_LINGER;
1044 mtod(m, struct linger *)->l_linger = so->so_linger;
1045 break;
1046
1047 case SO_USELOOPBACK:
1048 case SO_DONTROUTE:
1049 case SO_DEBUG:
1050 case SO_KEEPALIVE:
1051 case SO_REUSEADDR:
1052 case SO_REUSEPORT:
1053 case SO_BROADCAST:
1054 case SO_OOBINLINE:
1055 case SO_TIMESTAMP:
1056 *mtod(m, int *) = so->so_options & optname;
1057 break;
1058
1059 case SO_TYPE:
1060 *mtod(m, int *) = so->so_type;
1061 break;
1062
1063 case SO_ERROR:
1064 *mtod(m, int *) = so->so_error;
1065 so->so_error = 0;
1066 break;
1067
1068 case SO_SNDBUF:
1069 *mtod(m, int *) = so->so_snd.sb_hiwat;
1070 break;
1071
1072 case SO_RCVBUF:
1073 *mtod(m, int *) = so->so_rcv.sb_hiwat;
1074 break;
1075
1076 case SO_SNDLOWAT:
1077 *mtod(m, int *) = so->so_snd.sb_lowat;
1078 break;
1079
1080 case SO_RCVLOWAT:
1081 *mtod(m, int *) = so->so_rcv.sb_lowat;
1082 break;
1083
1084 case SO_SNDTIMEO:
1085 case SO_RCVTIMEO:
1086 {
1087 int val = (optname == SO_SNDTIMEO ?
1088 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1089
1090 m->m_len = sizeof(struct timeval);
1091 mtod(m, struct timeval *)->tv_sec = val / hz;
1092 mtod(m, struct timeval *)->tv_usec =
1093 (val % hz) * tick;
1094 break;
1095 }
1096
1097 default:
1098 (void)m_free(m);
1099 return (ENOPROTOOPT);
1100 }
1101 *mp = m;
1102 return (0);
1103 }
1104 }
1105
1106 void
1107 sohasoutofband(so)
1108 register struct socket *so;
1109 {
1110 struct proc *p;
1111
1112 if (so->so_pgid < 0)
1113 gsignal(-so->so_pgid, SIGURG);
1114 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1115 psignal(p, SIGURG);
1116 selwakeup(&so->so_rcv.sb_sel);
1117 }
1118