/*	$NetBSD: uipc_socket.c,v 1.54.2.3 2001/06/21 20:07:07 nathanw Exp $	*/
2
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/lwp.h>
43 #include <sys/proc.h>
44 #include <sys/file.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/domain.h>
48 #include <sys/kernel.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/resourcevar.h>
54 #include <sys/pool.h>
55
56 struct pool socket_pool;
57
58 extern int somaxconn; /* patchable (XXX sysctl) */
59 int somaxconn = SOMAXCONN;
60
61 void
62 soinit(void)
63 {
64
65 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
66 "sockpl", 0, NULL, NULL, M_SOCKET);
67 }
68
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
76 /*ARGSUSED*/
77 int
78 socreate(int dom, struct socket **aso, int type, int proto)
79 {
80 struct proc *p;
81 struct protosw *prp;
82 struct socket *so;
83 int error, s;
84
85 p = curproc->l_proc; /* XXX */
86 if (proto)
87 prp = pffindproto(dom, proto, type);
88 else
89 prp = pffindtype(dom, type);
90 if (prp == 0 || prp->pr_usrreq == 0)
91 return (EPROTONOSUPPORT);
92 if (prp->pr_type != type)
93 return (EPROTOTYPE);
94 s = splsoftnet();
95 so = pool_get(&socket_pool, PR_WAITOK);
96 memset((caddr_t)so, 0, sizeof(*so));
97 TAILQ_INIT(&so->so_q0);
98 TAILQ_INIT(&so->so_q);
99 so->so_type = type;
100 so->so_proto = prp;
101 so->so_send = sosend;
102 so->so_receive = soreceive;
103 if (p != 0)
104 so->so_uid = p->p_ucred->cr_uid;
105 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
106 (struct mbuf *)(long)proto, (struct mbuf *)0, p);
107 if (error) {
108 so->so_state |= SS_NOFDREF;
109 sofree(so);
110 splx(s);
111 return (error);
112 }
113 #ifdef COMPAT_SUNOS
114 {
115 extern struct emul emul_sunos;
116 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
117 so->so_options |= SO_BROADCAST;
118 }
119 #endif
120 splx(s);
121 *aso = so;
122 return (0);
123 }
124
125 int
126 sobind(struct socket *so, struct mbuf *nam, struct proc *p)
127 {
128 int s, error;
129
130 s = splsoftnet();
131 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
132 nam, (struct mbuf *)0, p);
133 splx(s);
134 return (error);
135 }
136
137 int
138 solisten(struct socket *so, int backlog)
139 {
140 int s, error;
141
142 s = splsoftnet();
143 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
144 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
145 if (error) {
146 splx(s);
147 return (error);
148 }
149 if (so->so_q.tqh_first == NULL)
150 so->so_options |= SO_ACCEPTCONN;
151 if (backlog < 0)
152 backlog = 0;
153 so->so_qlimit = min(backlog, somaxconn);
154 splx(s);
155 return (0);
156 }
157
158 void
159 sofree(struct socket *so)
160 {
161
162 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
163 return;
164 if (so->so_head) {
165 /*
166 * We must not decommission a socket that's on the accept(2)
167 * queue. If we do, then accept(2) may hang after select(2)
168 * indicated that the listening socket was ready.
169 */
170 if (!soqremque(so, 0))
171 return;
172 }
173 sbrelease(&so->so_snd);
174 sorflush(so);
175 pool_put(&socket_pool, so);
176 }
177
178 /*
179 * Close a socket on last file table reference removal.
180 * Initiate disconnect if connected.
181 * Free socket when disconnect complete.
182 */
183 int
184 soclose(struct socket *so)
185 {
186 struct socket *so2;
187 int s, error;
188
189 error = 0;
190 s = splsoftnet(); /* conservative */
191 if (so->so_options & SO_ACCEPTCONN) {
192 while ((so2 = so->so_q0.tqh_first) != 0) {
193 (void) soqremque(so2, 0);
194 (void) soabort(so2);
195 }
196 while ((so2 = so->so_q.tqh_first) != 0) {
197 (void) soqremque(so2, 1);
198 (void) soabort(so2);
199 }
200 }
201 if (so->so_pcb == 0)
202 goto discard;
203 if (so->so_state & SS_ISCONNECTED) {
204 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
205 error = sodisconnect(so);
206 if (error)
207 goto drop;
208 }
209 if (so->so_options & SO_LINGER) {
210 if ((so->so_state & SS_ISDISCONNECTING) &&
211 (so->so_state & SS_NBIO))
212 goto drop;
213 while (so->so_state & SS_ISCONNECTED) {
214 error = tsleep((caddr_t)&so->so_timeo,
215 PSOCK | PCATCH, netcls,
216 so->so_linger * hz);
217 if (error)
218 break;
219 }
220 }
221 }
222 drop:
223 if (so->so_pcb) {
224 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
225 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
226 (struct proc *)0);
227 if (error == 0)
228 error = error2;
229 }
230 discard:
231 if (so->so_state & SS_NOFDREF)
232 panic("soclose: NOFDREF");
233 so->so_state |= SS_NOFDREF;
234 sofree(so);
235 splx(s);
236 return (error);
237 }
238
239 /*
240 * Must be called at splsoftnet...
241 */
242 int
243 soabort(struct socket *so)
244 {
245
246 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
247 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
248 }
249
250 int
251 soaccept(struct socket *so, struct mbuf *nam)
252 {
253 int s, error;
254
255 error = 0;
256 s = splsoftnet();
257 if ((so->so_state & SS_NOFDREF) == 0)
258 panic("soaccept: !NOFDREF");
259 so->so_state &= ~SS_NOFDREF;
260 if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
261 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
262 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
263 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
264 else
265 error = ECONNABORTED;
266
267 splx(s);
268 return (error);
269 }
270
271 int
272 soconnect(struct socket *so, struct mbuf *nam)
273 {
274 struct proc *p;
275 int s, error;
276
277 p = curproc->l_proc; /* XXX */
278 if (so->so_options & SO_ACCEPTCONN)
279 return (EOPNOTSUPP);
280 s = splsoftnet();
281 /*
282 * If protocol is connection-based, can only connect once.
283 * Otherwise, if connected, try to disconnect first.
284 * This allows user to disconnect by connecting to, e.g.,
285 * a null address.
286 */
287 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
288 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
289 (error = sodisconnect(so))))
290 error = EISCONN;
291 else
292 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
293 (struct mbuf *)0, nam, (struct mbuf *)0, p);
294 splx(s);
295 return (error);
296 }
297
298 int
299 soconnect2(struct socket *so1, struct socket *so2)
300 {
301 int s, error;
302
303 s = splsoftnet();
304 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
305 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
306 (struct proc *)0);
307 splx(s);
308 return (error);
309 }
310
311 int
312 sodisconnect(struct socket *so)
313 {
314 int s, error;
315
316 s = splsoftnet();
317 if ((so->so_state & SS_ISCONNECTED) == 0) {
318 error = ENOTCONN;
319 goto bad;
320 }
321 if (so->so_state & SS_ISDISCONNECTING) {
322 error = EALREADY;
323 goto bad;
324 }
325 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
326 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
327 (struct proc *)0);
328 bad:
329 splx(s);
330 return (error);
331 }
332
333 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
334 /*
335 * Send on a socket.
336 * If send must go all at once and message is larger than
337 * send buffering, then hard error.
338 * Lock against other senders.
339 * If must go all at once and not enough room now, then
340 * inform user that this would block and do nothing.
341 * Otherwise, if nonblocking, send as much as possible.
342 * The data to be sent is described by "uio" if nonzero,
343 * otherwise by the mbuf chain "top" (which must be null
344 * if uio is not). Data provided in mbuf chain must be small
345 * enough to send all at once.
346 *
347 * Returns nonzero on error, timeout or signal; callers
348 * must check for short counts if EINTR/ERESTART are returned.
349 * Data and control buffers are freed on return.
350 */
351 int
352 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
353 struct mbuf *control, int flags)
354 {
355 struct proc *p;
356 struct mbuf **mp, *m;
357 long space, len, resid;
358 int clen, error, s, dontroute, mlen, atomic;
359
360 p = curproc->l_proc; /* XXX */
361 clen = 0;
362 atomic = sosendallatonce(so) || top;
363 if (uio)
364 resid = uio->uio_resid;
365 else
366 resid = top->m_pkthdr.len;
367 /*
368 * In theory resid should be unsigned.
369 * However, space must be signed, as it might be less than 0
370 * if we over-committed, and we must use a signed comparison
371 * of space and resid. On the other hand, a negative resid
372 * causes us to loop sending 0-length segments to the protocol.
373 */
374 if (resid < 0) {
375 error = EINVAL;
376 goto out;
377 }
378 dontroute =
379 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
380 (so->so_proto->pr_flags & PR_ATOMIC);
381 p->p_stats->p_ru.ru_msgsnd++;
382 if (control)
383 clen = control->m_len;
384 #define snderr(errno) { error = errno; splx(s); goto release; }
385
386 restart:
387 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
388 goto out;
389 do {
390 s = splsoftnet();
391 if (so->so_state & SS_CANTSENDMORE)
392 snderr(EPIPE);
393 if (so->so_error) {
394 error = so->so_error;
395 so->so_error = 0;
396 splx(s);
397 goto release;
398 }
399 if ((so->so_state & SS_ISCONNECTED) == 0) {
400 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
401 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
402 !(resid == 0 && clen != 0))
403 snderr(ENOTCONN);
404 } else if (addr == 0)
405 snderr(EDESTADDRREQ);
406 }
407 space = sbspace(&so->so_snd);
408 if (flags & MSG_OOB)
409 space += 1024;
410 if ((atomic && resid > so->so_snd.sb_hiwat) ||
411 clen > so->so_snd.sb_hiwat)
412 snderr(EMSGSIZE);
413 if (space < resid + clen && uio &&
414 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
415 if (so->so_state & SS_NBIO)
416 snderr(EWOULDBLOCK);
417 sbunlock(&so->so_snd);
418 error = sbwait(&so->so_snd);
419 splx(s);
420 if (error)
421 goto out;
422 goto restart;
423 }
424 splx(s);
425 mp = ⊤
426 space -= clen;
427 do {
428 if (uio == NULL) {
429 /*
430 * Data is prepackaged in "top".
431 */
432 resid = 0;
433 if (flags & MSG_EOR)
434 top->m_flags |= M_EOR;
435 } else do {
436 if (top == 0) {
437 MGETHDR(m, M_WAIT, MT_DATA);
438 mlen = MHLEN;
439 m->m_pkthdr.len = 0;
440 m->m_pkthdr.rcvif = (struct ifnet *)0;
441 } else {
442 MGET(m, M_WAIT, MT_DATA);
443 mlen = MLEN;
444 }
445 if (resid >= MINCLSIZE && space >= MCLBYTES) {
446 MCLGET(m, M_WAIT);
447 if ((m->m_flags & M_EXT) == 0)
448 goto nopages;
449 mlen = MCLBYTES;
450 #ifdef MAPPED_MBUFS
451 len = min(MCLBYTES, resid);
452 #else
453 if (atomic && top == 0) {
454 len = min(MCLBYTES - max_hdr,
455 resid);
456 m->m_data += max_hdr;
457 } else
458 len = min(MCLBYTES, resid);
459 #endif
460 space -= len;
461 } else {
462 nopages:
463 len = min(min(mlen, resid), space);
464 space -= len;
465 /*
466 * For datagram protocols, leave room
467 * for protocol headers in first mbuf.
468 */
469 if (atomic && top == 0 && len < mlen)
470 MH_ALIGN(m, len);
471 }
472 error = uiomove(mtod(m, caddr_t), (int)len,
473 uio);
474 resid = uio->uio_resid;
475 m->m_len = len;
476 *mp = m;
477 top->m_pkthdr.len += len;
478 if (error)
479 goto release;
480 mp = &m->m_next;
481 if (resid <= 0) {
482 if (flags & MSG_EOR)
483 top->m_flags |= M_EOR;
484 break;
485 }
486 } while (space > 0 && atomic);
487
488 s = splsoftnet();
489
490 if (so->so_state & SS_CANTSENDMORE)
491 snderr(EPIPE);
492
493 if (dontroute)
494 so->so_options |= SO_DONTROUTE;
495 if (resid > 0)
496 so->so_state |= SS_MORETOCOME;
497 error = (*so->so_proto->pr_usrreq)(so,
498 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
499 top, addr, control, p);
500 if (dontroute)
501 so->so_options &= ~SO_DONTROUTE;
502 if (resid > 0)
503 so->so_state &= ~SS_MORETOCOME;
504 splx(s);
505
506 clen = 0;
507 control = 0;
508 top = 0;
509 mp = ⊤
510 if (error)
511 goto release;
512 } while (resid && space > 0);
513 } while (resid);
514
515 release:
516 sbunlock(&so->so_snd);
517 out:
518 if (top)
519 m_freem(top);
520 if (control)
521 m_freem(control);
522 return (error);
523 }
524
525 /*
526 * Implement receive operations on a socket.
527 * We depend on the way that records are added to the sockbuf
528 * by sbappend*. In particular, each record (mbufs linked through m_next)
529 * must begin with an address if the protocol so specifies,
530 * followed by an optional mbuf or mbufs containing ancillary data,
531 * and then zero or more mbufs of data.
532 * In order to avoid blocking network interrupts for the entire time here,
533 * we splx() while doing the actual copy to user space.
534 * Although the sockbuf is locked, new data may still be appended,
535 * and thus we must maintain consistency of the sockbuf during that time.
536 *
537 * The caller may receive the data as a single mbuf chain by supplying
538 * an mbuf **mp0 for use in returning the chain. The uio is then used
539 * only for the count in uio_resid.
540 */
541 int
542 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
543 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
544 {
545 struct mbuf *m, **mp;
546 int flags, len, error, s, offset, moff, type, orig_resid;
547 struct protosw *pr;
548 struct mbuf *nextrecord;
549
550 pr = so->so_proto;
551 mp = mp0;
552 type = 0;
553 orig_resid = uio->uio_resid;
554 if (paddr)
555 *paddr = 0;
556 if (controlp)
557 *controlp = 0;
558 if (flagsp)
559 flags = *flagsp &~ MSG_EOR;
560 else
561 flags = 0;
562 if (flags & MSG_OOB) {
563 m = m_get(M_WAIT, MT_DATA);
564 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
565 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
566 (struct proc *)0);
567 if (error)
568 goto bad;
569 do {
570 error = uiomove(mtod(m, caddr_t),
571 (int) min(uio->uio_resid, m->m_len), uio);
572 m = m_free(m);
573 } while (uio->uio_resid && error == 0 && m);
574 bad:
575 if (m)
576 m_freem(m);
577 return (error);
578 }
579 if (mp)
580 *mp = (struct mbuf *)0;
581 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
582 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
583 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
584
585 restart:
586 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
587 return (error);
588 s = splsoftnet();
589
590 m = so->so_rcv.sb_mb;
591 /*
592 * If we have less data than requested, block awaiting more
593 * (subject to any timeout) if:
594 * 1. the current count is less than the low water mark,
595 * 2. MSG_WAITALL is set, and it is possible to do the entire
596 * receive operation at once if we block (resid <= hiwat), or
597 * 3. MSG_DONTWAIT is not set.
598 * If MSG_WAITALL is set but resid is larger than the receive buffer,
599 * we have to do the receive in sections, and thus risk returning
600 * a short count if a timeout or signal occurs after we start.
601 */
602 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
603 so->so_rcv.sb_cc < uio->uio_resid) &&
604 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
605 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
606 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
607 #ifdef DIAGNOSTIC
608 if (m == 0 && so->so_rcv.sb_cc)
609 panic("receive 1");
610 #endif
611 if (so->so_error) {
612 if (m)
613 goto dontblock;
614 error = so->so_error;
615 if ((flags & MSG_PEEK) == 0)
616 so->so_error = 0;
617 goto release;
618 }
619 if (so->so_state & SS_CANTRCVMORE) {
620 if (m)
621 goto dontblock;
622 else
623 goto release;
624 }
625 for (; m; m = m->m_next)
626 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
627 m = so->so_rcv.sb_mb;
628 goto dontblock;
629 }
630 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
631 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
632 error = ENOTCONN;
633 goto release;
634 }
635 if (uio->uio_resid == 0)
636 goto release;
637 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
638 error = EWOULDBLOCK;
639 goto release;
640 }
641 sbunlock(&so->so_rcv);
642 error = sbwait(&so->so_rcv);
643 splx(s);
644 if (error)
645 return (error);
646 goto restart;
647 }
648 dontblock:
649 #ifdef notyet /* XXXX */
650 if (uio->uio_procp)
651 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
652 #endif
653 nextrecord = m->m_nextpkt;
654 if (pr->pr_flags & PR_ADDR) {
655 #ifdef DIAGNOSTIC
656 if (m->m_type != MT_SONAME)
657 panic("receive 1a");
658 #endif
659 orig_resid = 0;
660 if (flags & MSG_PEEK) {
661 if (paddr)
662 *paddr = m_copy(m, 0, m->m_len);
663 m = m->m_next;
664 } else {
665 sbfree(&so->so_rcv, m);
666 if (paddr) {
667 *paddr = m;
668 so->so_rcv.sb_mb = m->m_next;
669 m->m_next = 0;
670 m = so->so_rcv.sb_mb;
671 } else {
672 MFREE(m, so->so_rcv.sb_mb);
673 m = so->so_rcv.sb_mb;
674 }
675 }
676 }
677 while (m && m->m_type == MT_CONTROL && error == 0) {
678 if (flags & MSG_PEEK) {
679 if (controlp)
680 *controlp = m_copy(m, 0, m->m_len);
681 m = m->m_next;
682 } else {
683 sbfree(&so->so_rcv, m);
684 if (controlp) {
685 if (pr->pr_domain->dom_externalize &&
686 mtod(m, struct cmsghdr *)->cmsg_type ==
687 SCM_RIGHTS)
688 error = (*pr->pr_domain->dom_externalize)(m);
689 *controlp = m;
690 so->so_rcv.sb_mb = m->m_next;
691 m->m_next = 0;
692 m = so->so_rcv.sb_mb;
693 } else {
694 MFREE(m, so->so_rcv.sb_mb);
695 m = so->so_rcv.sb_mb;
696 }
697 }
698 if (controlp) {
699 orig_resid = 0;
700 controlp = &(*controlp)->m_next;
701 }
702 }
703 if (m) {
704 if ((flags & MSG_PEEK) == 0)
705 m->m_nextpkt = nextrecord;
706 type = m->m_type;
707 if (type == MT_OOBDATA)
708 flags |= MSG_OOB;
709 }
710 moff = 0;
711 offset = 0;
712 while (m && uio->uio_resid > 0 && error == 0) {
713 if (m->m_type == MT_OOBDATA) {
714 if (type != MT_OOBDATA)
715 break;
716 } else if (type == MT_OOBDATA)
717 break;
718 #ifdef DIAGNOSTIC
719 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
720 panic("receive 3");
721 #endif
722 so->so_state &= ~SS_RCVATMARK;
723 len = uio->uio_resid;
724 if (so->so_oobmark && len > so->so_oobmark - offset)
725 len = so->so_oobmark - offset;
726 if (len > m->m_len - moff)
727 len = m->m_len - moff;
728 /*
729 * If mp is set, just pass back the mbufs.
730 * Otherwise copy them out via the uio, then free.
731 * Sockbuf must be consistent here (points to current mbuf,
732 * it points to next record) when we drop priority;
733 * we must note any additions to the sockbuf when we
734 * block interrupts again.
735 */
736 if (mp == 0) {
737 splx(s);
738 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
739 s = splsoftnet();
740 } else
741 uio->uio_resid -= len;
742 if (len == m->m_len - moff) {
743 if (m->m_flags & M_EOR)
744 flags |= MSG_EOR;
745 if (flags & MSG_PEEK) {
746 m = m->m_next;
747 moff = 0;
748 } else {
749 nextrecord = m->m_nextpkt;
750 sbfree(&so->so_rcv, m);
751 if (mp) {
752 *mp = m;
753 mp = &m->m_next;
754 so->so_rcv.sb_mb = m = m->m_next;
755 *mp = (struct mbuf *)0;
756 } else {
757 MFREE(m, so->so_rcv.sb_mb);
758 m = so->so_rcv.sb_mb;
759 }
760 if (m)
761 m->m_nextpkt = nextrecord;
762 }
763 } else {
764 if (flags & MSG_PEEK)
765 moff += len;
766 else {
767 if (mp)
768 *mp = m_copym(m, 0, len, M_WAIT);
769 m->m_data += len;
770 m->m_len -= len;
771 so->so_rcv.sb_cc -= len;
772 }
773 }
774 if (so->so_oobmark) {
775 if ((flags & MSG_PEEK) == 0) {
776 so->so_oobmark -= len;
777 if (so->so_oobmark == 0) {
778 so->so_state |= SS_RCVATMARK;
779 break;
780 }
781 } else {
782 offset += len;
783 if (offset == so->so_oobmark)
784 break;
785 }
786 }
787 if (flags & MSG_EOR)
788 break;
789 /*
790 * If the MSG_WAITALL flag is set (for non-atomic socket),
791 * we must not quit until "uio->uio_resid == 0" or an error
792 * termination. If a signal/timeout occurs, return
793 * with a short count but without error.
794 * Keep sockbuf locked against other readers.
795 */
796 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
797 !sosendallatonce(so) && !nextrecord) {
798 if (so->so_error || so->so_state & SS_CANTRCVMORE)
799 break;
800 error = sbwait(&so->so_rcv);
801 if (error) {
802 sbunlock(&so->so_rcv);
803 splx(s);
804 return (0);
805 }
806 if ((m = so->so_rcv.sb_mb) != NULL)
807 nextrecord = m->m_nextpkt;
808 }
809 }
810
811 if (m && pr->pr_flags & PR_ATOMIC) {
812 flags |= MSG_TRUNC;
813 if ((flags & MSG_PEEK) == 0)
814 (void) sbdroprecord(&so->so_rcv);
815 }
816 if ((flags & MSG_PEEK) == 0) {
817 if (m == 0)
818 so->so_rcv.sb_mb = nextrecord;
819 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
820 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
821 (struct mbuf *)(long)flags, (struct mbuf *)0,
822 (struct proc *)0);
823 }
824 if (orig_resid == uio->uio_resid && orig_resid &&
825 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
826 sbunlock(&so->so_rcv);
827 splx(s);
828 goto restart;
829 }
830
831 if (flagsp)
832 *flagsp |= flags;
833 release:
834 sbunlock(&so->so_rcv);
835 splx(s);
836 return (error);
837 }
838
839 int
840 soshutdown(struct socket *so, int how)
841 {
842 struct protosw *pr;
843
844 pr = so->so_proto;
845 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
846 return (EINVAL);
847
848 if (how == SHUT_RD || how == SHUT_RDWR)
849 sorflush(so);
850 if (how == SHUT_WR || how == SHUT_RDWR)
851 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
852 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
853 return (0);
854 }
855
856 void
857 sorflush(struct socket *so)
858 {
859 struct sockbuf *sb, asb;
860 struct protosw *pr;
861 int s;
862
863 sb = &so->so_rcv;
864 pr = so->so_proto;
865 sb->sb_flags |= SB_NOINTR;
866 (void) sblock(sb, M_WAITOK);
867 s = splnet();
868 socantrcvmore(so);
869 sbunlock(sb);
870 asb = *sb;
871 memset((caddr_t)sb, 0, sizeof(*sb));
872 splx(s);
873 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
874 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
875 sbrelease(&asb);
876 }
877
878 int
879 sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
880 {
881 int error;
882 struct mbuf *m;
883
884 error = 0;
885 m = m0;
886 if (level != SOL_SOCKET) {
887 if (so->so_proto && so->so_proto->pr_ctloutput)
888 return ((*so->so_proto->pr_ctloutput)
889 (PRCO_SETOPT, so, level, optname, &m0));
890 error = ENOPROTOOPT;
891 } else {
892 switch (optname) {
893
894 case SO_LINGER:
895 if (m == NULL || m->m_len != sizeof(struct linger)) {
896 error = EINVAL;
897 goto bad;
898 }
899 so->so_linger = mtod(m, struct linger *)->l_linger;
900 /* fall thru... */
901
902 case SO_DEBUG:
903 case SO_KEEPALIVE:
904 case SO_DONTROUTE:
905 case SO_USELOOPBACK:
906 case SO_BROADCAST:
907 case SO_REUSEADDR:
908 case SO_REUSEPORT:
909 case SO_OOBINLINE:
910 case SO_TIMESTAMP:
911 if (m == NULL || m->m_len < sizeof(int)) {
912 error = EINVAL;
913 goto bad;
914 }
915 if (*mtod(m, int *))
916 so->so_options |= optname;
917 else
918 so->so_options &= ~optname;
919 break;
920
921 case SO_SNDBUF:
922 case SO_RCVBUF:
923 case SO_SNDLOWAT:
924 case SO_RCVLOWAT:
925 {
926 int optval;
927
928 if (m == NULL || m->m_len < sizeof(int)) {
929 error = EINVAL;
930 goto bad;
931 }
932
933 /*
934 * Values < 1 make no sense for any of these
935 * options, so disallow them.
936 */
937 optval = *mtod(m, int *);
938 if (optval < 1) {
939 error = EINVAL;
940 goto bad;
941 }
942
943 switch (optname) {
944
945 case SO_SNDBUF:
946 case SO_RCVBUF:
947 if (sbreserve(optname == SO_SNDBUF ?
948 &so->so_snd : &so->so_rcv,
949 (u_long) optval) == 0) {
950 error = ENOBUFS;
951 goto bad;
952 }
953 break;
954
955 /*
956 * Make sure the low-water is never greater than
957 * the high-water.
958 */
959 case SO_SNDLOWAT:
960 so->so_snd.sb_lowat =
961 (optval > so->so_snd.sb_hiwat) ?
962 so->so_snd.sb_hiwat : optval;
963 break;
964 case SO_RCVLOWAT:
965 so->so_rcv.sb_lowat =
966 (optval > so->so_rcv.sb_hiwat) ?
967 so->so_rcv.sb_hiwat : optval;
968 break;
969 }
970 break;
971 }
972
973 case SO_SNDTIMEO:
974 case SO_RCVTIMEO:
975 {
976 struct timeval *tv;
977 short val;
978
979 if (m == NULL || m->m_len < sizeof(*tv)) {
980 error = EINVAL;
981 goto bad;
982 }
983 tv = mtod(m, struct timeval *);
984 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
985 error = EDOM;
986 goto bad;
987 }
988 val = tv->tv_sec * hz + tv->tv_usec / tick;
989
990 switch (optname) {
991
992 case SO_SNDTIMEO:
993 so->so_snd.sb_timeo = val;
994 break;
995 case SO_RCVTIMEO:
996 so->so_rcv.sb_timeo = val;
997 break;
998 }
999 break;
1000 }
1001
1002 default:
1003 error = ENOPROTOOPT;
1004 break;
1005 }
1006 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1007 (void) ((*so->so_proto->pr_ctloutput)
1008 (PRCO_SETOPT, so, level, optname, &m0));
1009 m = NULL; /* freed by protocol */
1010 }
1011 }
1012 bad:
1013 if (m)
1014 (void) m_free(m);
1015 return (error);
1016 }
1017
1018 int
1019 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
1020 {
1021 struct mbuf *m;
1022
1023 if (level != SOL_SOCKET) {
1024 if (so->so_proto && so->so_proto->pr_ctloutput) {
1025 return ((*so->so_proto->pr_ctloutput)
1026 (PRCO_GETOPT, so, level, optname, mp));
1027 } else
1028 return (ENOPROTOOPT);
1029 } else {
1030 m = m_get(M_WAIT, MT_SOOPTS);
1031 m->m_len = sizeof(int);
1032
1033 switch (optname) {
1034
1035 case SO_LINGER:
1036 m->m_len = sizeof(struct linger);
1037 mtod(m, struct linger *)->l_onoff =
1038 so->so_options & SO_LINGER;
1039 mtod(m, struct linger *)->l_linger = so->so_linger;
1040 break;
1041
1042 case SO_USELOOPBACK:
1043 case SO_DONTROUTE:
1044 case SO_DEBUG:
1045 case SO_KEEPALIVE:
1046 case SO_REUSEADDR:
1047 case SO_REUSEPORT:
1048 case SO_BROADCAST:
1049 case SO_OOBINLINE:
1050 case SO_TIMESTAMP:
1051 *mtod(m, int *) = so->so_options & optname;
1052 break;
1053
1054 case SO_TYPE:
1055 *mtod(m, int *) = so->so_type;
1056 break;
1057
1058 case SO_ERROR:
1059 *mtod(m, int *) = so->so_error;
1060 so->so_error = 0;
1061 break;
1062
1063 case SO_SNDBUF:
1064 *mtod(m, int *) = so->so_snd.sb_hiwat;
1065 break;
1066
1067 case SO_RCVBUF:
1068 *mtod(m, int *) = so->so_rcv.sb_hiwat;
1069 break;
1070
1071 case SO_SNDLOWAT:
1072 *mtod(m, int *) = so->so_snd.sb_lowat;
1073 break;
1074
1075 case SO_RCVLOWAT:
1076 *mtod(m, int *) = so->so_rcv.sb_lowat;
1077 break;
1078
1079 case SO_SNDTIMEO:
1080 case SO_RCVTIMEO:
1081 {
1082 int val = (optname == SO_SNDTIMEO ?
1083 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1084
1085 m->m_len = sizeof(struct timeval);
1086 mtod(m, struct timeval *)->tv_sec = val / hz;
1087 mtod(m, struct timeval *)->tv_usec =
1088 (val % hz) * tick;
1089 break;
1090 }
1091
1092 default:
1093 (void)m_free(m);
1094 return (ENOPROTOOPT);
1095 }
1096 *mp = m;
1097 return (0);
1098 }
1099 }
1100
1101 void
1102 sohasoutofband(struct socket *so)
1103 {
1104 struct proc *p;
1105
1106 if (so->so_pgid < 0)
1107 gsignal(-so->so_pgid, SIGURG);
1108 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1109 psignal(p, SIGURG);
1110 selwakeup(&so->so_rcv.sb_sel);
1111 }
1112