uipc_socket.c revision 1.43 1 /* $NetBSD: uipc_socket.c,v 1.43 1999/01/21 22:09:10 mycroft Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/kernel.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/resourcevar.h>
53 #include <sys/pool.h>
54
55 struct pool socket_pool;
56
/*
 * One-time initialization of the socket layer: set up the memory
 * pool that backs all struct socket allocations (see socreate/sofree).
 */
void
soinit()
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", 0, NULL, NULL, M_SOCKET);
}
64
65 /*
66 * Socket operation routines.
67 * These routines are called by the routines in
68 * sys_socket.c or from a system process, and
69 * implement the semantics of socket operations by
70 * switching out to the protocol specific routines.
71 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;
	int s;

	/*
	 * Find the protocol switch entry: an explicit protocol number
	 * wins; otherwise take the first protocol of the requested
	 * type in the domain.
	 */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	/* Allocate and zero the socket, then set up the accept queues. */
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	/* Let the protocol attach its control block (pcb). */
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/*
		 * Attach failed: mark the socket as having no file
		 * descriptor reference so sofree() will reclaim it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		/* SunOS emulation: datagram sockets broadcast by default. */
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}
122
123 int
124 sobind(so, nam)
125 struct socket *so;
126 struct mbuf *nam;
127 {
128 struct proc *p = curproc; /* XXX */
129 int s = splsoftnet();
130 int error;
131
132 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
133 nam, (struct mbuf *)0, p);
134 splx(s);
135 return (error);
136 }
137
138 int
139 solisten(so, backlog)
140 register struct socket *so;
141 int backlog;
142 {
143 int s = splsoftnet(), error;
144
145 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
146 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
147 if (error) {
148 splx(s);
149 return (error);
150 }
151 if (so->so_q.tqh_first == NULL)
152 so->so_options |= SO_ACCEPTCONN;
153 if (backlog < 0)
154 backlog = 0;
155 so->so_qlimit = min(backlog, SOMAXCONN);
156 splx(s);
157 return (0);
158 }
159
/*
 * Release a socket that has no remaining references.  A no-op if the
 * socket still has a protocol control block or still has a file
 * descriptor reference.
 */
void
sofree(so)
	register struct socket *so;
{

	/* Still attached to a pcb, or still referenced by a file: keep it. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	/* Drop buffered data on both sides, then return it to the pool. */
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
180
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* A listening socket first aborts everything still queued. */
	if (so->so_options & SO_ACCEPTCONN) {
		/* so_q0: incomplete connections; so_q: ready for accept. */
		while ((so2 = so->so_q0.tqh_first) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = so->so_q.tqh_first) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait (up to so_linger seconds) for
			 * the disconnect to complete — unless we are
			 * non-blocking, in which case drop immediately.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		/* Detach the pcb; preserve any earlier error. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
241
242 /*
243 * Must be called at splsoftnet...
244 */
245 int
246 soabort(so)
247 struct socket *so;
248 {
249
250 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
251 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
252 }
253
254 int
255 soaccept(so, nam)
256 register struct socket *so;
257 struct mbuf *nam;
258 {
259 int s = splsoftnet();
260 int error;
261
262 if ((so->so_state & SS_NOFDREF) == 0)
263 panic("soaccept: !NOFDREF");
264 so->so_state &= ~SS_NOFDREF;
265 if ((so->so_state & SS_ISDISCONNECTED) == 0)
266 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
267 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
268 else
269 error = 0;
270 splx(s);
271 return (error);
272 }
273
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	struct proc *p = curproc;		/* XXX */
	int s;
	int error;

	/* Listening sockets cannot initiate connections. */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * Note the side effect in the condition: sodisconnect() is
	 * attempted for connectionless protocols, and EISCONN is also
	 * the result when that disconnect fails.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
302
303 int
304 soconnect2(so1, so2)
305 register struct socket *so1;
306 struct socket *so2;
307 {
308 int s = splsoftnet();
309 int error;
310
311 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
312 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
313 (struct proc *)0);
314 splx(s);
315 return (error);
316 }
317
318 int
319 sodisconnect(so)
320 register struct socket *so;
321 {
322 int s = splsoftnet();
323 int error;
324
325 if ((so->so_state & SS_ISCONNECTED) == 0) {
326 error = ENOTCONN;
327 goto bad;
328 }
329 if (so->so_state & SS_ISDISCONNECTING) {
330 error = EALREADY;
331 goto bad;
332 }
333 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
334 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
335 (struct proc *)0);
336 bad:
337 splx(s);
338 return (error);
339 }
340
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
	/* snderr: record the error, restore spl, jump to cleanup. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Control-only requests (resid 0, clen
				 * nonzero) may go out unconnected.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Not enough room: fail now or sleep for space. */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Build an mbuf chain in "top" from the uio. */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE && space >= MCLBYTES) {
				/* Big payload: try to use a cluster. */
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
				len = min(MCLBYTES, resid);
#else
				if (atomic && top == 0) {
					len = min(MCLBYTES - max_hdr, resid);
					m->m_data += max_hdr;
				} else
					len = min(MCLBYTES, resid);
#endif
				space -= len;
			} else {
nopages:
				len = min(min(mlen, resid), space);
				space -= len;
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    if (resid > 0)
			    so->so_state |= SS_MORETOCOME;
		    s = splsoftnet();		/* XXX */
		    /* Hand the assembled chain to the protocol. */
		    error = (*so->so_proto->pr_usrreq)(so,
			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    if (resid > 0)
			    so->so_state &= ~SS_MORETOCOME;
		    /* Ownership of top/control passed to the protocol. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			    goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
524
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data is fetched straight from the protocol. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* An OOB or end-of-record mbuf means return what we have. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		/* Release the buffer lock and sleep until data arrives. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Hand the mbuf itself to the caller. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Convert passed access rights into fds. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	/* Main data-copy loop: moff is the offset into the current mbuf. */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in a single pass. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: peek advances moff, else trim front. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			/* Stop at the out-of-band mark. */
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram did not fit: drop the rest, flag truncation. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Let the protocol know data was taken (e.g. window update). */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing transferred at all: try again from the top. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
842
843 int
844 soshutdown(so, how)
845 struct socket *so;
846 int how;
847 {
848 struct protosw *pr = so->so_proto;
849
850 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
851 return (EINVAL);
852
853 if (how == SHUT_RD || how == SHUT_RDWR)
854 sorflush(so);
855 if (how == SHUT_WR || how == SHUT_RDWR)
856 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
857 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
858 return (0);
859 }
860
/*
 * Flush the receive side of a socket: mark it unable to receive
 * any more, then dispose of everything buffered.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Take the buffer lock without allowing interruption. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/*
	 * Snapshot the sockbuf and zero the original at high spl, so the
	 * actual release work below happens on a private copy.
	 */
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	/* Dispose of any in-transit rights (e.g. passed fds) first. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
882
/*
 * Set a socket option.  SOL_SOCKET-level options are handled here;
 * other levels are passed to the protocol's ctloutput routine.
 * The option mbuf m0 is consumed (freed) on all paths.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */
			/*
			 * l_onoff is the first int of struct linger, so
			 * the generic boolean test below toggles the
			 * SO_LINGER bit.
			 */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options map directly to so_options bits. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Timeouts are stored as clock ticks in a short. */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a look at successful socket-level opts. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
1023
/*
 * Get a socket option.  SOL_SOCKET-level options are answered from
 * the socket structure; other levels go to the protocol's ctloutput.
 * On success *mp is set to a freshly allocated mbuf holding the value,
 * owned by the caller.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		/* Default answer size is one int; cases may override m_len. */
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			/* NOTE(review): l_onoff is the raw SO_LINGER bit,
			 * not normalized to 0/1. */
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options are so_options bits. */
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the tick count back into a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1109
1110 void
1111 sohasoutofband(so)
1112 register struct socket *so;
1113 {
1114 struct proc *p;
1115
1116 if (so->so_pgid < 0)
1117 gsignal(-so->so_pgid, SIGURG);
1118 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1119 psignal(p, SIGURG);
1120 selwakeup(&so->so_rcv.sb_sel);
1121 }
1122