uipc_socket.c revision 1.32 1 /* $NetBSD: uipc_socket.c,v 1.32 1998/03/01 02:22:34 fvdl Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/signalvar.h>
50 #include <sys/resourcevar.h>
51
52 /*
53 * Socket operation routines.
54 * These routines are called by the routines in
55 * sys_socket.c or from a system process, and
56 * implement the semantics of socket operations by
57 * switching out to the protocol specific routines.
58 */
/*
 * Create a new socket of the given type in the given domain.
 * The protocol switch entry is located by explicit protocol number
 * when "proto" is nonzero, otherwise by socket type.  The socket is
 * allocated, zeroed, and attached to the protocol; on success the
 * new socket is returned through *aso.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	/*
	 * Ask the protocol to attach.  The protocol number rides in the
	 * "nam" argument slot, cast through long to an mbuf pointer.
	 */
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/*
		 * Set SS_NOFDREF so sofree() will actually release the
		 * half-constructed socket (it is a no-op otherwise).
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		/* SunOS binaries expect datagram sockets to broadcast. */
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	*aso = so;
	return (0);
}
103
104 int
105 sobind(so, nam)
106 struct socket *so;
107 struct mbuf *nam;
108 {
109 struct proc *p = curproc; /* XXX */
110 int s = splsoftnet();
111 int error;
112
113 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
114 nam, (struct mbuf *)0, p);
115 splx(s);
116 return (error);
117 }
118
/*
 * Mark a socket as willing to accept connections, with a queue limit
 * of min(backlog, SOMAXCONN).  SO_ACCEPTCONN is only set when no
 * completed connections are already queued.
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_q.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, SOMAXCONN);
	splx(s);
	return (0);
}
140
/*
 * Release a socket's storage.  A no-op unless the protocol has already
 * detached (so_pcb == 0) AND the file-descriptor reference is gone
 * (SS_NOFDREF set) -- callers rely on this guard.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * Still queued on a listening socket; remove it from
		 * whichever queue holds it (0 = incomplete so_q0,
		 * 1 = completed so_q).
		 */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}
157
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* A closing listener takes all queued connections down with it. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0.tqh_first)
			(void) soabort(so->so_q0.tqh_first);
		while (so->so_q.tqh_first)
			(void) soabort(so->so_q.tqh_first);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket: don't wait for the drain. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep (interruptibly, bounded by so_linger
			 * seconds per wakeup) until disconnect completes.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach regardless; report its error only if none so far. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
213
/*
 * Abort a connection via the protocol's PRU_ABORT request.
 * Must be called at splsoftnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}
225
226 int
227 soaccept(so, nam)
228 register struct socket *so;
229 struct mbuf *nam;
230 {
231 int s = splsoftnet();
232 int error;
233
234 if ((so->so_state & SS_NOFDREF) == 0)
235 panic("soaccept: !NOFDREF");
236 so->so_state &= ~SS_NOFDREF;
237 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
238 nam, (struct mbuf *)0, (struct proc *)0);
239 splx(s);
240 return (error);
241 }
242
/*
 * Initiate a connection to the address in "nam".
 * Fails with EOPNOTSUPP on listening sockets.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	struct proc *p = curproc;		/* XXX */
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 * (Note the short-circuit: a sodisconnect() failure is also
	 * reported as EISCONN.)
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
271
272 int
273 soconnect2(so1, so2)
274 register struct socket *so1;
275 struct socket *so2;
276 {
277 int s = splsoftnet();
278 int error;
279
280 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
281 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
282 (struct proc *)0);
283 splx(s);
284 return (error);
285 }
286
287 int
288 sodisconnect(so)
289 register struct socket *so;
290 {
291 int s = splsoftnet();
292 int error;
293
294 if ((so->so_state & SS_ISCONNECTED) == 0) {
295 error = ENOTCONN;
296 goto bad;
297 }
298 if (so->so_state & SS_ISDISCONNECTING) {
299 error = EALREADY;
300 goto bad;
301 }
302 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
303 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
304 (struct proc *)0);
305 bad:
306 splx(s);
307 return (error);
308 }
309
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	/*
	 * MSG_DONTROUTE is honored only for atomic protocols; the
	 * SO_DONTROUTE option is toggled around each protocol call below.
	 */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
	/* Leave the spl'ed region and the sockbuf lock with an error. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Control-only sends are allowed while a
				 * connection is being confirmed.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;	/* let OOB slightly exceed limits */
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			/* Drop the lock, wait for space, then retry. */
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* First mbuf of a chain carries the pkthdr. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = min(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						/* Leave header room up front. */
						len = min(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = min(MCLBYTES, resid);
#endif
					space -= len;
				} else {
nopages:
					len = min(min(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();				/* XXX */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* The protocol consumed top and control. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
489
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data comes straight from the protocol, not the sockbuf. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record terminator or OOB data present can be taken now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	/* The record may begin with an address (MT_SONAME) mbuf. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* ...followed by zero or more control (ancillary data) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/*
				 * Convert passed access rights (file
				 * descriptors) into a form usable by
				 * the receiving process.
				 */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	/*
	 * Main copy loop over the data mbufs: stop when the request is
	 * satisfied, the record ends, or the mbuf type changes.
	 */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read past the out-of-band mark in one pass. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the entire mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);	/* short count, no error */
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	/*
	 * Atomic protocols deliver whole records only: drop whatever the
	 * caller did not take and report the truncation.
	 */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Let the protocol react (e.g. update windows) to the read. */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	/* Nothing at all was transferred: retry rather than return empty. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
807
/*
 * Shut down part or all of a full-duplex connection.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	/*
	 * Historic encoding: how 0/1/2 becomes 1/2/3, interpreted as
	 * FREAD/FWRITE/FREAD|FWRITE bits respectively.
	 */
	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}
823
/*
 * Flush the receive side of a socket: mark it unable to receive more,
 * then dispose of everything buffered.  The sockbuf is snapshotted
 * into a local copy and zeroed in place so the release work can be
 * done outside the raised interrupt priority.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Don't let the lock acquisition below be interrupted by signals. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	/* Dispose of any rights (file descriptors) still in transit. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
845
/*
 * Set a socket option.  Levels other than SOL_SOCKET are passed to the
 * protocol's ctloutput routine.  The option value arrives in mbuf m0,
 * which is consumed here (or by the protocol when handed on).
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/*
			 * fall thru...  l_onoff is the leading int of
			 * struct linger, so the boolean-option code below
			 * sets or clears SO_LINGER from it.
			 */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Option name doubles as the so_options bit. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			/* Timeouts are stored as short tick counts. */
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a look at socket-level options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
986
/*
 * Get a socket option.  Levels other than SOL_SOCKET are handled by
 * the protocol's ctloutput routine.  On success an mbuf holding the
 * option value is returned through *mp; the caller frees it.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);	/* default; cases may override */

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Option name doubles as the so_options bit. */
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1072
/*
 * Notify the socket's owner that out-of-band data has arrived:
 * deliver SIGURG to the owning process or process group (negative
 * so_pgid denotes a group) and wake up any selectors on the
 * receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
1085