uipc_socket.c revision 1.34 1 /* $NetBSD: uipc_socket.c,v 1.34 1998/04/27 13:31:45 kleink Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/signalvar.h>
50 #include <sys/resourcevar.h>
51
52 /*
53 * Socket operation routines.
54 * These routines are called by the routines in
55 * sys_socket.c or from a system process, and
56 * implement the semantics of socket operations by
57 * switching out to the protocol specific routines.
58 */
/*ARGSUSED*/
/*
 * Create a new socket of the given domain, type and protocol and
 * return it via *aso.  The protocol's PRU_ATTACH request allocates
 * the protocol control block.
 */
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/*
	 * Find the protocol switch entry: by explicit protocol number
	 * if the caller supplied one, otherwise by socket type.
	 */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_q0);		/* incomplete connections */
	TAILQ_INIT(&so->so_q);		/* connections ready for accept */
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	/* Let the protocol attach itself and allocate its PCB. */
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/*
		 * Attach failed: mark the socket as having no file
		 * descriptor reference so sofree() will release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		/* SunOS datagram sockets get SO_BROADCAST by default. */
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	*aso = so;
	return (0);
}
105
106 int
107 sobind(so, nam)
108 struct socket *so;
109 struct mbuf *nam;
110 {
111 struct proc *p = curproc; /* XXX */
112 int s = splsoftnet();
113 int error;
114
115 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
116 nam, (struct mbuf *)0, p);
117 splx(s);
118 return (error);
119 }
120
121 int
122 solisten(so, backlog)
123 register struct socket *so;
124 int backlog;
125 {
126 int s = splsoftnet(), error;
127
128 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
129 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
130 if (error) {
131 splx(s);
132 return (error);
133 }
134 if (so->so_q.tqh_first == NULL)
135 so->so_options |= SO_ACCEPTCONN;
136 if (backlog < 0)
137 backlog = 0;
138 so->so_qlimit = min(backlog, SOMAXCONN);
139 splx(s);
140 return (0);
141 }
142
/*
 * Release a socket if it is fully dead: no protocol control block
 * and no file descriptor reference.  Otherwise do nothing; whichever
 * of the two references is dropped last will come back here.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * Still on a listening socket's accept queue; remove it
		 * from whichever queue (incomplete or complete) holds it.
		 */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	/* Release the send buffer, then flush/release the receive side. */
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}
159
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splsoftnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		/* Listening socket: abort every queued connection. */
		while (so->so_q0.tqh_first)
			(void) soabort(so->so_q0.tqh_first);
		while (so->so_q.tqh_first)
			(void) soabort(so->so_q.tqh_first);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait for the disconnect to complete
			 * (tsleep timeout is so_linger seconds), unless
			 * the socket is non-blocking and a disconnect is
			 * already in progress.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		/* Detach the protocol; report its error only if none yet. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
215
216 /*
217 * Must be called at splsoftnet...
218 */
219 int
220 soabort(so)
221 struct socket *so;
222 {
223
224 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
225 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
226 }
227
228 int
229 soaccept(so, nam)
230 register struct socket *so;
231 struct mbuf *nam;
232 {
233 int s = splsoftnet();
234 int error;
235
236 if ((so->so_state & SS_NOFDREF) == 0)
237 panic("soaccept: !NOFDREF");
238 so->so_state &= ~SS_NOFDREF;
239 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
240 nam, (struct mbuf *)0, (struct proc *)0);
241 splx(s);
242 return (error);
243 }
244
/*
 * Initiate a connection to the address in "nam".
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	struct proc *p = curproc;		/* XXX */
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * NOTE(review): if sodisconnect() fails, its error code is
	 * overwritten with EISCONN below -- appears intentional
	 * (historical BSD behavior), but worth confirming.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
273
274 int
275 soconnect2(so1, so2)
276 register struct socket *so1;
277 struct socket *so2;
278 {
279 int s = splsoftnet();
280 int error;
281
282 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
283 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
284 (struct proc *)0);
285 splx(s);
286 return (error);
287 }
288
289 int
290 sodisconnect(so)
291 register struct socket *so;
292 {
293 int s = splsoftnet();
294 int error;
295
296 if ((so->so_state & SS_ISCONNECTED) == 0) {
297 error = ENOTCONN;
298 goto bad;
299 }
300 if (so->so_state & SS_ISDISCONNECTING) {
301 error = EALREADY;
302 goto bad;
303 }
304 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
305 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
306 (struct proc *)0);
307 bad:
308 splx(s);
309 return (error);
310 }
311
312 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
313 /*
314 * Send on a socket.
315 * If send must go all at once and message is larger than
316 * send buffering, then hard error.
317 * Lock against other senders.
318 * If must go all at once and not enough room now, then
319 * inform user that this would block and do nothing.
320 * Otherwise, if nonblocking, send as much as possible.
321 * The data to be sent is described by "uio" if nonzero,
322 * otherwise by the mbuf chain "top" (which must be null
323 * if uio is not). Data provided in mbuf chain must be small
324 * enough to send all at once.
325 *
326 * Returns nonzero on error, timeout or signal; callers
327 * must check for short counts if EINTR/ERESTART are returned.
328 * Data and control buffers are freed on return.
329 */
330 int
331 sosend(so, addr, uio, top, control, flags)
332 register struct socket *so;
333 struct mbuf *addr;
334 struct uio *uio;
335 struct mbuf *top;
336 struct mbuf *control;
337 int flags;
338 {
339 struct proc *p = curproc; /* XXX */
340 struct mbuf **mp;
341 register struct mbuf *m;
342 register long space, len, resid;
343 int clen = 0, error, s, dontroute, mlen;
344 int atomic = sosendallatonce(so) || top;
345
346 if (uio)
347 resid = uio->uio_resid;
348 else
349 resid = top->m_pkthdr.len;
350 /*
351 * In theory resid should be unsigned.
352 * However, space must be signed, as it might be less than 0
353 * if we over-committed, and we must use a signed comparison
354 * of space and resid. On the other hand, a negative resid
355 * causes us to loop sending 0-length segments to the protocol.
356 */
357 if (resid < 0) {
358 error = EINVAL;
359 goto out;
360 }
361 dontroute =
362 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
363 (so->so_proto->pr_flags & PR_ATOMIC);
364 p->p_stats->p_ru.ru_msgsnd++;
365 if (control)
366 clen = control->m_len;
367 #define snderr(errno) { error = errno; splx(s); goto release; }
368
369 restart:
370 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
371 goto out;
372 do {
373 s = splsoftnet();
374 if (so->so_state & SS_CANTSENDMORE)
375 snderr(EPIPE);
376 if (so->so_error)
377 snderr(so->so_error);
378 if ((so->so_state & SS_ISCONNECTED) == 0) {
379 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
380 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
381 !(resid == 0 && clen != 0))
382 snderr(ENOTCONN);
383 } else if (addr == 0)
384 snderr(EDESTADDRREQ);
385 }
386 space = sbspace(&so->so_snd);
387 if (flags & MSG_OOB)
388 space += 1024;
389 if ((atomic && resid > so->so_snd.sb_hiwat) ||
390 clen > so->so_snd.sb_hiwat)
391 snderr(EMSGSIZE);
392 if (space < resid + clen && uio &&
393 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
394 if (so->so_state & SS_NBIO)
395 snderr(EWOULDBLOCK);
396 sbunlock(&so->so_snd);
397 error = sbwait(&so->so_snd);
398 splx(s);
399 if (error)
400 goto out;
401 goto restart;
402 }
403 splx(s);
404 mp = ⊤
405 space -= clen;
406 do {
407 if (uio == NULL) {
408 /*
409 * Data is prepackaged in "top".
410 */
411 resid = 0;
412 if (flags & MSG_EOR)
413 top->m_flags |= M_EOR;
414 } else do {
415 if (top == 0) {
416 MGETHDR(m, M_WAIT, MT_DATA);
417 mlen = MHLEN;
418 m->m_pkthdr.len = 0;
419 m->m_pkthdr.rcvif = (struct ifnet *)0;
420 } else {
421 MGET(m, M_WAIT, MT_DATA);
422 mlen = MLEN;
423 }
424 if (resid >= MINCLSIZE && space >= MCLBYTES) {
425 MCLGET(m, M_WAIT);
426 if ((m->m_flags & M_EXT) == 0)
427 goto nopages;
428 mlen = MCLBYTES;
429 #ifdef MAPPED_MBUFS
430 len = min(MCLBYTES, resid);
431 #else
432 if (atomic && top == 0) {
433 len = min(MCLBYTES - max_hdr, resid);
434 m->m_data += max_hdr;
435 } else
436 len = min(MCLBYTES, resid);
437 #endif
438 space -= len;
439 } else {
440 nopages:
441 len = min(min(mlen, resid), space);
442 space -= len;
443 /*
444 * For datagram protocols, leave room
445 * for protocol headers in first mbuf.
446 */
447 if (atomic && top == 0 && len < mlen)
448 MH_ALIGN(m, len);
449 }
450 error = uiomove(mtod(m, caddr_t), (int)len, uio);
451 resid = uio->uio_resid;
452 m->m_len = len;
453 *mp = m;
454 top->m_pkthdr.len += len;
455 if (error)
456 goto release;
457 mp = &m->m_next;
458 if (resid <= 0) {
459 if (flags & MSG_EOR)
460 top->m_flags |= M_EOR;
461 break;
462 }
463 } while (space > 0 && atomic);
464 if (dontroute)
465 so->so_options |= SO_DONTROUTE;
466 s = splsoftnet(); /* XXX */
467 error = (*so->so_proto->pr_usrreq)(so,
468 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
469 top, addr, control, p);
470 splx(s);
471 if (dontroute)
472 so->so_options &= ~SO_DONTROUTE;
473 clen = 0;
474 control = 0;
475 top = 0;
476 mp = ⊤
477 if (error)
478 goto release;
479 } while (resid && space > 0);
480 } while (resid);
481
482 release:
483 sbunlock(&so->so_snd);
484 out:
485 if (top)
486 m_freem(top);
487 if (control)
488 m_freem(control);
489 return (error);
490 }
491
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/*
		 * Out-of-band data is fetched directly from the protocol
		 * (PRU_RCVOOB) rather than from the receive buffer.
		 */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	/* Serialize with other receivers on this socket. */
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			/* Deliver queued data before reporting the error. */
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark lets us proceed now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		/* Wait for data to arrive, then re-evaluate from scratch. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* The record starts with the sender's address mbuf. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Hand the address mbuf to the caller. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Next come any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/*
				 * Passed file descriptors must be
				 * externalized into this process.
				 */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		/*
		 * Re-link the remainder of the record to the record
		 * chain (we may have unlinked leading mbufs above).
		 */
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: move data mbufs out to the uio (or to *mp). */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			/* Don't mix OOB and normal data in one call. */
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf consumed. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			/* Stop exactly at the OOB mark. */
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: drop the rest, flag it. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Tell the protocol we consumed data (window updates). */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred; try the whole thing again. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
809
810 int
811 soshutdown(so, how)
812 struct socket *so;
813 int how;
814 {
815 struct protosw *pr = so->so_proto;
816
817 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
818 return (EINVAL);
819
820 if (how == SHUT_RD || how == SHUT_RDWR)
821 sorflush(so);
822 if (how == SHUT_WR || how == SHUT_RDWR)
823 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
824 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
825 return (0);
826 }
827
/*
 * Flush and release a socket's receive buffer, disposing of any
 * in-transit access rights (passed file descriptors) it may hold.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Take the lock uninterruptibly; we must not fail here. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/*
	 * Snapshot the buffer and zero the original, so the socket's
	 * live sockbuf is empty while we release the snapshot (and run
	 * dom_dispose) outside the spl-protected section.
	 */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
849
/*
 * Set a socket option.  Socket-level (SOL_SOCKET) options are handled
 * here; others are passed to the protocol's ctloutput routine.
 * The option mbuf m0 is consumed: either freed here or handed to the
 * protocol.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		/* Protocol-level option: pass through (protocol frees m0). */
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... (l_onoff sets the option bit below) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options map directly onto so_options bits. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			/* Timeouts are stored as (short) clock ticks. */
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Also give the protocol a chance to act on the option. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
990
/*
 * Get a socket option.  Socket-level (SOL_SOCKET) options are answered
 * here from the socket structure; others are passed to the protocol's
 * ctloutput routine.  On success an mbuf holding the value is returned
 * via *mp (caller frees it).
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);	/* default; some cases resize */

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		/* Boolean options: report the raw so_options bit. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert stored clock ticks back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1076
1077 void
1078 sohasoutofband(so)
1079 register struct socket *so;
1080 {
1081 struct proc *p;
1082
1083 if (so->so_pgid < 0)
1084 gsignal(-so->so_pgid, SIGURG);
1085 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1086 psignal(p, SIGURG);
1087 selwakeup(&so->so_rcv.sb_sel);
1088 }
1089