/*	$NetBSD: uipc_socket.c,v 1.29.4.2 1999/01/25 05:49:42 cgd Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/signalvar.h>
50 #include <sys/resourcevar.h>
51
52 /*
53 * Socket operation routines.
54 * These routines are called by the routines in
55 * sys_socket.c or from a system process, and
56 * implement the semantics of socket operations by
57 * switching out to the protocol specific routines.
58 */
59 /*ARGSUSED*/
60 int
61 socreate(dom, aso, type, proto)
62 int dom;
63 struct socket **aso;
64 register int type;
65 int proto;
66 {
67 struct proc *p = curproc; /* XXX */
68 register struct protosw *prp;
69 register struct socket *so;
70 register int error;
71
72 if (proto)
73 prp = pffindproto(dom, proto, type);
74 else
75 prp = pffindtype(dom, type);
76 if (prp == 0 || prp->pr_usrreq == 0)
77 return (EPROTONOSUPPORT);
78 if (prp->pr_type != type)
79 return (EPROTOTYPE);
80 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
81 bzero((caddr_t)so, sizeof(*so));
82 TAILQ_INIT(&so->so_q0);
83 TAILQ_INIT(&so->so_q);
84 so->so_type = type;
85 so->so_proto = prp;
86 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
87 (struct mbuf *)(long)proto, (struct mbuf *)0, p);
88 if (error) {
89 so->so_state |= SS_NOFDREF;
90 sofree(so);
91 return (error);
92 }
93 #ifdef COMPAT_SUNOS
94 {
95 extern struct emul emul_sunos;
96 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
97 so->so_options |= SO_BROADCAST;
98 }
99 #endif
100 *aso = so;
101 return (0);
102 }
103
104 int
105 sobind(so, nam)
106 struct socket *so;
107 struct mbuf *nam;
108 {
109 struct proc *p = curproc; /* XXX */
110 int s = splsoftnet();
111 int error;
112
113 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
114 nam, (struct mbuf *)0, p);
115 splx(s);
116 return (error);
117 }
118
119 int
120 solisten(so, backlog)
121 register struct socket *so;
122 int backlog;
123 {
124 int s = splsoftnet(), error;
125
126 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
127 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
128 if (error) {
129 splx(s);
130 return (error);
131 }
132 if (so->so_q.tqh_first == NULL)
133 so->so_options |= SO_ACCEPTCONN;
134 if (backlog < 0)
135 backlog = 0;
136 so->so_qlimit = min(backlog, SOMAXCONN);
137 splx(s);
138 return (0);
139 }
140
141 void
142 sofree(so)
143 register struct socket *so;
144 {
145
146 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
147 return;
148 if (so->so_head) {
149 /*
150 * We must not decommission a socket that's on the accept(2)
151 * queue. If we do, then accept(2) may hang after select(2)
152 * indicated that the listening socket was ready.
153 */
154 if (!soqremque(so, 0))
155 return;
156 }
157 sbrelease(&so->so_snd);
158 sorflush(so);
159 FREE(so, M_SOCKET);
160 }
161
162 /*
163 * Close a socket on last file table reference removal.
164 * Initiate disconnect if connected.
165 * Free socket when disconnect complete.
166 */
167 int
168 soclose(so)
169 register struct socket *so;
170 {
171 struct socket *so2;
172 int s = splsoftnet(); /* conservative */
173 int error = 0;
174
175 if (so->so_options & SO_ACCEPTCONN) {
176 while ((so2 = so->so_q0.tqh_first) != 0) {
177 (void) soqremque(so2, 0);
178 (void) soabort(so2);
179 }
180 while ((so2 = so->so_q.tqh_first) != 0) {
181 (void) soqremque(so2, 1);
182 (void) soabort(so2);
183 }
184 }
185 if (so->so_pcb == 0)
186 goto discard;
187 if (so->so_state & SS_ISCONNECTED) {
188 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
189 error = sodisconnect(so);
190 if (error)
191 goto drop;
192 }
193 if (so->so_options & SO_LINGER) {
194 if ((so->so_state & SS_ISDISCONNECTING) &&
195 (so->so_state & SS_NBIO))
196 goto drop;
197 while (so->so_state & SS_ISCONNECTED) {
198 error = tsleep((caddr_t)&so->so_timeo,
199 PSOCK | PCATCH, netcls,
200 so->so_linger * hz);
201 if (error)
202 break;
203 }
204 }
205 }
206 drop:
207 if (so->so_pcb) {
208 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
209 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
210 (struct proc *)0);
211 if (error == 0)
212 error = error2;
213 }
214 discard:
215 if (so->so_state & SS_NOFDREF)
216 panic("soclose: NOFDREF");
217 so->so_state |= SS_NOFDREF;
218 sofree(so);
219 splx(s);
220 return (error);
221 }
222
223 /*
224 * Must be called at splsoftnet...
225 */
226 int
227 soabort(so)
228 struct socket *so;
229 {
230
231 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
232 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
233 }
234
235 int
236 soaccept(so, nam)
237 register struct socket *so;
238 struct mbuf *nam;
239 {
240 int s = splsoftnet();
241 int error;
242
243 if ((so->so_state & SS_NOFDREF) == 0)
244 panic("soaccept: !NOFDREF");
245 so->so_state &= ~SS_NOFDREF;
246 if ((so->so_state & SS_ISDISCONNECTED) == 0)
247 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
248 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
249 else
250 error = 0;
251 splx(s);
252 return (error);
253 }
254
255 int
256 soconnect(so, nam)
257 register struct socket *so;
258 struct mbuf *nam;
259 {
260 struct proc *p = curproc; /* XXX */
261 int s;
262 int error;
263
264 if (so->so_options & SO_ACCEPTCONN)
265 return (EOPNOTSUPP);
266 s = splsoftnet();
267 /*
268 * If protocol is connection-based, can only connect once.
269 * Otherwise, if connected, try to disconnect first.
270 * This allows user to disconnect by connecting to, e.g.,
271 * a null address.
272 */
273 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
274 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
275 (error = sodisconnect(so))))
276 error = EISCONN;
277 else
278 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
279 (struct mbuf *)0, nam, (struct mbuf *)0, p);
280 splx(s);
281 return (error);
282 }
283
284 int
285 soconnect2(so1, so2)
286 register struct socket *so1;
287 struct socket *so2;
288 {
289 int s = splsoftnet();
290 int error;
291
292 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
293 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
294 (struct proc *)0);
295 splx(s);
296 return (error);
297 }
298
299 int
300 sodisconnect(so)
301 register struct socket *so;
302 {
303 int s = splsoftnet();
304 int error;
305
306 if ((so->so_state & SS_ISCONNECTED) == 0) {
307 error = ENOTCONN;
308 goto bad;
309 }
310 if (so->so_state & SS_ISDISCONNECTING) {
311 error = EALREADY;
312 goto bad;
313 }
314 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
315 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
316 (struct proc *)0);
317 bad:
318 splx(s);
319 return (error);
320 }
321
322 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
323 /*
324 * Send on a socket.
325 * If send must go all at once and message is larger than
326 * send buffering, then hard error.
327 * Lock against other senders.
328 * If must go all at once and not enough room now, then
329 * inform user that this would block and do nothing.
330 * Otherwise, if nonblocking, send as much as possible.
331 * The data to be sent is described by "uio" if nonzero,
332 * otherwise by the mbuf chain "top" (which must be null
333 * if uio is not). Data provided in mbuf chain must be small
334 * enough to send all at once.
335 *
336 * Returns nonzero on error, timeout or signal; callers
337 * must check for short counts if EINTR/ERESTART are returned.
338 * Data and control buffers are freed on return.
339 */
340 int
341 sosend(so, addr, uio, top, control, flags)
342 register struct socket *so;
343 struct mbuf *addr;
344 struct uio *uio;
345 struct mbuf *top;
346 struct mbuf *control;
347 int flags;
348 {
349 struct proc *p = curproc; /* XXX */
350 struct mbuf **mp;
351 register struct mbuf *m;
352 register long space, len, resid;
353 int clen = 0, error, s, dontroute, mlen;
354 int atomic = sosendallatonce(so) || top;
355
356 if (uio)
357 resid = uio->uio_resid;
358 else
359 resid = top->m_pkthdr.len;
360 /*
361 * In theory resid should be unsigned.
362 * However, space must be signed, as it might be less than 0
363 * if we over-committed, and we must use a signed comparison
364 * of space and resid. On the other hand, a negative resid
365 * causes us to loop sending 0-length segments to the protocol.
366 */
367 if (resid < 0) {
368 error = EINVAL;
369 goto out;
370 }
371 dontroute =
372 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
373 (so->so_proto->pr_flags & PR_ATOMIC);
374 p->p_stats->p_ru.ru_msgsnd++;
375 if (control)
376 clen = control->m_len;
377 #define snderr(errno) { error = errno; splx(s); goto release; }
378
379 restart:
380 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
381 goto out;
382 do {
383 s = splsoftnet();
384 if (so->so_state & SS_CANTSENDMORE)
385 snderr(EPIPE);
386 if (so->so_error)
387 snderr(so->so_error);
388 if ((so->so_state & SS_ISCONNECTED) == 0) {
389 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
390 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
391 !(resid == 0 && clen != 0))
392 snderr(ENOTCONN);
393 } else if (addr == 0)
394 snderr(EDESTADDRREQ);
395 }
396 space = sbspace(&so->so_snd);
397 if (flags & MSG_OOB)
398 space += 1024;
399 if ((atomic && resid > so->so_snd.sb_hiwat) ||
400 clen > so->so_snd.sb_hiwat)
401 snderr(EMSGSIZE);
402 if (space < resid + clen && uio &&
403 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
404 if (so->so_state & SS_NBIO)
405 snderr(EWOULDBLOCK);
406 sbunlock(&so->so_snd);
407 error = sbwait(&so->so_snd);
408 splx(s);
409 if (error)
410 goto out;
411 goto restart;
412 }
413 splx(s);
414 mp = ⊤
415 space -= clen;
416 do {
417 if (uio == NULL) {
418 /*
419 * Data is prepackaged in "top".
420 */
421 resid = 0;
422 if (flags & MSG_EOR)
423 top->m_flags |= M_EOR;
424 } else do {
425 if (top == 0) {
426 MGETHDR(m, M_WAIT, MT_DATA);
427 mlen = MHLEN;
428 m->m_pkthdr.len = 0;
429 m->m_pkthdr.rcvif = (struct ifnet *)0;
430 } else {
431 MGET(m, M_WAIT, MT_DATA);
432 mlen = MLEN;
433 }
434 if (resid >= MINCLSIZE && space >= MCLBYTES) {
435 MCLGET(m, M_WAIT);
436 if ((m->m_flags & M_EXT) == 0)
437 goto nopages;
438 mlen = MCLBYTES;
439 #ifdef MAPPED_MBUFS
440 len = min(MCLBYTES, resid);
441 #else
442 if (atomic && top == 0) {
443 len = min(MCLBYTES - max_hdr, resid);
444 m->m_data += max_hdr;
445 } else
446 len = min(MCLBYTES, resid);
447 #endif
448 space -= len;
449 } else {
450 nopages:
451 len = min(min(mlen, resid), space);
452 space -= len;
453 /*
454 * For datagram protocols, leave room
455 * for protocol headers in first mbuf.
456 */
457 if (atomic && top == 0 && len < mlen)
458 MH_ALIGN(m, len);
459 }
460 error = uiomove(mtod(m, caddr_t), (int)len, uio);
461 resid = uio->uio_resid;
462 m->m_len = len;
463 *mp = m;
464 top->m_pkthdr.len += len;
465 if (error)
466 goto release;
467 mp = &m->m_next;
468 if (resid <= 0) {
469 if (flags & MSG_EOR)
470 top->m_flags |= M_EOR;
471 break;
472 }
473 } while (space > 0 && atomic);
474 if (dontroute)
475 so->so_options |= SO_DONTROUTE;
476 s = splsoftnet(); /* XXX */
477 error = (*so->so_proto->pr_usrreq)(so,
478 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
479 top, addr, control, p);
480 splx(s);
481 if (dontroute)
482 so->so_options &= ~SO_DONTROUTE;
483 clen = 0;
484 control = 0;
485 top = 0;
486 mp = ⊤
487 if (error)
488 goto release;
489 } while (resid && space > 0);
490 } while (resid);
491
492 release:
493 sbunlock(&so->so_snd);
494 out:
495 if (top)
496 m_freem(top);
497 if (control)
498 m_freem(control);
499 return (error);
500 }
501
502 /*
503 * Implement receive operations on a socket.
504 * We depend on the way that records are added to the sockbuf
505 * by sbappend*. In particular, each record (mbufs linked through m_next)
506 * must begin with an address if the protocol so specifies,
507 * followed by an optional mbuf or mbufs containing ancillary data,
508 * and then zero or more mbufs of data.
509 * In order to avoid blocking network interrupts for the entire time here,
510 * we splx() while doing the actual copy to user space.
511 * Although the sockbuf is locked, new data may still be appended,
512 * and thus we must maintain consistency of the sockbuf during that time.
513 *
514 * The caller may receive the data as a single mbuf chain by supplying
515 * an mbuf **mp0 for use in returning the chain. The uio is then used
516 * only for the count in uio_resid.
517 */
518 int
519 soreceive(so, paddr, uio, mp0, controlp, flagsp)
520 register struct socket *so;
521 struct mbuf **paddr;
522 struct uio *uio;
523 struct mbuf **mp0;
524 struct mbuf **controlp;
525 int *flagsp;
526 {
527 register struct mbuf *m, **mp;
528 register int flags, len, error, s, offset;
529 struct protosw *pr = so->so_proto;
530 struct mbuf *nextrecord;
531 int moff, type = 0;
532 int orig_resid = uio->uio_resid;
533
534 mp = mp0;
535 if (paddr)
536 *paddr = 0;
537 if (controlp)
538 *controlp = 0;
539 if (flagsp)
540 flags = *flagsp &~ MSG_EOR;
541 else
542 flags = 0;
543 if (flags & MSG_OOB) {
544 m = m_get(M_WAIT, MT_DATA);
545 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
546 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
547 (struct proc *)0);
548 if (error)
549 goto bad;
550 do {
551 error = uiomove(mtod(m, caddr_t),
552 (int) min(uio->uio_resid, m->m_len), uio);
553 m = m_free(m);
554 } while (uio->uio_resid && error == 0 && m);
555 bad:
556 if (m)
557 m_freem(m);
558 return (error);
559 }
560 if (mp)
561 *mp = (struct mbuf *)0;
562 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
563 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
564 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
565
566 restart:
567 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
568 return (error);
569 s = splsoftnet();
570
571 m = so->so_rcv.sb_mb;
572 /*
573 * If we have less data than requested, block awaiting more
574 * (subject to any timeout) if:
575 * 1. the current count is less than the low water mark,
576 * 2. MSG_WAITALL is set, and it is possible to do the entire
577 * receive operation at once if we block (resid <= hiwat), or
578 * 3. MSG_DONTWAIT is not set.
579 * If MSG_WAITALL is set but resid is larger than the receive buffer,
580 * we have to do the receive in sections, and thus risk returning
581 * a short count if a timeout or signal occurs after we start.
582 */
583 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
584 so->so_rcv.sb_cc < uio->uio_resid) &&
585 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
586 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
587 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
588 #ifdef DIAGNOSTIC
589 if (m == 0 && so->so_rcv.sb_cc)
590 panic("receive 1");
591 #endif
592 if (so->so_error) {
593 if (m)
594 goto dontblock;
595 error = so->so_error;
596 if ((flags & MSG_PEEK) == 0)
597 so->so_error = 0;
598 goto release;
599 }
600 if (so->so_state & SS_CANTRCVMORE) {
601 if (m)
602 goto dontblock;
603 else
604 goto release;
605 }
606 for (; m; m = m->m_next)
607 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
608 m = so->so_rcv.sb_mb;
609 goto dontblock;
610 }
611 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
612 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
613 error = ENOTCONN;
614 goto release;
615 }
616 if (uio->uio_resid == 0)
617 goto release;
618 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
619 error = EWOULDBLOCK;
620 goto release;
621 }
622 sbunlock(&so->so_rcv);
623 error = sbwait(&so->so_rcv);
624 splx(s);
625 if (error)
626 return (error);
627 goto restart;
628 }
629 dontblock:
630 #ifdef notyet /* XXXX */
631 if (uio->uio_procp)
632 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
633 #endif
634 nextrecord = m->m_nextpkt;
635 if (pr->pr_flags & PR_ADDR) {
636 #ifdef DIAGNOSTIC
637 if (m->m_type != MT_SONAME)
638 panic("receive 1a");
639 #endif
640 orig_resid = 0;
641 if (flags & MSG_PEEK) {
642 if (paddr)
643 *paddr = m_copy(m, 0, m->m_len);
644 m = m->m_next;
645 } else {
646 sbfree(&so->so_rcv, m);
647 if (paddr) {
648 *paddr = m;
649 so->so_rcv.sb_mb = m->m_next;
650 m->m_next = 0;
651 m = so->so_rcv.sb_mb;
652 } else {
653 MFREE(m, so->so_rcv.sb_mb);
654 m = so->so_rcv.sb_mb;
655 }
656 }
657 }
658 while (m && m->m_type == MT_CONTROL && error == 0) {
659 if (flags & MSG_PEEK) {
660 if (controlp)
661 *controlp = m_copy(m, 0, m->m_len);
662 m = m->m_next;
663 } else {
664 sbfree(&so->so_rcv, m);
665 if (controlp) {
666 if (pr->pr_domain->dom_externalize &&
667 mtod(m, struct cmsghdr *)->cmsg_type ==
668 SCM_RIGHTS)
669 error = (*pr->pr_domain->dom_externalize)(m);
670 *controlp = m;
671 so->so_rcv.sb_mb = m->m_next;
672 m->m_next = 0;
673 m = so->so_rcv.sb_mb;
674 } else {
675 MFREE(m, so->so_rcv.sb_mb);
676 m = so->so_rcv.sb_mb;
677 }
678 }
679 if (controlp) {
680 orig_resid = 0;
681 controlp = &(*controlp)->m_next;
682 }
683 }
684 if (m) {
685 if ((flags & MSG_PEEK) == 0)
686 m->m_nextpkt = nextrecord;
687 type = m->m_type;
688 if (type == MT_OOBDATA)
689 flags |= MSG_OOB;
690 }
691 moff = 0;
692 offset = 0;
693 while (m && uio->uio_resid > 0 && error == 0) {
694 if (m->m_type == MT_OOBDATA) {
695 if (type != MT_OOBDATA)
696 break;
697 } else if (type == MT_OOBDATA)
698 break;
699 #ifdef DIAGNOSTIC
700 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
701 panic("receive 3");
702 #endif
703 so->so_state &= ~SS_RCVATMARK;
704 len = uio->uio_resid;
705 if (so->so_oobmark && len > so->so_oobmark - offset)
706 len = so->so_oobmark - offset;
707 if (len > m->m_len - moff)
708 len = m->m_len - moff;
709 /*
710 * If mp is set, just pass back the mbufs.
711 * Otherwise copy them out via the uio, then free.
712 * Sockbuf must be consistent here (points to current mbuf,
713 * it points to next record) when we drop priority;
714 * we must note any additions to the sockbuf when we
715 * block interrupts again.
716 */
717 if (mp == 0) {
718 splx(s);
719 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
720 s = splsoftnet();
721 } else
722 uio->uio_resid -= len;
723 if (len == m->m_len - moff) {
724 if (m->m_flags & M_EOR)
725 flags |= MSG_EOR;
726 if (flags & MSG_PEEK) {
727 m = m->m_next;
728 moff = 0;
729 } else {
730 nextrecord = m->m_nextpkt;
731 sbfree(&so->so_rcv, m);
732 if (mp) {
733 *mp = m;
734 mp = &m->m_next;
735 so->so_rcv.sb_mb = m = m->m_next;
736 *mp = (struct mbuf *)0;
737 } else {
738 MFREE(m, so->so_rcv.sb_mb);
739 m = so->so_rcv.sb_mb;
740 }
741 if (m)
742 m->m_nextpkt = nextrecord;
743 }
744 } else {
745 if (flags & MSG_PEEK)
746 moff += len;
747 else {
748 if (mp)
749 *mp = m_copym(m, 0, len, M_WAIT);
750 m->m_data += len;
751 m->m_len -= len;
752 so->so_rcv.sb_cc -= len;
753 }
754 }
755 if (so->so_oobmark) {
756 if ((flags & MSG_PEEK) == 0) {
757 so->so_oobmark -= len;
758 if (so->so_oobmark == 0) {
759 so->so_state |= SS_RCVATMARK;
760 break;
761 }
762 } else {
763 offset += len;
764 if (offset == so->so_oobmark)
765 break;
766 }
767 }
768 if (flags & MSG_EOR)
769 break;
770 /*
771 * If the MSG_WAITALL flag is set (for non-atomic socket),
772 * we must not quit until "uio->uio_resid == 0" or an error
773 * termination. If a signal/timeout occurs, return
774 * with a short count but without error.
775 * Keep sockbuf locked against other readers.
776 */
777 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
778 !sosendallatonce(so) && !nextrecord) {
779 if (so->so_error || so->so_state & SS_CANTRCVMORE)
780 break;
781 error = sbwait(&so->so_rcv);
782 if (error) {
783 sbunlock(&so->so_rcv);
784 splx(s);
785 return (0);
786 }
787 if ((m = so->so_rcv.sb_mb) != NULL)
788 nextrecord = m->m_nextpkt;
789 }
790 }
791
792 if (m && pr->pr_flags & PR_ATOMIC) {
793 flags |= MSG_TRUNC;
794 if ((flags & MSG_PEEK) == 0)
795 (void) sbdroprecord(&so->so_rcv);
796 }
797 if ((flags & MSG_PEEK) == 0) {
798 if (m == 0)
799 so->so_rcv.sb_mb = nextrecord;
800 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
801 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
802 (struct mbuf *)(long)flags, (struct mbuf *)0,
803 (struct proc *)0);
804 }
805 if (orig_resid == uio->uio_resid && orig_resid &&
806 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
807 sbunlock(&so->so_rcv);
808 splx(s);
809 goto restart;
810 }
811
812 if (flagsp)
813 *flagsp |= flags;
814 release:
815 sbunlock(&so->so_rcv);
816 splx(s);
817 return (error);
818 }
819
820 int
821 soshutdown(so, how)
822 register struct socket *so;
823 register int how;
824 {
825 register struct protosw *pr = so->so_proto;
826
827 how++;
828 if (how & FREAD)
829 sorflush(so);
830 if (how & FWRITE)
831 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
832 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
833 return (0);
834 }
835
836 void
837 sorflush(so)
838 register struct socket *so;
839 {
840 register struct sockbuf *sb = &so->so_rcv;
841 register struct protosw *pr = so->so_proto;
842 register int s;
843 struct sockbuf asb;
844
845 sb->sb_flags |= SB_NOINTR;
846 (void) sblock(sb, M_WAITOK);
847 s = splimp();
848 socantrcvmore(so);
849 sbunlock(sb);
850 asb = *sb;
851 bzero((caddr_t)sb, sizeof (*sb));
852 splx(s);
853 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
854 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
855 sbrelease(&asb);
856 }
857
858 int
859 sosetopt(so, level, optname, m0)
860 register struct socket *so;
861 int level, optname;
862 struct mbuf *m0;
863 {
864 int error = 0;
865 register struct mbuf *m = m0;
866
867 if (level != SOL_SOCKET) {
868 if (so->so_proto && so->so_proto->pr_ctloutput)
869 return ((*so->so_proto->pr_ctloutput)
870 (PRCO_SETOPT, so, level, optname, &m0));
871 error = ENOPROTOOPT;
872 } else {
873 switch (optname) {
874
875 case SO_LINGER:
876 if (m == NULL || m->m_len != sizeof (struct linger)) {
877 error = EINVAL;
878 goto bad;
879 }
880 so->so_linger = mtod(m, struct linger *)->l_linger;
881 /* fall thru... */
882
883 case SO_DEBUG:
884 case SO_KEEPALIVE:
885 case SO_DONTROUTE:
886 case SO_USELOOPBACK:
887 case SO_BROADCAST:
888 case SO_REUSEADDR:
889 case SO_REUSEPORT:
890 case SO_OOBINLINE:
891 case SO_TIMESTAMP:
892 if (m == NULL || m->m_len < sizeof (int)) {
893 error = EINVAL;
894 goto bad;
895 }
896 if (*mtod(m, int *))
897 so->so_options |= optname;
898 else
899 so->so_options &= ~optname;
900 break;
901
902 case SO_SNDBUF:
903 case SO_RCVBUF:
904 case SO_SNDLOWAT:
905 case SO_RCVLOWAT:
906 {
907 int optval;
908
909 if (m == NULL || m->m_len < sizeof (int)) {
910 error = EINVAL;
911 goto bad;
912 }
913
914 /*
915 * Values < 1 make no sense for any of these
916 * options, so disallow them.
917 */
918 optval = *mtod(m, int *);
919 if (optval < 1) {
920 error = EINVAL;
921 goto bad;
922 }
923
924 switch (optname) {
925
926 case SO_SNDBUF:
927 case SO_RCVBUF:
928 if (sbreserve(optname == SO_SNDBUF ?
929 &so->so_snd : &so->so_rcv,
930 (u_long) optval) == 0) {
931 error = ENOBUFS;
932 goto bad;
933 }
934 break;
935
936 /*
937 * Make sure the low-water is never greater than
938 * the high-water.
939 */
940 case SO_SNDLOWAT:
941 so->so_snd.sb_lowat =
942 (optval > so->so_snd.sb_hiwat) ?
943 so->so_snd.sb_hiwat : optval;
944 break;
945 case SO_RCVLOWAT:
946 so->so_rcv.sb_lowat =
947 (optval > so->so_rcv.sb_hiwat) ?
948 so->so_rcv.sb_hiwat : optval;
949 break;
950 }
951 break;
952 }
953
954 case SO_SNDTIMEO:
955 case SO_RCVTIMEO:
956 {
957 struct timeval *tv;
958 short val;
959
960 if (m == NULL || m->m_len < sizeof (*tv)) {
961 error = EINVAL;
962 goto bad;
963 }
964 tv = mtod(m, struct timeval *);
965 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
966 error = EDOM;
967 goto bad;
968 }
969 val = tv->tv_sec * hz + tv->tv_usec / tick;
970
971 switch (optname) {
972
973 case SO_SNDTIMEO:
974 so->so_snd.sb_timeo = val;
975 break;
976 case SO_RCVTIMEO:
977 so->so_rcv.sb_timeo = val;
978 break;
979 }
980 break;
981 }
982
983 default:
984 error = ENOPROTOOPT;
985 break;
986 }
987 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
988 (void) ((*so->so_proto->pr_ctloutput)
989 (PRCO_SETOPT, so, level, optname, &m0));
990 m = NULL; /* freed by protocol */
991 }
992 }
993 bad:
994 if (m)
995 (void) m_free(m);
996 return (error);
997 }
998
999 int
1000 sogetopt(so, level, optname, mp)
1001 register struct socket *so;
1002 int level, optname;
1003 struct mbuf **mp;
1004 {
1005 register struct mbuf *m;
1006
1007 if (level != SOL_SOCKET) {
1008 if (so->so_proto && so->so_proto->pr_ctloutput) {
1009 return ((*so->so_proto->pr_ctloutput)
1010 (PRCO_GETOPT, so, level, optname, mp));
1011 } else
1012 return (ENOPROTOOPT);
1013 } else {
1014 m = m_get(M_WAIT, MT_SOOPTS);
1015 m->m_len = sizeof (int);
1016
1017 switch (optname) {
1018
1019 case SO_LINGER:
1020 m->m_len = sizeof (struct linger);
1021 mtod(m, struct linger *)->l_onoff =
1022 so->so_options & SO_LINGER;
1023 mtod(m, struct linger *)->l_linger = so->so_linger;
1024 break;
1025
1026 case SO_USELOOPBACK:
1027 case SO_DONTROUTE:
1028 case SO_DEBUG:
1029 case SO_KEEPALIVE:
1030 case SO_REUSEADDR:
1031 case SO_REUSEPORT:
1032 case SO_BROADCAST:
1033 case SO_OOBINLINE:
1034 case SO_TIMESTAMP:
1035 *mtod(m, int *) = so->so_options & optname;
1036 break;
1037
1038 case SO_TYPE:
1039 *mtod(m, int *) = so->so_type;
1040 break;
1041
1042 case SO_ERROR:
1043 *mtod(m, int *) = so->so_error;
1044 so->so_error = 0;
1045 break;
1046
1047 case SO_SNDBUF:
1048 *mtod(m, int *) = so->so_snd.sb_hiwat;
1049 break;
1050
1051 case SO_RCVBUF:
1052 *mtod(m, int *) = so->so_rcv.sb_hiwat;
1053 break;
1054
1055 case SO_SNDLOWAT:
1056 *mtod(m, int *) = so->so_snd.sb_lowat;
1057 break;
1058
1059 case SO_RCVLOWAT:
1060 *mtod(m, int *) = so->so_rcv.sb_lowat;
1061 break;
1062
1063 case SO_SNDTIMEO:
1064 case SO_RCVTIMEO:
1065 {
1066 int val = (optname == SO_SNDTIMEO ?
1067 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1068
1069 m->m_len = sizeof(struct timeval);
1070 mtod(m, struct timeval *)->tv_sec = val / hz;
1071 mtod(m, struct timeval *)->tv_usec =
1072 (val % hz) * tick;
1073 break;
1074 }
1075
1076 default:
1077 (void)m_free(m);
1078 return (ENOPROTOOPT);
1079 }
1080 *mp = m;
1081 return (0);
1082 }
1083 }
1084
1085 void
1086 sohasoutofband(so)
1087 register struct socket *so;
1088 {
1089 struct proc *p;
1090
1091 if (so->so_pgid < 0)
1092 gsignal(-so->so_pgid, SIGURG);
1093 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1094 psignal(p, SIGURG);
1095 selwakeup(&so->so_rcv.sb_sel);
1096 }
1097