tcp_usrreq.c revision 1.4.4.1 1 /*
2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from: @(#)tcp_usrreq.c 7.15 (Berkeley) 6/28/90
34 * $Id: tcp_usrreq.c,v 1.4.4.1 1993/09/24 08:55:02 mycroft Exp $
35 */
36
37 #include "param.h"
38 #include "systm.h"
39 #include "malloc.h"
40 #include "mbuf.h"
41 #include "socket.h"
42 #include "socketvar.h"
43 #include "protosw.h"
44 #include "errno.h"
45 #include "stat.h"
46
47 #include "machine/cpu.h"
48
49 #include "../net/if.h"
50 #include "../net/route.h"
51
52 #include "in.h"
53 #include "in_systm.h"
54 #include "ip.h"
55 #include "in_pcb.h"
56 #include "ip_var.h"
57 #include "tcp.h"
58 #include "tcp_fsm.h"
59 #include "tcp_seq.h"
60 #include "tcp_timer.h"
61 #include "tcp_var.h"
62 #include "tcpip.h"
63 #include "tcp_debug.h"
64
65 /*
66 * TCP protocol interface to socket abstraction.
67 */
68 extern char *tcpstates[];
69 struct tcpcb *tcp_newtcpcb();
70
71 /*
72 * Process a TCP user request for TCP tb. If this is a send request
73 * then m is the mbuf chain of send data. If this is a timer expiration
74 * (called from the software clock routine), then timertype tells which timer.
75 */
76 /*ARGSUSED*/
77 tcp_usrreq(so, req, m, nam, control)
78 struct socket *so;
79 int req;
80 struct mbuf *m, *nam, *control;
81 {
82 register struct inpcb *inp;
83 register struct tcpcb *tp;
84 int s;
85 int error = 0;
86 int ostate;
87
88 if (req == PRU_CONTROL)
89 return (in_control(so, (int)m, (caddr_t)nam,
90 (struct ifnet *)control));
91 if (control && control->m_len) {
92 m_freem(control);
93 if (m)
94 m_freem(m);
95 return (EINVAL);
96 }
97
98 s = splnet();
99 inp = sotoinpcb(so);
100 /*
101 * When a TCP is attached to a socket, then there will be
102 * a (struct inpcb) pointed at by the socket, and this
103 * structure will point at a subsidary (struct tcpcb).
104 */
105 if (inp == 0 && req != PRU_ATTACH) {
106 splx(s);
107 return (EINVAL); /* XXX */
108 }
109 if (inp) {
110 tp = intotcpcb(inp);
111 /* WHAT IF TP IS 0? */
112 #ifdef KPROF
113 tcp_acounts[tp->t_state][req]++;
114 #endif
115 ostate = tp->t_state;
116 } else
117 ostate = 0;
118 switch (req) {
119
120 /*
121 * TCP attaches to socket via PRU_ATTACH, reserving space,
122 * and an internet control block.
123 */
124 case PRU_ATTACH:
125 if (inp) {
126 error = EISCONN;
127 break;
128 }
129 error = tcp_attach(so);
130 if (error)
131 break;
132 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
133 so->so_linger = TCP_LINGERTIME;
134 tp = sototcpcb(so);
135 break;
136
137 /*
138 * PRU_DETACH detaches the TCP protocol from the socket.
139 * If the protocol state is non-embryonic, then can't
140 * do this directly: have to initiate a PRU_DISCONNECT,
141 * which may finish later; embryonic TCB's can just
142 * be discarded here.
143 */
144 case PRU_DETACH:
145 if (tp->t_state > TCPS_LISTEN)
146 tp = tcp_disconnect(tp);
147 else
148 tp = tcp_close(tp);
149 break;
150
151 /*
152 * Give the socket an address.
153 */
154 case PRU_BIND:
155 error = in_pcbbind(inp, nam);
156 if (error)
157 break;
158 break;
159
160 /*
161 * Prepare to accept connections.
162 */
163 case PRU_LISTEN:
164 if (inp->inp_lport == 0)
165 error = in_pcbbind(inp, (struct mbuf *)0);
166 if (error == 0)
167 tp->t_state = TCPS_LISTEN;
168 break;
169
170 /*
171 * Initiate connection to peer.
172 * Create a template for use in transmissions on this connection.
173 * Enter SYN_SENT state, and mark socket as connecting.
174 * Start keep-alive timer, and seed output sequence space.
175 * Send initial segment on connection.
176 */
177 case PRU_CONNECT:
178 if (inp->inp_lport == 0) {
179 error = in_pcbbind(inp, (struct mbuf *)0);
180 if (error)
181 break;
182 }
183 error = in_pcbconnect(inp, nam);
184 if (error)
185 break;
186 tp->t_template = tcp_template(tp);
187 if (tp->t_template == 0) {
188 in_pcbdisconnect(inp);
189 error = ENOBUFS;
190 break;
191 }
192 soisconnecting(so);
193 tcpstat.tcps_connattempt++;
194 tp->t_state = TCPS_SYN_SENT;
195 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
196 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
197 tcp_sendseqinit(tp);
198 error = tcp_output(tp);
199 break;
200
201 /*
202 * Create a TCP connection between two sockets.
203 */
204 case PRU_CONNECT2:
205 error = EOPNOTSUPP;
206 break;
207
208 /*
209 * Initiate disconnect from peer.
210 * If connection never passed embryonic stage, just drop;
211 * else if don't need to let data drain, then can just drop anyways,
212 * else have to begin TCP shutdown process: mark socket disconnecting,
213 * drain unread data, state switch to reflect user close, and
214 * send segment (e.g. FIN) to peer. Socket will be really disconnected
215 * when peer sends FIN and acks ours.
216 *
217 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
218 */
219 case PRU_DISCONNECT:
220 tp = tcp_disconnect(tp);
221 break;
222
223 /*
224 * Accept a connection. Essentially all the work is
225 * done at higher levels; just return the address
226 * of the peer, storing through addr.
227 */
228 case PRU_ACCEPT: {
229 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
230
231 nam->m_len = sizeof (struct sockaddr_in);
232 sin->sin_family = AF_INET;
233 sin->sin_len = sizeof(*sin);
234 sin->sin_port = inp->inp_fport;
235 sin->sin_addr = inp->inp_faddr;
236 break;
237 }
238
239 /*
240 * Mark the connection as being incapable of further output.
241 */
242 case PRU_SHUTDOWN:
243 socantsendmore(so);
244 tp = tcp_usrclosed(tp);
245 if (tp)
246 error = tcp_output(tp);
247 break;
248
249 /*
250 * After a receive, possibly send window update to peer.
251 */
252 case PRU_RCVD:
253 (void) tcp_output(tp);
254 break;
255
256 /*
257 * Do a send by putting data in output queue and updating urgent
258 * marker if URG set. Possibly send more data.
259 */
260 case PRU_SEND:
261 sbappend(&so->so_snd, m);
262 error = tcp_output(tp);
263 break;
264
265 /*
266 * Abort the TCP.
267 */
268 case PRU_ABORT:
269 tp = tcp_drop(tp, ECONNABORTED);
270 break;
271
272 case PRU_SENSE:
273 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
274 (void) splx(s);
275 return (0);
276
277 case PRU_RCVOOB:
278 if ((so->so_oobmark == 0 &&
279 (so->so_state & SS_RCVATMARK) == 0) ||
280 so->so_options & SO_OOBINLINE ||
281 tp->t_oobflags & TCPOOB_HADDATA) {
282 error = EINVAL;
283 break;
284 }
285 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
286 error = EWOULDBLOCK;
287 break;
288 }
289 m->m_len = 1;
290 *mtod(m, caddr_t) = tp->t_iobc;
291 if (((int)nam & MSG_PEEK) == 0)
292 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
293 break;
294
295 case PRU_SENDOOB:
296 if (sbspace(&so->so_snd) < -512) {
297 m_freem(m);
298 error = ENOBUFS;
299 break;
300 }
301 /*
302 * According to RFC961 (Assigned Protocols),
303 * the urgent pointer points to the last octet
304 * of urgent data. We continue, however,
305 * to consider it to indicate the first octet
306 * of data past the urgent section.
307 * Otherwise, snd_up should be one lower.
308 */
309 sbappend(&so->so_snd, m);
310 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
311 tp->t_force = 1;
312 error = tcp_output(tp);
313 tp->t_force = 0;
314 break;
315
316 case PRU_SOCKADDR:
317 in_setsockaddr(inp, nam);
318 break;
319
320 case PRU_PEERADDR:
321 in_setpeeraddr(inp, nam);
322 break;
323
324 /*
325 * TCP slow timer went off; going through this
326 * routine for tracing's sake.
327 */
328 case PRU_SLOWTIMO:
329 tp = tcp_timers(tp, (int)nam);
330 req |= (int)nam << 8; /* for debug's sake */
331 break;
332
333 default:
334 panic("tcp_usrreq");
335 }
336 if (tp && (so->so_options & SO_DEBUG))
337 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
338 splx(s);
339 return (error);
340 }
341
342 tcp_ctloutput(op, so, level, optname, mp)
343 int op;
344 struct socket *so;
345 int level, optname;
346 struct mbuf **mp;
347 {
348 int error = 0;
349 struct inpcb *inp = sotoinpcb(so);
350 register struct tcpcb *tp = intotcpcb(inp);
351 register struct mbuf *m;
352
353 if (level != IPPROTO_TCP)
354 return (ip_ctloutput(op, so, level, optname, mp));
355
356 switch (op) {
357
358 case PRCO_SETOPT:
359 m = *mp;
360 switch (optname) {
361
362 case TCP_NODELAY:
363 if (m == NULL || m->m_len < sizeof (int))
364 error = EINVAL;
365 else if (*mtod(m, int *))
366 tp->t_flags |= TF_NODELAY;
367 else
368 tp->t_flags &= ~TF_NODELAY;
369 break;
370
371 case TCP_MAXSEG: /* not yet */
372 default:
373 error = EINVAL;
374 break;
375 }
376 if (m)
377 (void) m_free(m);
378 break;
379
380 case PRCO_GETOPT:
381 *mp = m = m_get(M_WAIT, MT_SOOPTS);
382 m->m_len = sizeof(int);
383
384 switch (optname) {
385 case TCP_NODELAY:
386 *mtod(m, int *) = tp->t_flags & TF_NODELAY;
387 break;
388 case TCP_MAXSEG:
389 *mtod(m, int *) = tp->t_maxseg;
390 break;
391 default:
392 error = EINVAL;
393 break;
394 }
395 break;
396 }
397 return (error);
398 }
399
400 u_long tcp_sendspace = 1024*8;
401 u_long tcp_recvspace = 1024*8;
402
403 /*
404 * Attach TCP protocol to socket, allocating
405 * internet protocol control block, tcp control block,
406 * bufer space, and entering LISTEN state if to accept connections.
407 */
408 tcp_attach(so)
409 struct socket *so;
410 {
411 register struct tcpcb *tp;
412 struct inpcb *inp;
413 int error;
414
415 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
416 error = soreserve(so, tcp_sendspace, tcp_recvspace);
417 if (error)
418 return (error);
419 }
420 error = in_pcballoc(so, &tcb);
421 if (error)
422 return (error);
423 inp = sotoinpcb(so);
424 tp = tcp_newtcpcb(inp);
425 if (tp == 0) {
426 int nofd = so->so_state & SS_NOFDREF; /* XXX */
427
428 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
429 in_pcbdetach(inp);
430 so->so_state |= nofd;
431 return (ENOBUFS);
432 }
433 tp->t_state = TCPS_CLOSED;
434 return (0);
435 }
436
437 /*
438 * Initiate (or continue) disconnect.
439 * If embryonic state, just send reset (once).
440 * If in ``let data drain'' option and linger null, just drop.
441 * Otherwise (hard), mark socket disconnecting and drop
442 * current input data; switch states based on user close, and
443 * send segment to peer (with FIN).
444 */
445 struct tcpcb *
446 tcp_disconnect(tp)
447 register struct tcpcb *tp;
448 {
449 struct socket *so = tp->t_inpcb->inp_socket;
450
451 if (tp->t_state < TCPS_ESTABLISHED)
452 tp = tcp_close(tp);
453 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
454 tp = tcp_drop(tp, 0);
455 else {
456 soisdisconnecting(so);
457 sbflush(&so->so_rcv);
458 tp = tcp_usrclosed(tp);
459 if (tp)
460 (void) tcp_output(tp);
461 }
462 return (tp);
463 }
464
465 /*
466 * User issued close, and wish to trail through shutdown states:
467 * if never received SYN, just forget it. If got a SYN from peer,
468 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
469 * If already got a FIN from peer, then almost done; go to LAST_ACK
470 * state. In all other cases, have already sent FIN to peer (e.g.
471 * after PRU_SHUTDOWN), and just have to play tedious game waiting
472 * for peer to send FIN or not respond to keep-alives, etc.
473 * We can let the user exit from the close as soon as the FIN is acked.
474 */
475 struct tcpcb *
476 tcp_usrclosed(tp)
477 register struct tcpcb *tp;
478 {
479
480 switch (tp->t_state) {
481
482 case TCPS_CLOSED:
483 case TCPS_LISTEN:
484 case TCPS_SYN_SENT:
485 tp->t_state = TCPS_CLOSED;
486 tp = tcp_close(tp);
487 break;
488
489 case TCPS_SYN_RECEIVED:
490 case TCPS_ESTABLISHED:
491 tp->t_state = TCPS_FIN_WAIT_1;
492 break;
493
494 case TCPS_CLOSE_WAIT:
495 tp->t_state = TCPS_LAST_ACK;
496 break;
497 }
498 if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
499 soisdisconnected(tp->t_inpcb->inp_socket);
500 return (tp);
501 }
502