/*	$NetBSD: kttcp.c,v 1.1 2002/06/28 23:27:14 thorpej Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Frank van der Linden and Jason R. Thorpe for
8 * Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project by
21 * Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 * or promote products derived from this software without specific prior
24 * written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * kttcp.c --
41 *
42 * This module provides kernel support for testing network
43 * throughput from the perspective of the kernel. It is
44 * similar in spirit to the classic ttcp network benchmark
45 * program, the main difference being that with kttcp, the
46 * kernel is the source and sink of the data.
47 *
48 * Testing like this is useful for a few reasons:
49 *
50 * 1. This allows us to know what kind of performance we can
51 * expect from network applications that run in the kernel
52 * space, such as the NFS server or the NFS client. These
53 * applications don't have to move the data to/from userspace,
54 * and so benchmark programs which run in userspace don't
55 * give us an accurate model.
56 *
57 * 2. Since data received is just thrown away, the receiver
58 * is very fast. This can provide better exercise for the
59 * sender at the other end.
60 *
61 * 3. Since the NetBSD kernel currently uses a run-to-completion
62 * scheduling model, kttcp provides a benchmark model where
63 * preemption of the benchmark program is not an issue.
64 */
65
66 #include <sys/param.h>
67 #include <sys/types.h>
68 #include <sys/ioctl.h>
69 #include <sys/file.h>
70 #include <sys/filedesc.h>
71 #include <sys/conf.h>
72 #include <sys/systm.h>
73 #include <sys/protosw.h>
74 #include <sys/proc.h>
75 #include <sys/resourcevar.h>
76 #include <sys/signal.h>
77 #include <sys/socketvar.h>
78 #include <sys/socket.h>
79 #include <sys/mbuf.h>
80 #include <sys/mount.h>
81 #include <sys/syscallargs.h>
82
83 #include <dev/kttcpio.h>
84
85 static int kttcp_send(struct proc *p, struct kttcp_io_args *);
86 static int kttcp_recv(struct proc *p, struct kttcp_io_args *);
87 static int kttcp_sosend(struct socket *, unsigned long long,
88 unsigned long long *, struct proc *, int);
89 static int kttcp_soreceive(struct socket *, unsigned long long,
90 unsigned long long *, struct proc *, int *);
91
92 void kttcpattach(int);
93
94 cdev_decl(kttcp);
95
/*
 * kttcpattach:
 *
 *	Pseudo-device attach routine.  kttcp keeps no per-instance
 *	state, so there is nothing to initialize here.
 */
void
kttcpattach(int count)
{
	/* Do nothing. */
}
101
/*
 * kttcpopen:
 *
 *	Device open routine.  There is no per-open state to set up,
 *	so opening the device always succeeds.
 */
int
kttcpopen(dev_t dev, int flags, int fmt, struct proc *p)
{

	/* Always succeeds. */
	return (0);
}
109
/*
 * kttcpclose:
 *
 *	Device close routine.  Nothing was allocated at open time,
 *	so there is nothing to tear down.
 */
int
kttcpclose(dev_t dev, int flags, int fmt, struct proc *p)
{

	/* Always succeeds. */
	return (0);
}
117
118 int
119 kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
120 {
121 int error;
122
123 if ((flag & FWRITE) == 0)
124 return EPERM;
125
126 switch (cmd) {
127 case KTTCP_IO_SEND:
128 error = kttcp_send(p, (struct kttcp_io_args *) data);
129 break;
130
131 case KTTCP_IO_RECV:
132 error = kttcp_recv(p, (struct kttcp_io_args *) data);
133 break;
134
135 default:
136 return EINVAL;
137 }
138
139 return error;
140 }
141
142 static int
143 kttcp_send(struct proc *p, struct kttcp_io_args *kio)
144 {
145 struct file *fp;
146 int error;
147 struct timeval t0, t1;
148 unsigned long long len, done;
149
150 if (kio->kio_totalsize >= KTTCP_MAX_XMIT)
151 return EINVAL;
152
153 fp = fd_getfile(p->p_fd, kio->kio_socket);
154 if (fp == NULL)
155 return EBADF;
156 if (fp->f_type != DTYPE_SOCKET)
157 return EFTYPE;
158
159 len = kio->kio_totalsize;
160 microtime(&t0);
161 do {
162 error = kttcp_sosend((struct socket *)fp->f_data, len,
163 &done, p, 0);
164 len -= done;
165 } while (error == 0 && len > 0);
166 microtime(&t1);
167 if (error != 0)
168 return error;
169 timersub(&t1, &t0, &kio->kio_elapsed);
170
171 kio->kio_bytesdone = kio->kio_totalsize - len;
172
173 return 0;
174 }
175
176 static int
177 kttcp_recv(struct proc *p, struct kttcp_io_args *kio)
178 {
179 struct file *fp;
180 int error;
181 struct timeval t0, t1;
182 unsigned long long len, done;
183
184 if (kio->kio_totalsize > KTTCP_MAX_XMIT)
185 return EINVAL;
186
187 fp = fd_getfile(p->p_fd, kio->kio_socket);
188 if (fp == NULL || fp->f_type != DTYPE_SOCKET)
189 return EBADF;
190 len = kio->kio_totalsize;
191 microtime(&t0);
192 do {
193 error = kttcp_soreceive((struct socket *)fp->f_data,
194 len, &done, p, NULL);
195 len -= done;
196 } while (error == 0 && len > 0 && done > 0);
197 microtime(&t1);
198 if (error == EPIPE)
199 error = 0;
200 if (error != 0)
201 return error;
202 timersub(&t1, &t0, &kio->kio_elapsed);
203
204 kio->kio_bytesdone = kio->kio_totalsize - len;
205
206 return 0;
207 }
208
/* Lock the send buffer, sleeping unless MSG_DONTWAIT was requested. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)

/*
 * Slightly changed version of sosend().
 *
 * Instead of copying data from userspace with uiomove(), mbufs are
 * allocated and their lengths set without ever filling m_data -- the
 * bytes sent are whatever the freshly-allocated mbufs happen to
 * contain.  That is the point of kttcp: the kernel itself is the
 * data source, so no user/kernel copy is measured.
 *
 * On return, *done holds the number of bytes handed to the protocol
 * (slen - resid), even on error.
 */
static int
kttcp_sosend(struct socket *so, unsigned long long slen,
    unsigned long long *done, struct proc *p, int flags)
{
	struct mbuf **mp, *m, *top;
	long space, len, mlen;
	int error, s, dontroute, atomic;
	long long resid;

	atomic = sosendallatonce(so);
	resid = slen;
	top = NULL;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	/* Error exit used while at splsoftnet with the sockbuf locked. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			/* Report and clear any asynchronous socket error. */
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0)
					snderr(ENOTCONN);
			} else
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
			/* Not enough room; wait for the buffer to drain. */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		do {
			/*
			 * Build an mbuf chain.  Note m_data is never
			 * written: the payload is uninitialized mbuf
			 * contents (see function comment).
			 */
			do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
 nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				resid -= len;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				/*
				 * error is 0 here (sblock succeeded);
				 * check kept from the sosend() original.
				 */
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			/* Hand the chain to the protocol; it consumes top. */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, NULL, NULL, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	*done = slen - resid;
#if 0
	printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
	return (error);
}
368
/*
 * Slightly changed version of soreceive().
 *
 * Data is consumed from the receive buffer and simply discarded
 * (no uiomove() to userspace); mp stays NULL for the kttcp use,
 * so mbufs are freed as they are drained.  On return, *done holds
 * the number of bytes consumed (slen - resid).
 */
static int
kttcp_soreceive(struct socket *so, unsigned long long slen,
    unsigned long long *done, struct proc *p, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset, moff, type;
	long long orig_resid, resid;
	struct protosw *pr;
	struct mbuf *nextrecord;

	pr = so->so_proto;
	mp = NULL;		/* never set: received mbufs are freed, not passed back */
	type = 0;
	resid = orig_resid = slen;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data: fetch via PRU_RCVOOB and discard. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			resid -= min(resid, m->m_len);
			m = m_free(m);
		} while (resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			/* EOF: drain what is left, or return 0 bytes. */
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB data already queued? Use it. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Skip (or consume) any control mbufs preceding the data. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main drain loop: walk the record, discarding up to resid bytes. */
	while (m && resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise the data is discarded (the uiomove() of the
		 * original soreceive() is omitted here), then freed.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf consumed: trim it in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until resid == 0 or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/*
				 * Intentionally return 0, not error:
				 * short count without error (see above).
				 */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Atomic protocol: drop the unread tail of the record. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	/* Nothing consumed and no EOF/EOR: go back and wait for data. */
	if (orig_resid == resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	*done = slen - resid;
#if 0
	printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
	return (error);
}
635