Home | History | Annotate | Line # | Download | only in kern
sys_pipe.c revision 1.4.2.11
      1 /*	$NetBSD: sys_pipe.c,v 1.4.2.11 2002/04/01 07:47:57 nathanw Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1996 John S. Dyson
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice immediately at the beginning of the file, without modification,
     12  *    this list of conditions, and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. Absolutely no warranty of function or purpose is made by the author
     17  *    John S. Dyson.
     18  * 4. Modifications may be freely made to this file if the above conditions
     19  *    are met.
     20  *
     21  * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.95 2002/03/09 22:06:31 alfred Exp $
     22  */
     23 
     24 /*
     25  * This file contains a high-performance replacement for the socket-based
     26  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
     27  * all features of sockets, but does do everything that pipes normally
     28  * do.
     29  *
     30  * Adaption for NetBSD UVM, including uvm_loan() based direct write, was
     31  * written by Jaromir Dolecek.
     32  */
     33 
     34 /*
     35  * This code has two modes of operation, a small write mode and a large
     36  * write mode.  The small write mode acts like conventional pipes with
     37  * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
     38  * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
     39  * and PIPE_SIZE in size, it is fully mapped into the kernel (on FreeBSD,
     40  * those pages are also wired), and the receiving process can copy it directly
     41  * from the pages in the sending process.
     42  *
     43  * If the sending process receives a signal, it is possible that it will
     44  * go away, and certainly its address space can change, because control
     45  * is returned back to the user-mode side.  In that case, the pipe code
     46  * arranges to copy the buffer supplied by the user process on FreeBSD, to
     47  * a pageable kernel buffer, and the receiving process will grab the data
     48  * from the pageable kernel buffer.  Since signals don't happen all that often,
     49  * the copy operation is normally eliminated.
     50  * For NetBSD, the pages are mapped read-only, COW for kernel by uvm_loan(),
     51  * so no explicit handling need to be done, all is handled by standard VM
     52  * facilities.
     53  *
     54  * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
     55  * happen for small transfers so that the system will not spend all of
     56  * its time context switching.  PIPE_SIZE is constrained by the
     57  * amount of kernel virtual memory.
     58  */
     59 
     60 #include <sys/cdefs.h>
     61 __KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.4.2.11 2002/04/01 07:47:57 nathanw Exp $");
     62 
     63 #include <sys/param.h>
     64 #include <sys/systm.h>
     65 #include <sys/proc.h>
     66 #include <sys/fcntl.h>
     67 #include <sys/file.h>
     68 #include <sys/filedesc.h>
     69 #include <sys/filio.h>
     70 #include <sys/kernel.h>
     71 #include <sys/lock.h>
     72 #include <sys/ttycom.h>
     73 #include <sys/stat.h>
     74 #include <sys/malloc.h>
     75 #include <sys/poll.h>
     76 #include <sys/signalvar.h>
     77 #include <sys/vnode.h>
     78 #include <sys/uio.h>
     79 #include <sys/lock.h>
     80 #ifdef __FreeBSD__
     81 #include <sys/mutex.h>
     82 #endif
     83 #ifdef __NetBSD__
     84 #include <sys/select.h>
     85 #include <sys/mount.h>
     86 #include <sys/syscallargs.h>
     87 #include <uvm/uvm.h>
     88 #include <sys/sysctl.h>
     89 #include <sys/kernel.h>
     90 #endif /* NetBSD, FreeBSD */
     91 
     92 #include <sys/pipe.h>
     93 
     94 #ifdef __NetBSD__
     95 /*
     96  * Avoid microtime(9), it's slow. We don't guard the read from time(9)
     97  * with splclock(9) since we don't actually need to be THAT sure the access
     98  * is atomic.
     99  */
    100 #define vfs_timestamp(tv)	(*(tv) = time)
    101 #endif
    102 
    103 /*
    104  * Use this define if you want to disable *fancy* VM things.  Expect an
    105  * approx 30% decrease in transfer rate.  This could be useful for
    106  * OpenBSD.
    107  */
    108 /* #define PIPE_NODIRECT */
    109 
    110 /*
    111  * interfaces to the outside world
    112  */
    113 #ifdef __FreeBSD__
    114 static int pipe_read(struct file *fp, struct uio *uio,
    115 		struct ucred *cred, int flags, struct thread *td);
    116 static int pipe_write(struct file *fp, struct uio *uio,
    117 		struct ucred *cred, int flags, struct thread *td);
    118 static int pipe_close(struct file *fp, struct thread *td);
    119 static int pipe_poll(struct file *fp, int events, struct ucred *cred,
    120 		struct thread *td);
    121 static int pipe_kqfilter(struct file *fp, struct knote *kn);
    122 static int pipe_stat(struct file *fp, struct stat *sb, struct thread *td);
    123 static int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td);
    124 
    125 static struct fileops pipeops = {
    126 	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
    127 	pipe_stat, pipe_close
    128 };
    129 
    130 static void	filt_pipedetach(struct knote *kn);
    131 static int	filt_piperead(struct knote *kn, long hint);
    132 static int	filt_pipewrite(struct knote *kn, long hint);
    133 
    134 static struct filterops pipe_rfiltops =
    135 	{ 1, NULL, filt_pipedetach, filt_piperead };
    136 static struct filterops pipe_wfiltops =
    137 	{ 1, NULL, filt_pipedetach, filt_pipewrite };
    138 
    139 #define PIPE_GET_GIANT(pipe)							\
    140 	do {								\
    141 		PIPE_UNLOCK(wpipe);					\
    142 		mtx_lock(&Giant);					\
    143 	} while (0)
    144 
    145 #define PIPE_DROP_GIANT(pipe)						\
    146 	do {								\
    147 		mtx_unlock(&Giant);					\
    148 		PIPE_LOCK(wpipe);					\
    149 	} while (0)
    150 
    151 #endif /* FreeBSD */
    152 
    153 #ifdef __NetBSD__
    154 static int pipe_read(struct file *fp, off_t *offset, struct uio *uio,
    155 		struct ucred *cred, int flags);
    156 static int pipe_write(struct file *fp, off_t *offset, struct uio *uio,
    157 		struct ucred *cred, int flags);
    158 static int pipe_close(struct file *fp, struct proc *p);
    159 static int pipe_poll(struct file *fp, int events, struct proc *p);
    160 static int pipe_fcntl(struct file *fp, u_int com, caddr_t data,
    161 		struct proc *p);
    162 static int pipe_stat(struct file *fp, struct stat *sb, struct proc *p);
    163 static int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p);
    164 
    165 static struct fileops pipeops =
    166     { pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
    167       pipe_stat, pipe_close };
    168 
    169 /* XXXSMP perhaps use spinlocks & KERNEL_PROC_(UN)LOCK() ? just clear now */
    170 #define PIPE_GET_GIANT(pipe)
    171 #define PIPE_DROP_GIANT(pipe)
    172 #define GIANT_REQUIRED
    173 
    174 #endif /* NetBSD */
    175 
    176 /*
    177  * Default pipe buffer size(s), this can be kind-of large now because pipe
    178  * space is pageable.  The pipe code will try to maintain locality of
    179  * reference for performance reasons, so small amounts of outstanding I/O
    180  * will not wipe the cache.
    181  */
    182 #define MINPIPESIZE (PIPE_SIZE/3)
    183 #define MAXPIPESIZE (2*PIPE_SIZE/3)
    184 
    185 /*
    186  * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
    187  * is there so that on large systems, we don't exhaust it.
    188  */
    189 #define MAXPIPEKVA (8*1024*1024)
    190 static int maxpipekva = MAXPIPEKVA;
    191 
    192 /*
    193  * Limit for direct transfers, we cannot, of course limit
    194  * the amount of kva for pipes in general though.
    195  */
    196 #define LIMITPIPEKVA (16*1024*1024)
    197 static int limitpipekva = LIMITPIPEKVA;
    198 
    199 /*
    200  * Limit the number of "big" pipes
    201  */
    202 #define LIMITBIGPIPES  32
    203 static int maxbigpipes = LIMITBIGPIPES;
    204 static int nbigpipe = 0;
    205 
    206 /*
    207  * Amount of KVA consumed by pipe buffers.
    208  */
    209 static int amountpipekva = 0;
    210 
    211 static void pipeclose(struct pipe *cpipe);
    212 static void pipe_free_kmem(struct pipe *cpipe);
    213 static int pipe_create(struct pipe **cpipep, int allockva);
    214 static __inline int pipelock(struct pipe *cpipe, int catch);
    215 static __inline void pipeunlock(struct pipe *cpipe);
    216 static __inline void pipeselwakeup(struct pipe *cpipe, struct pipe *sigp);
    217 #ifndef PIPE_NODIRECT
    218 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
    219 #endif
    220 static int pipespace(struct pipe *cpipe, int size);
    221 
    222 #ifdef __NetBSD__
    223 #ifndef PIPE_NODIRECT
    224 static int pipe_loan_alloc(struct pipe *, int);
    225 static void pipe_loan_free(struct pipe *);
    226 #endif /* PIPE_NODIRECT */
    227 
    228 static struct pool pipe_pool;
    229 #endif /* NetBSD */
    230 
    231 #ifdef __FreeBSD__
    232 static vm_zone_t pipe_zone;
    233 
    234 static void pipeinit(void *dummy __unused);
    235 #ifndef PIPE_NODIRECT
    236 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
    237 static void pipe_destroy_write_buffer(struct pipe *wpipe);
    238 static void pipe_clone_write_buffer(struct pipe *wpipe);
    239 #endif
    240 
    241 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
    242 
    243 static void
    244 pipeinit(void *dummy __unused)
    245 {
    246 
    247 	pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);
    248 }
    249 #endif /* FreeBSD */
    250 
    251 /*
    252  * The pipe system call for the DTYPE_PIPE type of pipes
    253  */
    254 
    255 /* ARGSUSED */
    256 #ifdef __FreeBSD__
    257 int
    258 pipe(td, uap)
    259 	struct thread *td;
    260 	struct pipe_args /* {
    261 		int	dummy;
    262 	} */ *uap;
    263 #elif defined(__NetBSD__)
    264 int
    265 sys_pipe(l, v, retval)
    266 	struct lwp *l;
    267 	void *v;
    268 	register_t *retval;
    269 #endif
    270 {
    271 	struct file *rf, *wf;
    272 	struct pipe *rpipe, *wpipe;
    273 	int fd, error;
    274 	struct proc *p;
    275 #ifdef __FreeBSD__
    276 	struct mtx *pmtx;
    277 
    278 	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
    279 
    280 	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
    281 
    282 	rpipe = wpipe = NULL;
    283 	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 1)) {
    284 		pipeclose(rpipe);
    285 		pipeclose(wpipe);
    286 		free(pmtx, M_TEMP);
    287 		return (ENFILE);
    288 	}
    289 
    290 	error = falloc(td, &rf, &fd);
    291 	if (error) {
    292 		pipeclose(rpipe);
    293 		pipeclose(wpipe);
    294 		free(pmtx, M_TEMP);
    295 		return (error);
    296 	}
    297 	fhold(rf);
    298 	td->td_retval[0] = fd;
    299 
    300 	/*
    301 	 * Warning: once we've gotten past allocation of the fd for the
    302 	 * read-side, we can only drop the read side via fdrop() in order
    303 	 * to avoid races against processes which manage to dup() the read
    304 	 * side while we are blocked trying to allocate the write side.
    305 	 */
    306 	FILE_LOCK(rf);
    307 	rf->f_flag = FREAD | FWRITE;
    308 	rf->f_type = DTYPE_PIPE;
    309 	rf->f_data = (caddr_t)rpipe;
    310 	rf->f_ops = &pipeops;
    311 	FILE_UNLOCK(rf);
    312 	error = falloc(td, &wf, &fd);
    313 	if (error) {
    314 		struct filedesc *fdp = td->td_proc->p_fd;
    315 		FILEDESC_LOCK(fdp);
    316 		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
    317 			fdp->fd_ofiles[td->td_retval[0]] = NULL;
    318 			FILEDESC_UNLOCK(fdp);
    319 			fdrop(rf, td);
    320 		} else
    321 			FILEDESC_UNLOCK(fdp);
    322 		fdrop(rf, td);
    323 		/* rpipe has been closed by fdrop(). */
    324 		pipeclose(wpipe);
    325 		free(pmtx, M_TEMP);
    326 		return (error);
    327 	}
    328 	FILE_LOCK(wf);
    329 	wf->f_flag = FREAD | FWRITE;
    330 	wf->f_type = DTYPE_PIPE;
    331 	wf->f_data = (caddr_t)wpipe;
    332 	wf->f_ops = &pipeops;
    333 	p->p_retval[1] = fd;
    334 	rpipe->pipe_peer = wpipe;
    335 	wpipe->pipe_peer = rpipe;
    336 	mtx_init(pmtx, "pipe mutex", MTX_DEF);
    337 	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
    338 	fdrop(rf, td);
    339 #endif /* FreeBSD */
    340 
    341 #ifdef __NetBSD__
    342 	p = l->l_proc;
    343 	rpipe = wpipe = NULL;
    344 	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
    345 		pipeclose(rpipe);
    346 		pipeclose(wpipe);
    347 		return (ENFILE);
    348 	}
    349 
    350 	/*
    351 	 * Note: the file structure returned from falloc() is marked
    352 	 * as 'larval' initially. Unless we mark it as 'mature' by
    353 	 * FILE_SET_MATURE(), any attempt to do anything with it would
    354 	 * return EBADF, including e.g. dup(2) or close(2). This avoids
    355 	 * file descriptor races if we block in the second falloc().
    356 	 */
    357 
    358 	error = falloc(p, &rf, &fd);
    359 	if (error)
    360 		goto free2;
    361 	retval[0] = fd;
    362 	rf->f_flag = FREAD;
    363 	rf->f_type = DTYPE_PIPE;
    364 	rf->f_data = (caddr_t)rpipe;
    365 	rf->f_ops = &pipeops;
    366 
    367 	error = falloc(p, &wf, &fd);
    368 	if (error)
    369 		goto free3;
    370 	retval[1] = fd;
    371 	wf->f_flag = FWRITE;
    372 	wf->f_type = DTYPE_PIPE;
    373 	wf->f_data = (caddr_t)wpipe;
    374 	wf->f_ops = &pipeops;
    375 
    376 	rpipe->pipe_peer = wpipe;
    377 	wpipe->pipe_peer = rpipe;
    378 
    379 	FILE_SET_MATURE(rf);
    380 	FILE_SET_MATURE(wf);
    381 	FILE_UNUSE(rf, p);
    382 	FILE_UNUSE(wf, p);
    383 	return (0);
    384 free3:
    385 	FILE_UNUSE(rf, p);
    386 	ffree(rf);
    387 	fdremove(p->p_fd, retval[0]);
    388 free2:
    389 	pipeclose(wpipe);
    390 	pipeclose(rpipe);
    391 #endif /* NetBSD */
    392 
    393 	return (error);
    394 }
    395 
    396 /*
    397  * Allocate kva for pipe circular buffer, the space is pageable
    398  * This routine will 'realloc' the size of a pipe safely, if it fails
    399  * it will retain the old buffer.
    400  * If it fails it will return ENOMEM.
    401  */
     402 static int
     403 pipespace(cpipe, size)
     404 	struct pipe *cpipe;
     405 	int size;
     406 {
     407 	caddr_t buffer;
     408 #ifdef __FreeBSD__
     409 	struct vm_object *object;
     410 	int npages, error;
     411 
     412 	GIANT_REQUIRED;
     413 	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
     414 	       ("pipespace: pipe mutex locked"));
     415 
     416 	npages = round_page(size)/PAGE_SIZE;
     417 	/*
     418 	 * Create an object, I don't like the idea of paging to/from
     419 	 * kernel_object.
     420 	 */
     421 	object = vm_object_allocate(OBJT_DEFAULT, npages);
     422 	buffer = (caddr_t) vm_map_min(kernel_map);
     423 
     424 	/*
     425 	 * Insert the object into the kernel map, and allocate kva for it.
     426 	 * The map entry is, by default, pageable.
     427 	 */
     428 	error = vm_map_find(kernel_map, object, 0,
     429 		(vm_offset_t *) &buffer, size, 1,
     430 		VM_PROT_ALL, VM_PROT_ALL, 0);
     431 
     432 	if (error != KERN_SUCCESS) {
     433 		vm_object_deallocate(object);
     434 		return (ENOMEM);
     435 	}
     436 #endif /* FreeBSD */
     437 
     438 #ifdef __NetBSD__
     439 	/*
     440 	 * Allocate pageable virtual address space. Physical memory is allocated
     441 	 * on demand.
     442 	 */
     443 	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
     444 	if (buffer == NULL)
     445 		return (ENOMEM);
     446 #endif /* NetBSD */
     447 
         	/*
         	 * New KVA is in hand; only now release the old buffer so that a
         	 * failed resize leaves the previous buffer intact (see header
         	 * comment above the function).
         	 */
     448 	/* free old resources if we're resizing */
     449 	pipe_free_kmem(cpipe);
     450 #ifdef __FreeBSD__
     451 	cpipe->pipe_buffer.object = object;
     452 #endif
     453 	cpipe->pipe_buffer.buffer = buffer;
     454 	cpipe->pipe_buffer.size = size;
     455 	cpipe->pipe_buffer.in = 0;
     456 	cpipe->pipe_buffer.out = 0;
     457 	cpipe->pipe_buffer.cnt = 0;
         	/* NOTE(review): global KVA accounting updated without a lock here --
         	 * presumably serialized by callers; confirm before relying on it. */
     458 	amountpipekva += cpipe->pipe_buffer.size;
     459 	return (0);
     460 }
    461 
    462 /*
    463  * initialize and allocate VM and memory for pipe
    464  */
     465 static int
     466 pipe_create(cpipep, allockva)
     467 	struct pipe **cpipep;
     468 	int allockva;
     469 {
     470 	struct pipe *cpipe;
     471 	int error;
     472 
     473 #ifdef __FreeBSD__
     474 	*cpipep = zalloc(pipe_zone);
     475 #endif
     476 #ifdef __NetBSD__
     477 	*cpipep = pool_get(&pipe_pool, M_WAITOK);
     478 #endif
     479 	if (*cpipep == NULL)
     480 		return (ENOMEM);
     481 
     482 	cpipe = *cpipep;
     483 
     484 	/* Initialize */
     485 	memset(cpipe, 0, sizeof(*cpipe));
         	/* Start with PIPE_SIGNALR set so the first read wakes the writer
         	 * side (see the "first time after last write" logic in pipe_read). */
     486 	cpipe->pipe_state = PIPE_SIGNALR;
     487 
     488 #ifdef __FreeBSD__
     489 	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
     490 #endif
         	/*
         	 * On pipespace() failure the partially set up pipe remains in
         	 * *cpipep; callers (e.g. sys_pipe) dispose of it via pipeclose().
         	 */
     491 	if (allockva && (error = pipespace(cpipe, PIPE_SIZE)))
     492 		return (error);
     493 
     494 	vfs_timestamp(&cpipe->pipe_ctime);
     495 	cpipe->pipe_atime = cpipe->pipe_ctime;
     496 	cpipe->pipe_mtime = cpipe->pipe_ctime;
     497 #ifdef __NetBSD__
     498 	cpipe->pipe_pgid = NO_PID;
     499 	lockinit(&cpipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);
     500 #endif
     501 
     502 	return (0);
     503 }
    504 
    505 
    506 /*
    507  * lock a pipe for I/O, blocking other access
    508  */
     509 static __inline int
     510 pipelock(cpipe, catch)
     511 	struct pipe *cpipe;
     512 	int catch;
     513 {
     514 	int error;
     515 
     516 #ifdef __FreeBSD__
     517 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
     518 	while (cpipe->pipe_state & PIPE_LOCKFL) {
     519 		cpipe->pipe_state |= PIPE_LWANT;
     520 		error = msleep(cpipe, PIPE_MTX(cpipe),
     521 		    catch ? (PRIBIO | PCATCH) : PRIBIO,
     522 		    "pipelk", 0);
     523 		if (error != 0)
     524 			return (error);
     525 	}
     526 	cpipe->pipe_state |= PIPE_LOCKFL;
     527 	return (0);
     528 #endif
     529 
     530 #ifdef __NetBSD__
         	/*
         	 * The lock was initialized with PRIBIO | PCATCH (pipe_create), so
         	 * lockmgr() can return EINTR/ERESTART on a signal.  When the caller
         	 * did not ask for catching (catch == 0), keep retrying until the
         	 * lock is actually acquired.
         	 */
     531 	do {
     532 		error = lockmgr(&cpipe->pipe_lock, LK_EXCLUSIVE, NULL);
     533 	} while (!catch && (error == EINTR || error == ERESTART));
     534 	return (error);
     535 #endif
     536 }
    537 
    538 /*
    539  * unlock a pipe I/O lock
    540  */
     541 static __inline void
     542 pipeunlock(cpipe)
     543 	struct pipe *cpipe;
     544 {
     545 
     546 #ifdef __FreeBSD__
     547 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
         	/* Drop the I/O lock flag and wake anyone sleeping in pipelock(). */
     548 	cpipe->pipe_state &= ~PIPE_LOCKFL;
     549 	if (cpipe->pipe_state & PIPE_LWANT) {
     550 		cpipe->pipe_state &= ~PIPE_LWANT;
     551 		wakeup(cpipe);
     552 	}
     553 #endif
     554 
     555 #ifdef __NetBSD__
     556 	lockmgr(&cpipe->pipe_lock, LK_RELEASE, NULL);
     557 #endif
     558 }
    559 
    560 /*
    561  * Select/poll wakup. This also sends SIGIO to peer connected to
    562  * 'sigpipe' side of pipe.
    563  */
     564 static __inline void
     565 pipeselwakeup(selp, sigp)
     566 	struct pipe *selp, *sigp;
     567 {
         	/* Wake select/poll waiters registered on 'selp'. */
     568 	if (selp->pipe_state & PIPE_SEL) {
     569 		selp->pipe_state &= ~PIPE_SEL;
     570 		selwakeup(&selp->pipe_sel);
     571 	}
     572 #ifdef __FreeBSD__
     573 	if (sigp && (sigp->pipe_state & PIPE_ASYNC) && sigp->pipe_sigio)
     574 		pgsigio(sigp->pipe_sigio, SIGIO, 0);
     575 	KNOTE(&selp->pipe_sel.si_note, 0);
     576 #endif
     577 
     578 #ifdef __NetBSD__
         	/*
         	 * Deliver SIGIO for async I/O: a negative pipe_pgid names a process
         	 * group, a positive one a single process (same convention as
         	 * fcntl F_SETOWN).  sigp may be NULL when there is nobody to signal.
         	 */
     579 	if (sigp && (sigp->pipe_state & PIPE_ASYNC)
     580 	    && sigp->pipe_pgid != NO_PID){
     581 		struct proc *p;
     582 
     583 		if (sigp->pipe_pgid < 0)
     584 			gsignal(-sigp->pipe_pgid, SIGIO);
     585 		else if (sigp->pipe_pgid > 0 && (p = pfind(sigp->pipe_pgid)) != 0)
     586 			psignal(p, SIGIO);
     587 	}
     588 #endif /* NetBSD */
     589 }
    590 
    591 /* ARGSUSED */
    592 #ifdef __FreeBSD__
    593 static int
    594 pipe_read(fp, uio, cred, flags, td)
    595 	struct file *fp;
    596 	struct uio *uio;
    597 	struct ucred *cred;
    598 	struct thread *td;
    599 	int flags;
    600 	struct proc *p;
    601 #elif defined(__NetBSD__)
    602 static int
    603 pipe_read(fp, offset, uio, cred, flags)
    604 	struct file *fp;
    605 	off_t *offset;
    606 	struct uio *uio;
    607 	struct ucred *cred;
    608 	int flags;
    609 #endif
    610 {
    611 	struct pipe *rpipe = (struct pipe *) fp->f_data;
    612 	int error;
    613 	size_t nread = 0;
    614 	size_t size;
    615 	size_t ocnt;
    616 
    617 	PIPE_LOCK(rpipe);
    618 	++rpipe->pipe_busy;
    619 	error = pipelock(rpipe, 1);
    620 	if (error)
    621 		goto unlocked_error;
    622 
    623 	ocnt = rpipe->pipe_buffer.cnt;
    624 
    625 	while (uio->uio_resid) {
    626 		/*
    627 		 * normal pipe buffer receive
    628 		 */
    629 		if (rpipe->pipe_buffer.cnt > 0) {
    630 			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
    631 			if (size > rpipe->pipe_buffer.cnt)
    632 				size = rpipe->pipe_buffer.cnt;
    633 			if (size > uio->uio_resid)
    634 				size = uio->uio_resid;
    635 
    636 			PIPE_UNLOCK(rpipe);
    637 			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
    638 					size, uio);
    639 			PIPE_LOCK(rpipe);
    640 			if (error)
    641 				break;
    642 
    643 			rpipe->pipe_buffer.out += size;
    644 			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
    645 				rpipe->pipe_buffer.out = 0;
    646 
    647 			rpipe->pipe_buffer.cnt -= size;
    648 
    649 			/*
    650 			 * If there is no more to read in the pipe, reset
    651 			 * its pointers to the beginning.  This improves
    652 			 * cache hit stats.
    653 			 */
    654 			if (rpipe->pipe_buffer.cnt == 0) {
    655 				rpipe->pipe_buffer.in = 0;
    656 				rpipe->pipe_buffer.out = 0;
    657 			}
    658 			nread += size;
    659 #ifndef PIPE_NODIRECT
    660 		/*
    661 		 * Direct copy, bypassing a kernel buffer.
    662 		 */
    663 		} else if ((size = rpipe->pipe_map.cnt) &&
    664 			   (rpipe->pipe_state & PIPE_DIRECTW)) {
    665 			caddr_t	va;
    666 			if (size > uio->uio_resid)
    667 				size = uio->uio_resid;
    668 
    669 			va = (caddr_t) rpipe->pipe_map.kva +
    670 			    rpipe->pipe_map.pos;
    671 			PIPE_UNLOCK(rpipe);
    672 			error = uiomove(va, size, uio);
    673 			PIPE_LOCK(rpipe);
    674 			if (error)
    675 				break;
    676 			nread += size;
    677 			rpipe->pipe_map.pos += size;
    678 			rpipe->pipe_map.cnt -= size;
    679 			if (rpipe->pipe_map.cnt == 0) {
    680 				rpipe->pipe_state &= ~PIPE_DIRECTW;
    681 				wakeup(rpipe);
    682 			}
    683 #endif
    684 		} else {
    685 			/*
    686 			 * detect EOF condition
    687 			 * read returns 0 on EOF, no need to set error
    688 			 */
    689 			if (rpipe->pipe_state & PIPE_EOF)
    690 				break;
    691 
    692 			/*
    693 			 * If the "write-side" has been blocked, wake it up now.
    694 			 */
    695 			if (rpipe->pipe_state & PIPE_WANTW) {
    696 				rpipe->pipe_state &= ~PIPE_WANTW;
    697 				wakeup(rpipe);
    698 			}
    699 
    700 			/*
    701 			 * Break if some data was read.
    702 			 */
    703 			if (nread > 0)
    704 				break;
    705 
    706 			/*
    707 			 * don't block on non-blocking I/O
    708 			 */
    709 			if (fp->f_flag & FNONBLOCK) {
    710 				error = EAGAIN;
    711 				break;
    712 			}
    713 
    714 			/*
    715 			 * Unlock the pipe buffer for our remaining processing.
    716 			 * We will either break out with an error or we will
    717 			 * sleep and relock to loop.
    718 			 */
    719 			pipeunlock(rpipe);
    720 
    721 			/*
    722 			 * We want to read more, wake up select/poll.
    723 			 */
    724 			pipeselwakeup(rpipe, rpipe->pipe_peer);
    725 
    726 			rpipe->pipe_state |= PIPE_WANTR;
    727 #ifdef __FreeBSD__
    728 			error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
    729 				    "piperd", 0);
    730 #else
    731 			error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0);
    732 #endif
    733 			if (error != 0 || (error = pipelock(rpipe, 1)))
    734 				goto unlocked_error;
    735 		}
    736 	}
    737 	pipeunlock(rpipe);
    738 
    739 	/* XXX: should probably do this before getting any locks. */
    740 	if (error == 0)
    741 		vfs_timestamp(&rpipe->pipe_atime);
    742 unlocked_error:
    743 	--rpipe->pipe_busy;
    744 
    745 	/*
    746 	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
    747 	 */
    748 	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
    749 		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
    750 		wakeup(rpipe);
    751 	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
    752 		/*
    753 		 * Handle write blocking hysteresis.
    754 		 */
    755 		if (rpipe->pipe_state & PIPE_WANTW) {
    756 			rpipe->pipe_state &= ~PIPE_WANTW;
    757 			wakeup(rpipe);
    758 		}
    759 	}
    760 
    761 	/*
    762 	 * If anything was read off the buffer, signal to the writer it's
    763 	 * possible to write more data. Also send signal if we are here for the
    764 	 * first time after last write.
    765 	 */
    766 	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF
    767 	    && (ocnt != rpipe->pipe_buffer.cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
    768 		pipeselwakeup(rpipe, rpipe->pipe_peer);
    769 		rpipe->pipe_state &= ~PIPE_SIGNALR;
    770 	}
    771 
    772 	PIPE_UNLOCK(rpipe);
    773 	return (error);
    774 }
    775 
    776 #ifdef __FreeBSD__
    777 #ifndef PIPE_NODIRECT
    778 /*
    779  * Map the sending processes' buffer into kernel space and wire it.
    780  * This is similar to a physical write operation.
    781  */
     782 static int
     783 pipe_build_write_buffer(wpipe, uio)
     784 	struct pipe *wpipe;
     785 	struct uio *uio;
     786 {
     787 	size_t size;
     788 	int i;
     789 	vm_offset_t addr, endaddr, paddr;
     790 
     791 	GIANT_REQUIRED;
     792 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
     793 
         	/* Cap the direct transfer at one pipe buffer's worth. */
     794 	size = uio->uio_iov->iov_len;
     795 	if (size > wpipe->pipe_buffer.size)
     796 		size = wpipe->pipe_buffer.size;
     797 
     798 	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
     799 	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
     800 	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
     801 		vm_page_t m;
     802 
         		/* Fault the user page in and translate; on failure unwire
         		 * everything wired so far and bail with EFAULT. */
     803 		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
     804 		    (paddr = pmap_kextract(addr)) == 0) {
     805 			int j;
     806 
     807 			for (j = 0; j < i; j++)
     808 				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
     809 			return (EFAULT);
     810 		}
     811 
     812 		m = PHYS_TO_VM_PAGE(paddr);
     813 		vm_page_wire(m);
     814 		wpipe->pipe_map.ms[i] = m;
     815 	}
     816 
     817 /*
     818  * set up the control block
     819  */
     820 	wpipe->pipe_map.npages = i;
     821 	wpipe->pipe_map.pos =
     822 	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
     823 	wpipe->pipe_map.cnt = size;
     824 
     825 /*
     826  * and map the buffer
     827  */
     828 	if (wpipe->pipe_map.kva == 0) {
     829 		/*
     830 		 * We need to allocate space for an extra page because the
     831 		 * address range might (will) span pages at times.
     832 		 */
     833 		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
     834 			wpipe->pipe_buffer.size + PAGE_SIZE);
     835 		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
     836 	}
     837 	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
     838 		wpipe->pipe_map.npages);
     839 
     840 /*
     841  * and update the uio data
     842  */
     843 
         	/* Consume 'size' bytes from the uio as if they had been copied. */
     844 	uio->uio_iov->iov_len -= size;
     845 	uio->uio_iov->iov_base += size;
     846 	if (uio->uio_iov->iov_len == 0)
     847 		uio->uio_iov++;
     848 	uio->uio_resid -= size;
     849 	uio->uio_offset += size;
     850 	return (0);
     851 }
    852 
    853 /*
    854  * unmap and unwire the process buffer
    855  */
     856 static void
     857 pipe_destroy_write_buffer(wpipe)
     858 	struct pipe *wpipe;
     859 {
     860 	int i;
     861 
     862 	GIANT_REQUIRED;
     863 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
     864 
     865 	if (wpipe->pipe_map.kva) {
     866 		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
     867 
         		/* Only give the kva back when we are over the soft limit;
         		 * otherwise keep it cached for the next direct write. */
     868 		if (amountpipekva > maxpipekva) {
     869 			vm_offset_t kva = wpipe->pipe_map.kva;
     870 			wpipe->pipe_map.kva = 0;
     871 			kmem_free(kernel_map, kva,
     872 				wpipe->pipe_buffer.size + PAGE_SIZE);
     873 			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
     874 		}
     875 	}
     876 	for (i = 0; i < wpipe->pipe_map.npages; i++)
     877 		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
     878 	wpipe->pipe_map.npages = 0;
     879 }
    880 
    881 /*
    882  * In the case of a signal, the writing process might go away.  This
    883  * code copies the data into the circular buffer so that the source
    884  * pages can be freed without loss of data.
    885  */
     886 static void
     887 pipe_clone_write_buffer(wpipe)
     888 	struct pipe *wpipe;
     889 {
     890 	int size;
     891 	int pos;
     892 
     893 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
         	/* Copy the still-pending direct-write bytes into the pipe's own
         	 * circular buffer so the sender's pages can be released. */
     894 	size = wpipe->pipe_map.cnt;
     895 	pos = wpipe->pipe_map.pos;
     896 	memcpy((caddr_t) wpipe->pipe_buffer.buffer,
     897 	    (caddr_t) wpipe->pipe_map.kva + pos, size);
     898 
     899 	wpipe->pipe_buffer.in = size;
     900 	wpipe->pipe_buffer.out = 0;
     901 	wpipe->pipe_buffer.cnt = size;
     902 	wpipe->pipe_state &= ~PIPE_DIRECTW;
     903 
     904 	PIPE_GET_GIANT(wpipe);
     905 	pipe_destroy_write_buffer(wpipe);
     906 	PIPE_DROP_GIANT(wpipe);
     907 }
    908 
    909 /*
    910  * This implements the pipe buffer write mechanism.  Note that only
    911  * a direct write OR a normal pipe write can be pending at any given time.
    912  * If there are any characters in the pipe buffer, the direct write will
    913  * be deferred until the receiving process grabs all of the bytes from
    914  * the pipe buffer.  Then the direct mapping write is set-up.
    915  */
     916 static int
     917 pipe_direct_write(wpipe, uio)
     918 	struct pipe *wpipe;
     919 	struct uio *uio;
     920 {
     921 	int error;
     922 
     923 retry:
     924 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
         	/* Wait out any direct write already in flight. */
     925 	while (wpipe->pipe_state & PIPE_DIRECTW) {
     926 		if (wpipe->pipe_state & PIPE_WANTR) {
     927 			wpipe->pipe_state &= ~PIPE_WANTR;
     928 			wakeup(wpipe);
     929 		}
     930 		wpipe->pipe_state |= PIPE_WANTW;
     931 		error = msleep(wpipe, PIPE_MTX(wpipe),
     932 		    PRIBIO | PCATCH, "pipdww", 0);
     933 		if (error)
     934 			goto error1;
     935 		if (wpipe->pipe_state & PIPE_EOF) {
     936 			error = EPIPE;
     937 			goto error1;
     938 		}
     939 	}
     940 	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
         	/* The reader must drain the buffered data before a direct
         	 * transfer can be set up (see function header comment). */
     941 	if (wpipe->pipe_buffer.cnt > 0) {
     942 		if (wpipe->pipe_state & PIPE_WANTR) {
     943 			wpipe->pipe_state &= ~PIPE_WANTR;
     944 			wakeup(wpipe);
     945 		}
     946 
     947 		wpipe->pipe_state |= PIPE_WANTW;
     948 		error = msleep(wpipe, PIPE_MTX(wpipe),
     949 		    PRIBIO | PCATCH, "pipdwc", 0);
     950 		if (error)
     951 			goto error1;
     952 		if (wpipe->pipe_state & PIPE_EOF) {
     953 			error = EPIPE;
     954 			goto error1;
     955 		}
     956 		goto retry;
     957 	}
     958 
     959 	wpipe->pipe_state |= PIPE_DIRECTW;
     960 
     961 	PIPE_GET_GIANT(wpipe);
     962 	error = pipe_build_write_buffer(wpipe, uio);
     963 	PIPE_DROP_GIANT(wpipe);
     964 	if (error) {
     965 		wpipe->pipe_state &= ~PIPE_DIRECTW;
     966 		goto error1;
     967 	}
     968 
     969 	error = 0;
         	/* Sleep until the reader consumes the mapped pages (clears
         	 * PIPE_DIRECTW), the peer disappears, or a signal arrives. */
     970 	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
     971 		if (wpipe->pipe_state & PIPE_EOF) {
     972 			pipelock(wpipe, 0);
     973 			PIPE_GET_GIANT(wpipe);
     974 			pipe_destroy_write_buffer(wpipe);
     975 			PIPE_DROP_GIANT(wpipe);
     976 			pipeunlock(wpipe);
     977 			pipeselwakeup(wpipe, wpipe);
     978 			error = EPIPE;
     979 			goto error1;
     980 		}
     981 		if (wpipe->pipe_state & PIPE_WANTR) {
     982 			wpipe->pipe_state &= ~PIPE_WANTR;
     983 			wakeup(wpipe);
     984 		}
     985 		pipeselwakeup(wpipe, wpipe);
     986 		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
     987 		    "pipdwt", 0);
     988 	}
     989 
     990 	pipelock(wpipe,0);
     991 	if (wpipe->pipe_state & PIPE_DIRECTW) {
     992 		/*
     993 		 * this bit of trickery substitutes a kernel buffer for
     994 		 * the process that might be going away.
     995 		 */
     996 		pipe_clone_write_buffer(wpipe);
     997 	} else {
     998 		PIPE_GET_GIANT(wpipe);
     999 		pipe_destroy_write_buffer(wpipe);
    1000 		PIPE_DROP_GIANT(wpipe);
    1001 	}
    1002 	pipeunlock(wpipe);
    1003 	return (error);
    1004 
    1005 error1:
    1006 	wakeup(wpipe);
    1007 	return (error);
    1008 }
   1009 #endif /* !PIPE_NODIRECT */
   1010 #endif /* FreeBSD */
   1011 
   1012 #ifdef __NetBSD__
   1013 #ifndef PIPE_NODIRECT
   1014 /*
   1015  * Allocate structure for loan transfer.
   1016  */
   1017 static int
   1018 pipe_loan_alloc(wpipe, npages)
   1019 	struct pipe *wpipe;
   1020 	int npages;
   1021 {
   1022 	vsize_t len;
   1023 
   1024 	len = (vsize_t)npages << PAGE_SHIFT;
   1025 	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, len);
   1026 	if (wpipe->pipe_map.kva == 0)
   1027 		return (ENOMEM);
   1028 
   1029 	amountpipekva += len;
   1030 	wpipe->pipe_map.npages = npages;
   1031 	wpipe->pipe_map.pgs = malloc(npages * sizeof(struct vm_page *), M_PIPE,
   1032 	    M_WAITOK);
   1033 	return (0);
   1034 }
   1035 
   1036 /*
   1037  * Free resources allocated for loan transfer.
   1038  */
   1039 static void
   1040 pipe_loan_free(wpipe)
   1041 	struct pipe *wpipe;
   1042 {
   1043 	vsize_t len;
   1044 
   1045 	len = (vsize_t)wpipe->pipe_map.npages << PAGE_SHIFT;
   1046 	uvm_km_free(kernel_map, wpipe->pipe_map.kva, len);
   1047 	wpipe->pipe_map.kva = 0;
   1048 	amountpipekva -= len;
   1049 	free(wpipe->pipe_map.pgs, M_PIPE);
   1050 	wpipe->pipe_map.pgs = NULL;
   1051 }
   1052 
   1053 /*
   1054  * NetBSD direct write, using uvm_loan() mechanism.
   1055  * This implements the pipe buffer write mechanism.  Note that only
   1056  * a direct write OR a normal pipe write can be pending at any given time.
   1057  * If there are any characters in the pipe buffer, the direct write will
   1058  * be deferred until the receiving process grabs all of the bytes from
   1059  * the pipe buffer.  Then the direct mapping write is set-up.
   1060  */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error, npages, j;
	struct vm_page **pgs;
	vaddr_t bbase, kva, base, bend;
	vsize_t blen, bcnt;
	voff_t bpos;

retry:
	/* Only one direct write may be pending at a time; wait our turn. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * The direct write is deferred until the kernel buffer has been
	 * fully drained by the reader; then start over from the top.
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
		goto retry;
	}

	/*
	 * Handle first PIPE_CHUNK_SIZE bytes of buffer. Deal with buffers
	 * not aligned to PAGE_SIZE.
	 */
	bbase = (vaddr_t)uio->uio_iov->iov_base;
	base = trunc_page(bbase);
	bend = round_page(bbase + uio->uio_iov->iov_len);
	blen = bend - base;	/* page-rounded span of the loan */
	bpos = bbase - base;	/* offset of the data within the first page */

	if (blen > PIPE_DIRECT_CHUNK) {
		blen = PIPE_DIRECT_CHUNK;
		bend = base + blen;
		bcnt = PIPE_DIRECT_CHUNK - bpos;
	} else {
		bcnt = uio->uio_iov->iov_len;
	}
	npages = blen >> PAGE_SHIFT;

	wpipe->pipe_map.pos = bpos;
	wpipe->pipe_map.cnt = bcnt;	/* bytes offered to the reader */

	/*
	 * Free the old kva if we need more pages than we have
	 * allocated.
	 */
	if (wpipe->pipe_map.kva && npages > wpipe->pipe_map.npages)
		pipe_loan_free(wpipe);

	/* Allocate new kva. */
	if (wpipe->pipe_map.kva == 0) {
		error = pipe_loan_alloc(wpipe, npages);
		if (error) {
			goto error;
		}
	}

	/* Loan the write buffer memory from writer process */
	pgs = wpipe->pipe_map.pgs;
	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen,
	    pgs, UVM_LOAN_TOPAGE);
	if (error) {
		/* pgs == NULL tells the cleanup path the loan never held. */
		pgs = NULL;
		goto cleanup;
	}

	/* Enter the loaned pages to kva */
	kva = wpipe->pipe_map.kva;
	for (j = 0; j < npages; j++, kva += PAGE_SIZE) {
		pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ);
	}
	pmap_update(pmap_kernel());

	/* Hand the mapping over and wait for the reader to consume it. */
	wpipe->pipe_state |= PIPE_DIRECTW;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	if (error)
		wpipe->pipe_state &= ~PIPE_DIRECTW;

cleanup:
	/* Unwind the loan; the kva stays cached unless over the limit. */
	pipelock(wpipe, 0);
	if (pgs != NULL) {
		pmap_kremove(wpipe->pipe_map.kva, blen);
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	if (error || amountpipekva > maxpipekva)
		pipe_loan_free(wpipe);
	pipeunlock(wpipe);

	if (error) {
		pipeselwakeup(wpipe, wpipe);

		/*
		 * If nothing was read from what we offered, return error
		 * straight on. Otherwise update uio resid first. Caller
		 * will deal with the error condition, returning short
		 * write, error, or restarting the write(2) as appropriate.
		 *
		 * Note the "error" label below is deliberately nested in
		 * this block: the early "goto error" jumps enter here
		 * directly and return without touching the uio at all.
		 */
		if (wpipe->pipe_map.cnt == bcnt) {
error:
			wakeup(wpipe);
			return (error);
		}

		/* Partial transfer: only account for what was consumed. */
		bcnt -= wpipe->pipe_map.cnt;
	}

	uio->uio_resid -= bcnt;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
	uio->uio_iov->iov_len -= bcnt;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (error);
}
   1214 #endif /* !PIPE_NODIRECT */
   1215 #endif /* NetBSD */
   1216 
   1217 #ifdef __FreeBSD__
   1218 static int
   1219 pipe_write(fp, uio, cred, flags, td)
   1220 	struct file *fp;
   1221 	off_t *offset;
   1222 	struct uio *uio;
   1223 	struct ucred *cred;
   1224 	int flags;
   1225 	struct thread *td;
   1226 #elif defined(__NetBSD__)
   1227 static int
   1228 pipe_write(fp, offset, uio, cred, flags)
   1229 	struct file *fp;
   1230 	off_t *offset;
   1231 	struct uio *uio;
   1232 	struct ucred *cred;
   1233 	int flags;
   1234 #endif
   1235 {
   1236 	int error = 0;
   1237 	struct pipe *wpipe, *rpipe;
   1238 
   1239 	rpipe = (struct pipe *) fp->f_data;
   1240 	wpipe = rpipe->pipe_peer;
   1241 
   1242 	PIPE_LOCK(rpipe);
   1243 	/*
   1244 	 * detect loss of pipe read side, issue SIGPIPE if lost.
   1245 	 */
   1246 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
   1247 		PIPE_UNLOCK(rpipe);
   1248 		return (EPIPE);
   1249 	}
   1250 
   1251 	++wpipe->pipe_busy;
   1252 
   1253 	/*
   1254 	 * If it is advantageous to resize the pipe buffer, do
   1255 	 * so.
   1256 	 */
   1257 	if ((uio->uio_resid > PIPE_SIZE) &&
   1258 		(nbigpipe < maxbigpipes) &&
   1259 #ifndef PIPE_NODIRECT
   1260 		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
   1261 #endif
   1262 		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
   1263 		(wpipe->pipe_buffer.cnt == 0)) {
   1264 
   1265 		if ((error = pipelock(wpipe,1)) == 0) {
   1266 			PIPE_GET_GIANT(rpipe);
   1267 			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
   1268 				nbigpipe++;
   1269 			PIPE_DROP_GIANT(rpipe);
   1270 			pipeunlock(wpipe);
   1271 		} else {
   1272 			/*
   1273 			 * If an error occurred, unbusy and return, waking up
   1274 			 * any waiting readers.
   1275 			 */
   1276 			--wpipe->pipe_busy;
   1277 			if (wpipe->pipe_busy == 0
   1278 			    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
   1279 				wpipe->pipe_state &=
   1280 				    ~(PIPE_WANTCLOSE | PIPE_WANTR);
   1281 				wakeup(wpipe);
   1282 			}
   1283 
   1284 			return (error);
   1285 		}
   1286 	}
   1287 
   1288 #ifdef __FreeBSD__
   1289 	/*
   1290 	 * If an early error occured unbusy and return, waking up any pending
   1291 	 * readers.
   1292 	 */
   1293 	if (error) {
   1294 		--wpipe->pipe_busy;
   1295 		if ((wpipe->pipe_busy == 0) &&
   1296 		    (wpipe->pipe_state & PIPE_WANT)) {
   1297 			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
   1298 			wakeup(wpipe);
   1299 		}
   1300 		PIPE_UNLOCK(rpipe);
   1301 		return(error);
   1302 	}
   1303 
   1304 	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
   1305 #endif
   1306 
   1307 	while (uio->uio_resid) {
   1308 		int space;
   1309 
   1310 #ifndef PIPE_NODIRECT
   1311 		/*
   1312 		 * If the transfer is large, we can gain performance if
   1313 		 * we do process-to-process copies directly.
   1314 		 * If the write is non-blocking, we don't use the
   1315 		 * direct write mechanism.
   1316 		 *
   1317 		 * The direct write mechanism will detect the reader going
   1318 		 * away on us.
   1319 		 */
   1320 		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
   1321 		    (fp->f_flag & FNONBLOCK) == 0 &&
   1322 		    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
   1323 			error = pipe_direct_write(wpipe, uio);
   1324 
   1325 			/*
   1326 			 * Break out if error occured, unless it's ENOMEM.
   1327 			 * ENOMEM means we failed to allocate some resources
   1328 			 * for direct write, so we just fallback to ordinary
   1329 			 * write. If the direct write was successful,
   1330 			 * process rest of data via ordinary write.
   1331 			 */
   1332 			if (!error)
   1333 				continue;
   1334 
   1335 			if (error != ENOMEM)
   1336 				break;
   1337 		}
   1338 #endif /* PIPE_NODIRECT */
   1339 
   1340 		/*
   1341 		 * Pipe buffered writes cannot be coincidental with
   1342 		 * direct writes.  We wait until the currently executing
   1343 		 * direct write is completed before we start filling the
   1344 		 * pipe buffer.  We break out if a signal occurs or the
   1345 		 * reader goes away.
   1346 		 */
   1347 	retrywrite:
   1348 		while (wpipe->pipe_state & PIPE_DIRECTW) {
   1349 			if (wpipe->pipe_state & PIPE_WANTR) {
   1350 				wpipe->pipe_state &= ~PIPE_WANTR;
   1351 				wakeup(wpipe);
   1352 			}
   1353 #ifdef __FreeBSD__
   1354 			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
   1355 			    "pipbww", 0);
   1356 #else
   1357 			error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0);
   1358 #endif
   1359 			if (wpipe->pipe_state & PIPE_EOF)
   1360 				break;
   1361 			if (error)
   1362 				break;
   1363 		}
   1364 		if (wpipe->pipe_state & PIPE_EOF) {
   1365 			error = EPIPE;
   1366 			break;
   1367 		}
   1368 
   1369 		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
   1370 
   1371 		/* Writes of size <= PIPE_BUF must be atomic. */
   1372 		if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
   1373 			space = 0;
   1374 
   1375 		if (space > 0) {
   1376 			int size;	/* Transfer size */
   1377 			int segsize;	/* first segment to transfer */
   1378 
   1379 			if ((error = pipelock(wpipe,1)) != 0)
   1380 				break;
   1381 
   1382 			/*
   1383 			 * It is possible for a direct write to
   1384 			 * slip in on us... handle it here...
   1385 			 */
   1386 			if (wpipe->pipe_state & PIPE_DIRECTW) {
   1387 				pipeunlock(wpipe);
   1388 				goto retrywrite;
   1389 			}
   1390 			/*
   1391 			 * If a process blocked in uiomove, our
   1392 			 * value for space might be bad.
   1393 			 *
   1394 			 * XXX will we be ok if the reader has gone
   1395 			 * away here?
   1396 			 */
   1397 			if (space > wpipe->pipe_buffer.size -
   1398 				    wpipe->pipe_buffer.cnt) {
   1399 				pipeunlock(wpipe);
   1400 				goto retrywrite;
   1401 			}
   1402 
   1403 			/*
   1404 			 * Transfer size is minimum of uio transfer
   1405 			 * and free space in pipe buffer.
   1406 			 */
   1407 			if (space > uio->uio_resid)
   1408 				size = uio->uio_resid;
   1409 			else
   1410 				size = space;
   1411 			/*
   1412 			 * First segment to transfer is minimum of
   1413 			 * transfer size and contiguous space in
   1414 			 * pipe buffer.  If first segment to transfer
   1415 			 * is less than the transfer size, we've got
   1416 			 * a wraparound in the buffer.
   1417 			 */
   1418 			segsize = wpipe->pipe_buffer.size -
   1419 				wpipe->pipe_buffer.in;
   1420 			if (segsize > size)
   1421 				segsize = size;
   1422 
   1423 			/* Transfer first segment */
   1424 
   1425 			PIPE_UNLOCK(rpipe);
   1426 			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
   1427 						segsize, uio);
   1428 			PIPE_LOCK(rpipe);
   1429 
   1430 			if (error == 0 && segsize < size) {
   1431 				/*
   1432 				 * Transfer remaining part now, to
   1433 				 * support atomic writes.  Wraparound
   1434 				 * happened.
   1435 				 */
   1436 #ifdef DEBUG
   1437 				if (wpipe->pipe_buffer.in + segsize !=
   1438 				    wpipe->pipe_buffer.size)
   1439 					panic("Expected pipe buffer wraparound disappeared");
   1440 #endif
   1441 
   1442 				PIPE_UNLOCK(rpipe);
   1443 				error = uiomove(&wpipe->pipe_buffer.buffer[0],
   1444 						size - segsize, uio);
   1445 				PIPE_LOCK(rpipe);
   1446 			}
   1447 			if (error == 0) {
   1448 				wpipe->pipe_buffer.in += size;
   1449 				if (wpipe->pipe_buffer.in >=
   1450 				    wpipe->pipe_buffer.size) {
   1451 #ifdef DEBUG
   1452 					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
   1453 						panic("Expected wraparound bad");
   1454 #endif
   1455 					wpipe->pipe_buffer.in = size - segsize;
   1456 				}
   1457 
   1458 				wpipe->pipe_buffer.cnt += size;
   1459 #ifdef DEBUG
   1460 				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
   1461 					panic("Pipe buffer overflow");
   1462 #endif
   1463 			}
   1464 			pipeunlock(wpipe);
   1465 			if (error)
   1466 				break;
   1467 		} else {
   1468 			/*
   1469 			 * If the "read-side" has been blocked, wake it up now.
   1470 			 */
   1471 			if (wpipe->pipe_state & PIPE_WANTR) {
   1472 				wpipe->pipe_state &= ~PIPE_WANTR;
   1473 				wakeup(wpipe);
   1474 			}
   1475 
   1476 			/*
   1477 			 * don't block on non-blocking I/O
   1478 			 */
   1479 			if (fp->f_flag & FNONBLOCK) {
   1480 				error = EAGAIN;
   1481 				break;
   1482 			}
   1483 
   1484 			/*
   1485 			 * We have no more space and have something to offer,
   1486 			 * wake up select/poll.
   1487 			 */
   1488 			pipeselwakeup(wpipe, wpipe);
   1489 
   1490 			wpipe->pipe_state |= PIPE_WANTW;
   1491 #ifdef __FreeBSD__
   1492 			error = msleep(wpipe, PIPE_MTX(rpipe),
   1493 			    PRIBIO | PCATCH, "pipewr", 0);
   1494 #else
   1495 			error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0);
   1496 #endif
   1497 			if (error != 0)
   1498 				break;
   1499 			/*
   1500 			 * If read side wants to go away, we just issue a signal
   1501 			 * to ourselves.
   1502 			 */
   1503 			if (wpipe->pipe_state & PIPE_EOF) {
   1504 				error = EPIPE;
   1505 				break;
   1506 			}
   1507 		}
   1508 	}
   1509 
   1510 	--wpipe->pipe_busy;
   1511 	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
   1512 		wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
   1513 		wakeup(wpipe);
   1514 	} else if (wpipe->pipe_buffer.cnt > 0) {
   1515 		/*
   1516 		 * If we have put any characters in the buffer, we wake up
   1517 		 * the reader.
   1518 		 */
   1519 		if (wpipe->pipe_state & PIPE_WANTR) {
   1520 			wpipe->pipe_state &= ~PIPE_WANTR;
   1521 			wakeup(wpipe);
   1522 		}
   1523 	}
   1524 
   1525 	/*
   1526 	 * Don't return EPIPE if I/O was successful
   1527 	 */
   1528 	if ((error == EPIPE) && (wpipe->pipe_buffer.cnt == 0)
   1529 	    && (uio->uio_resid == 0))
   1530 		error = 0;
   1531 
   1532 	if (error == 0)
   1533 		vfs_timestamp(&wpipe->pipe_mtime);
   1534 
   1535 	/*
   1536 	 * We have something to offer, wake up select/poll.
   1537 	 * wpipe->pipe_map.cnt is always 0 in this point (direct write
   1538 	 * is only done synchronously), so check only wpipe->pipe_buffer.cnt
   1539 	 */
   1540 	if (wpipe->pipe_buffer.cnt)
   1541 		pipeselwakeup(wpipe, wpipe);
   1542 
   1543 	/*
   1544 	 * Arrange for next read(2) to do a signal.
   1545 	 */
   1546 	wpipe->pipe_state |= PIPE_SIGNALR;
   1547 
   1548 	PIPE_UNLOCK(rpipe);
   1549 	return (error);
   1550 }
   1551 
   1552 /*
   1553  * we implement a very minimal set of ioctls for compatibility with sockets.
   1554  */
int
#ifdef __FreeBSD__
pipe_ioctl(fp, cmd, data, td)
	struct file *fp;
	u_long cmd;
	caddr_t data;
	struct thread *td;
#else
pipe_ioctl(fp, cmd, data, p)
	struct file *fp;
	u_long cmd;
	caddr_t data;
	struct proc *p;
#endif
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		/*
		 * Nothing to do here: the read/write paths check
		 * fp->f_flag & FNONBLOCK directly.
		 */
		return (0);

	case FIOASYNC:
		/* Toggle async-notification mode under the pipe lock. */
		PIPE_LOCK(mpipe);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		/* Report bytes available for reading. */
		PIPE_LOCK(mpipe);
#ifndef PIPE_NODIRECT
		/* During a direct write, pending data lives in pipe_map. */
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
#endif
			*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

#ifdef __FreeBSD__
	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);
#endif /* FreeBSD */
#ifdef __NetBSD__
	/* NetBSD tracks the signal recipient as a plain pgid/pid. */
	case TIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;
		return (0);

	case TIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;
		return (0);
#endif /* NetBSD */

	}
	/* Unknown command: let a higher layer try to handle it. */
	return (EPASSTHROUGH);
}
   1628 
int
#ifdef __FreeBSD__
pipe_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
#elif defined(__NetBSD__)
pipe_poll(fp, events, td)
	struct file *fp;
	int events;
	struct proc *td;
#endif
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
	/* Readable: buffered data, a pending direct write, or EOF. */
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_buffer.cnt > 0) ||
#ifndef PIPE_NODIRECT
		    (rpipe->pipe_state & PIPE_DIRECTW) ||
#endif
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	/*
	 * Writable: peer gone/EOF, or no direct write in progress and
	 * at least PIPE_BUF bytes free (so a small write stays atomic).
	 */
	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)
		    || (
#ifndef PIPE_NODIRECT
		     ((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
#endif
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	/*
	 * Nothing ready: register for notification.  Note wpipe cannot
	 * be NULL here -- a NULL peer set POLLHUP above, making
	 * revents nonzero.
	 */
	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	PIPE_UNLOCK(rpipe);

	return (revents);
}
   1686 
   1687 static int
   1688 #ifdef __FreeBSD__
   1689 pipe_stat(fp, ub, td)
   1690 	struct file *fp;
   1691 	struct stat *ub;
   1692 	struct thread *td;
   1693 #else
   1694 pipe_stat(fp, ub, td)
   1695 	struct file *fp;
   1696 	struct stat *ub;
   1697 	struct proc *td;
   1698 #endif
   1699 {
   1700 	struct pipe *pipe = (struct pipe *)fp->f_data;
   1701 
   1702 	memset((caddr_t)ub, 0, sizeof(*ub));
   1703 	ub->st_mode = S_IFIFO;
   1704 	ub->st_blksize = pipe->pipe_buffer.size;
   1705 	ub->st_size = pipe->pipe_buffer.cnt;
   1706 	ub->st_blocks = (ub->st_size) ? 1 : 0;
   1707 #ifdef __FreeBSD__
   1708 	ub->st_atimespec = pipe->pipe_atime;
   1709 	ub->st_mtimespec = pipe->pipe_mtime;
   1710 	ub->st_ctimespec = pipe->pipe_ctime;
   1711 #endif /* FreeBSD */
   1712 #ifdef __NetBSD__
   1713 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec)
   1714 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
   1715 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
   1716 #endif /* NetBSD */
   1717 	ub->st_uid = fp->f_cred->cr_uid;
   1718 	ub->st_gid = fp->f_cred->cr_gid;
   1719 	/*
   1720 	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
   1721 	 * XXX (st_dev, st_ino) should be unique.
   1722 	 */
   1723 	return (0);
   1724 }
   1725 
   1726 /* ARGSUSED */
   1727 static int
   1728 #ifdef __FreeBSD__
   1729 pipe_close(fp, td)
   1730 	struct file *fp;
   1731 	struct thread *td;
   1732 #else
   1733 pipe_close(fp, td)
   1734 	struct file *fp;
   1735 	struct proc *td;
   1736 #endif
   1737 {
   1738 	struct pipe *cpipe = (struct pipe *)fp->f_data;
   1739 
   1740 #ifdef __FreeBSD__
   1741 	fp->f_ops = &badfileops;
   1742 	funsetown(cpipe->pipe_sigio);
   1743 #endif
   1744 	fp->f_data = NULL;
   1745 	pipeclose(cpipe);
   1746 	return (0);
   1747 }
   1748 
static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

#ifdef __FreeBSD__

	GIANT_REQUIRED;
	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	       ("pipespace: pipe mutex locked"));
#endif

	/* Release the ring buffer and its kva accounting. */
	if (cpipe->pipe_buffer.buffer != NULL) {
		/* Buffers larger than PIPE_SIZE count against nbigpipe. */
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
#ifdef __FreeBSD__
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#elif defined(__NetBSD__)
		uvm_km_free(kernel_map,
			(vaddr_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#endif /* NetBSD */
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	/* Release any direct-write mapping resources. */
	if (cpipe->pipe_map.kva != 0) {
#ifdef __FreeBSD__
		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
		kmem_free(kernel_map,
			cpipe->pipe_map.kva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
#elif defined(__NetBSD__)
		/* pipe_loan_free() also fixes up amountpipekva. */
		pipe_loan_free(cpipe);
#endif /* NetBSD */
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif /* !PIPE_NODIRECT */
}
   1793 
   1794 /*
   1795  * shutdown the pipe
   1796  */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;
#ifdef __FreeBSD__
	int hadpeer = 0;
#endif

	if (cpipe == NULL)
		return;

	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	pipeselwakeup(cpipe, cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF;
#ifdef __FreeBSD__
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
#else
		tsleep(cpipe, PRIBIO, "pipecl", 0);
#endif
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
#ifdef __FreeBSD__
		/* Peer still exists, so it keeps the shared mutex alive. */
		hadpeer++;
#endif
		pipeselwakeup(ppipe, ppipe);

		/* Mark the peer EOF and wake any sleepers on it. */
		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
#ifdef __FreeBSD__
		KNOTE(&ppipe->pipe_sel.si_note, 0);
#endif
		ppipe->pipe_peer = NULL;
	}
	/*
	 * free resources
	 */
#ifdef __FreeBSD__
	if (PIPE_MTX(cpipe) != NULL) {
		PIPE_UNLOCK(cpipe);
		/* Last side out destroys the shared mutex. */
		if (!hadpeer) {
			mtx_destroy(PIPE_MTX(cpipe));
			free(PIPE_MTX(cpipe), M_TEMP);
		}
	}
	mtx_lock(&Giant);
	pipe_free_kmem(cpipe);
	zfree(pipe_zone, cpipe);
	mtx_unlock(&Giant);
#endif

#ifdef __NetBSD__
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_UNLOCK(cpipe);

	/* Drain the long-term lock before returning the pipe to the pool. */
	pipe_free_kmem(cpipe);
	(void) lockmgr(&cpipe->pipe_lock, LK_DRAIN, NULL);
	pool_put(&pipe_pool, cpipe);
#endif
}
   1871 
   1872 #ifdef __FreeBSD__
   1873 /*ARGSUSED*/
   1874 static int
   1875 pipe_kqfilter(struct file *fp, struct knote *kn)
   1876 {
   1877 	struct pipe *cpipe;
   1878 
   1879 	cpipe = (struct pipe *)kn->kn_fp->f_data;
   1880 	switch (kn->kn_filter) {
   1881 	case EVFILT_READ:
   1882 		kn->kn_fop = &pipe_rfiltops;
   1883 		break;
   1884 	case EVFILT_WRITE:
   1885 		kn->kn_fop = &pipe_wfiltops;
   1886 		cpipe = cpipe->pipe_peer;
   1887 		break;
   1888 	default:
   1889 		return (1);
   1890 	}
   1891 	kn->kn_hook = (caddr_t)cpipe;
   1892 
   1893 	PIPE_LOCK(cpipe);
   1894 	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
   1895 	PIPE_UNLOCK(cpipe);
   1896 	return (0);
   1897 }
   1898 
   1899 static void
   1900 filt_pipedetach(struct knote *kn)
   1901 {
   1902 	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
   1903 
   1904 	PIPE_LOCK(cpipe);
   1905 	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
   1906 	PIPE_UNLOCK(cpipe);
   1907 }
   1908 
   1909 /*ARGSUSED*/
   1910 static int
   1911 filt_piperead(struct knote *kn, long hint)
   1912 {
   1913 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
   1914 	struct pipe *wpipe = rpipe->pipe_peer;
   1915 
   1916 	PIPE_LOCK(rpipe);
   1917 	kn->kn_data = rpipe->pipe_buffer.cnt;
   1918 	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
   1919 		kn->kn_data = rpipe->pipe_map.cnt;
   1920 
   1921 	if ((rpipe->pipe_state & PIPE_EOF) ||
   1922 	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
   1923 		kn->kn_flags |= EV_EOF;
   1924 		PIPE_UNLOCK(rpipe);
   1925 		return (1);
   1926 	}
   1927 	PIPE_UNLOCK(rpipe);
   1928 	return (kn->kn_data > 0);
   1929 }
   1930 
   1931 /*ARGSUSED*/
   1932 static int
   1933 filt_pipewrite(struct knote *kn, long hint)
   1934 {
   1935 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
   1936 	struct pipe *wpipe = rpipe->pipe_peer;
   1937 
   1938 	PIPE_LOCK(rpipe);
   1939 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
   1940 		kn->kn_data = 0;
   1941 		kn->kn_flags |= EV_EOF;
   1942 		PIPE_UNLOCK(rpipe);
   1943 		return (1);
   1944 	}
   1945 	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
   1946 	if (wpipe->pipe_state & PIPE_DIRECTW)
   1947 		kn->kn_data = 0;
   1948 
   1949 	PIPE_UNLOCK(rpipe);
   1950 	return (kn->kn_data >= PIPE_BUF);
   1951 }
   1952 #endif /* FreeBSD */
   1953 
   1954 #ifdef __NetBSD__
   1955 static int
   1956 pipe_fcntl(fp, cmd, data, p)
   1957 	struct file *fp;
   1958 	u_int cmd;
   1959 	caddr_t data;
   1960 	struct proc *p;
   1961 {
   1962 	if (cmd == F_SETFL)
   1963 		return (0);
   1964 	else
   1965 		return (EOPNOTSUPP);
   1966 }
   1967 
   1968 /*
   1969  * Handle pipe sysctls.
   1970  */
   1971 int
   1972 sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
   1973 	int *name;
   1974 	u_int namelen;
   1975 	void *oldp;
   1976 	size_t *oldlenp;
   1977 	void *newp;
   1978 	size_t newlen;
   1979 {
   1980 	/* All sysctl names at this level are terminal. */
   1981 	if (namelen != 1)
   1982 		return (ENOTDIR);		/* overloaded */
   1983 
   1984 	switch (name[0]) {
   1985 	case KERN_PIPE_MAXKVASZ:
   1986 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
   1987 	case KERN_PIPE_LIMITKVA:
   1988 		return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva));
   1989 	case KERN_PIPE_MAXBIGPIPES:
   1990 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
   1991 	case KERN_PIPE_NBIGPIPES:
   1992 		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
   1993 	case KERN_PIPE_KVASIZE:
   1994 		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
   1995 	default:
   1996 		return (EOPNOTSUPP);
   1997 	}
   1998 	/* NOTREACHED */
   1999 }
   2000 
   2001 /*
   2002  * Initialize pipe structs.
   2003  */
void
pipe_init(void)
{
	/* Backing pool for struct pipe allocations (see pipeclose()). */
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl", NULL);
}
   2009 
   2010 #endif /* __NetBSD __ */
   2011