/*	$NetBSD: puffs_msgif.c,v 1.55 2007/11/12 16:39:34 pooka Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by the
 * Google Summer of Code program and the Ulla Tuominen Foundation.
 * The Google SoC project was mentored by Bill Studenmund.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.55 2007/11/12 16:39:34 pooka Exp $");

#include <sys/param.h>
#include <sys/fstrans.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <dev/putter/putter_sys.h>

#include <fs/puffs/puffs_msgif.h>
#include <fs/puffs/puffs_sys.h>

#include <miscfs/syncfs/syncfs.h> /* XXX: for syncer_mutex reference */

/*
 * waitq data structures
 */

/*
 * While a request is going to userspace, park the caller within the
 * kernel.  This is the kernel counterpart of "struct puffs_req".
 */
struct puffs_msgpark {
	struct puffs_req	*park_preq;	/* req followed by buf	*/

	size_t			park_copylen;	/* userspace copylength	*/
	size_t			park_maxlen;	/* max size in comeback */

	parkdone_fn		park_done;	/* "biodone" a'la puffs	*/
	void			*park_donearg;

	int			park_flags;
	int			park_refcount;

	kcondvar_t		park_cv;
	kmutex_t		park_mtx;

	TAILQ_ENTRY(puffs_msgpark) park_entries;
};
#define PARKFLAG_WAITERGONE	0x01
#define PARKFLAG_DONE		0x02
#define PARKFLAG_ONQUEUE1	0x04
#define PARKFLAG_ONQUEUE2	0x08
#define PARKFLAG_CALL		0x10
#define PARKFLAG_WANTREPLY	0x20

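/*
 * A sketch of the typical park life cycle, pieced together from the
 * routines below (illustrative only; PUFFS_VN_FOO and the message
 * struct are hypothetical stand-ins):
 *
 *	struct puffs_msgpark *park;
 *	struct puffs_vnmsg_foo *msg;
 *
 *	puffs_msgmem_alloc(sizeof(*msg), &park, (void **)&msg, 1);
 *	... fill in *msg ...
 *	error = puffs_msg_vn(pmp, park, PUFFS_VN_FOO, 0, vp, NULL);
 *	... examine the reply written back into *msg ...
 *	puffs_msgmem_release(park);
 */
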
static pool_cache_t parkpc;

static int
makepark(void *arg, void *obj, int flags)
{
	struct puffs_msgpark *park = obj;

	mutex_init(&park->park_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&park->park_cv, "puffsrpl");

	return 0;
}

static void
nukepark(void *arg, void *obj)
{
	struct puffs_msgpark *park = obj;

	cv_destroy(&park->park_cv);
	mutex_destroy(&park->park_mtx);
}

void
puffs_msgif_init()
{

	parkpc = pool_cache_init(sizeof(struct puffs_msgpark), 0, 0, 0,
	    "puffprkl", NULL, IPL_NONE, makepark, nukepark, NULL);
}

void
puffs_msgif_destroy()
{

	pool_cache_destroy(parkpc);
}

static int alloced;

static struct puffs_msgpark *
puffs_msgpark_alloc(int waitok)
{
	struct puffs_msgpark *park;

	park = pool_cache_get(parkpc, waitok ? PR_WAITOK : PR_NOWAIT);
	if (park == NULL)
		return park;

	alloced++;	/* outstanding parks; decremented on final release */
	park->park_refcount = 1;
	park->park_preq = NULL;
	park->park_flags = PARKFLAG_WANTREPLY;

	return park;
}

static void
puffs_msgpark_reference(struct puffs_msgpark *park)
{

	KASSERT(mutex_owned(&park->park_mtx));
	park->park_refcount++;
}

/*
 * Release reference to park structure.
 */
static void
puffs_msgpark_release1(struct puffs_msgpark *park, int howmany)
{
	struct puffs_req *preq = park->park_preq;
	int refcnt;

	KASSERT(mutex_owned(&park->park_mtx));
	refcnt = park->park_refcount -= howmany;
	mutex_exit(&park->park_mtx);

	KASSERT(refcnt >= 0);

	if (refcnt == 0) {
		alloced--;
		if (preq)
			kmem_free(preq, park->park_maxlen);
		pool_cache_put(parkpc, park);
	}
}
#define puffs_msgpark_release(a) puffs_msgpark_release1(a, 1)

#ifdef PUFFSDEBUG
static void
parkdump(struct puffs_msgpark *park)
{

	DPRINTF(("park %p, preq %p, id %" PRIu64 "\n"
	    "\tcopy %zu, max %zu - done: %p/%p\n"
	    "\tflags 0x%08x, refcount %d, cv/mtx: %p/%p\n",
	    park, park->park_preq, park->park_preq->preq_id,
	    park->park_copylen, park->park_maxlen,
	    park->park_done, park->park_donearg,
	    park->park_flags, park->park_refcount,
	    &park->park_cv, &park->park_mtx));
}

static void
parkqdump(struct puffs_wq *q, int dumpall)
{
	struct puffs_msgpark *park;
	int total = 0;

	TAILQ_FOREACH(park, q, park_entries) {
		if (dumpall)
			parkdump(park);
		total++;
	}
	DPRINTF(("puffs waitqueue at %p dumped, %d total\n", q, total));
}
#endif /* PUFFSDEBUG */

/*
 * A word about locking in the park structures: the lock protects the
 * fields of the *park* structure (not preq) and acts as an interlock
 * in cv operations.  The lock is always internal to this module and
 * callers do not need to worry about it.
 */

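/*
 * The ordering rule implied above, sketched out: pmp_lock is taken
 * before park_mtx.  If a park is already locked and the mount must be
 * locked too, the park lock is dropped and retaken:
 *
 *	mutex_exit(&park->park_mtx);
 *	mutex_enter(&pmp->pmp_lock);
 *	mutex_enter(&park->park_mtx);
 *
 * This is exactly the dance touser() performs below when cleaning up
 * after an interrupted wait.
 */
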
int
puffs_msgmem_alloc(size_t len, struct puffs_msgpark **ppark, void **mem,
	int cansleep)
{
	struct puffs_msgpark *park;
	void *m;

	m = kmem_zalloc(len, cansleep ? KM_SLEEP : KM_NOSLEEP);
	if (m == NULL) {
		KASSERT(cansleep == 0);
		return ENOMEM;
	}

	park = puffs_msgpark_alloc(cansleep);
	if (park == NULL) {
		KASSERT(cansleep == 0);
		kmem_free(m, len);
		return ENOMEM;
	}

	park->park_preq = m;
	park->park_maxlen = len;

	*ppark = park;
	*mem = m;

	return 0;
}

void
puffs_msgmem_release(struct puffs_msgpark *park)
{

	if (park == NULL)
		return;

	mutex_enter(&park->park_mtx);
	puffs_msgpark_release(park);
}

void
puffs_msg_setfaf(struct puffs_msgpark *park)
{

	park->park_flags &= ~PARKFLAG_WANTREPLY;
}

/*
 * kernel-user-kernel waitqueues
 */

static int touser(struct puffs_mount *, struct puffs_msgpark *);

static uint64_t
puffs_getmsgid(struct puffs_mount *pmp)
{
	uint64_t rv;

	mutex_enter(&pmp->pmp_lock);
	rv = pmp->pmp_nextmsgid++;
	mutex_exit(&pmp->pmp_lock);

	return rv;
}

/* vfs request */
int
puffs_msg_vfs(struct puffs_mount *pmp, struct puffs_msgpark *park, int optype)
{

	park->park_preq->preq_opclass = PUFFSOP_VFS;
	park->park_preq->preq_optype = optype;

	park->park_copylen = park->park_maxlen;

	return touser(pmp, park);
}

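/*
 * For example (a sketch, assuming a statvfs-style caller; the exact
 * message layout is defined elsewhere): a VFS-level operation pairs
 * the routine above with the memory helpers roughly as
 *
 *	puffs_msgmem_alloc(sizeof(*statvfs_msg), &park,
 *	    (void **)&statvfs_msg, 1);
 *	error = puffs_msg_vfs(pmp, park, PUFFS_VFS_STATVFS);
 *	...
 *	puffs_msgmem_release(park);
 */
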
/*
 * vnode level request
 */
int
puffs_msg_vn(struct puffs_mount *pmp, struct puffs_msgpark *park,
	int optype, size_t delta, struct vnode *vp_opc, struct vnode *vp_aux)
{
	struct puffs_req *preq;
	void *cookie = VPTOPNC(vp_opc);
	struct puffs_node *pnode;
	int rv;

	park->park_preq->preq_opclass = PUFFSOP_VN;
	park->park_preq->preq_optype = optype;
	park->park_preq->preq_cookie = cookie;

	KASSERT(delta < park->park_maxlen); /* "<=" wouldn't make sense */
	park->park_copylen = park->park_maxlen - delta;

	rv = touser(pmp, park);

	/*
	 * Check if the user server requests that inactive be called
	 * when the time is right.
	 */
	preq = park->park_preq;
	if (preq->preq_setbacks & PUFFS_SETBACK_INACT_N1) {
		pnode = vp_opc->v_data;
		pnode->pn_stat |= PNODE_DOINACT;
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_INACT_N2) {
		/* if no vp_aux, just ignore */
		if (vp_aux) {
			pnode = vp_aux->v_data;
			pnode->pn_stat |= PNODE_DOINACT;
		}
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_NOREF_N1) {
		pnode = vp_opc->v_data;
		pnode->pn_stat |= PNODE_NOREFS;
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_NOREF_N2) {
		/* if no vp_aux, just ignore */
		if (vp_aux) {
			pnode = vp_aux->v_data;
			pnode->pn_stat |= PNODE_NOREFS;
		}
	}

	return rv;
}

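/*
 * A note on "delta", with an illustrative sketch (the message struct
 * is a stand-in; the real definitions live in puffs_msgif.h): maxlen
 * is the size of the full comeback buffer, copylen the part shipped
 * to userspace.  An op which only *receives* data need not copy its
 * data area out, so the caller passes that area's size as delta:
 *
 *	len = sizeof(struct puffs_vnmsg_read) + tomove;
 *	puffs_msgmem_alloc(len, &park, (void **)&read_msg, 1);
 *	... fill in the request header ...
 *	error = puffs_msg_vn(pmp, park, PUFFS_VN_READ, tomove, vp, NULL);
 *
 * Outbound, only the request header travels; the reply may fill the
 * whole maxlen-sized buffer.
 */
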
void
puffs_msg_vncall(struct puffs_mount *pmp, struct puffs_msgpark *park,
	int optype, size_t delta, parkdone_fn donefn, void *donearg,
	struct vnode *vp_opc)
{
	void *cookie = VPTOPNC(vp_opc);

	park->park_preq->preq_opclass = PUFFSOP_VN;
	park->park_preq->preq_optype = optype;
	park->park_preq->preq_cookie = cookie;

	KASSERT(delta < park->park_maxlen);
	park->park_copylen = park->park_maxlen - delta;
	park->park_done = donefn;
	park->park_donearg = donearg;
	park->park_flags |= PARKFLAG_CALL;

	(void) touser(pmp, park);
}

int
puffs_msg_raw(struct puffs_mount *pmp, struct puffs_msgpark *park)
{

	park->park_copylen = park->park_maxlen;

	return touser(pmp, park);
}

void
puffs_msg_errnotify(struct puffs_mount *pmp, uint8_t type, int error,
	const char *str, void *cookie)
{
	struct puffs_msgpark *park;
	struct puffs_error *perr;

	puffs_msgmem_alloc(sizeof(struct puffs_error), &park,
	    (void **)&perr, 1);

	perr->perr_error = error;
	strlcpy(perr->perr_str, str, sizeof(perr->perr_str));

	park->park_preq->preq_opclass |= PUFFSOP_ERROR | PUFFSOPFLAG_FAF;
	park->park_preq->preq_optype = type;
	park->park_preq->preq_cookie = cookie;

	park->park_copylen = park->park_maxlen;

	(void)touser(pmp, park);
}

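/*
 * A usage sketch (the type constant is a stand-in; pick whatever
 * PUFFS_ERR_* value fits the situation): a subsystem noticing a
 * protocol violation could inform the file server with
 *
 *	puffs_msg_errnotify(pmp, PUFFS_ERR_ERROR, EPROTO,
 *	    "bad frame", VPTOPNC(vp));
 *
 * The notification is fire-and-forget: PUFFSOPFLAG_FAF is set above,
 * so the sender never waits for a reply.
 */
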
/*
 * Wait for the userspace ping-pong game in calling process context,
 * unless this is a FAF / async call, in which case we just enqueue
 * the request and return immediately.
 */
static int
touser(struct puffs_mount *pmp, struct puffs_msgpark *park)
{
	struct lwp *l = curlwp;
	struct mount *mp;
	struct puffs_req *preq;
	int rv = 0;

	mp = PMPTOMP(pmp);
	preq = park->park_preq;
	preq->preq_buflen = park->park_maxlen;
	KASSERT(preq->preq_id == 0);

	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0)
		preq->preq_opclass |= PUFFSOPFLAG_FAF;
	else
		preq->preq_id = puffs_getmsgid(pmp);

	/* fill in caller information */
	preq->preq_pid = l->l_proc->p_pid;
	preq->preq_lid = l->l_lid;

	/*
	 * To support cv_sig, yet another movie: check if there are signals
	 * pending and we are issuing a non-FAF.  If so, return an error
	 * directly UNLESS we are issuing INACTIVE.  In that case, convert
	 * it to a FAF, fire off to the file server and return an error.
	 * Yes, this is bordering on disgusting.  Barfbags are on me.
	 */
	if ((park->park_flags & PARKFLAG_WANTREPLY)
	   && (park->park_flags & PARKFLAG_CALL) == 0
	   && (l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0)) {
		if (PUFFSOP_OPCLASS(preq->preq_opclass) == PUFFSOP_VN
		    && preq->preq_optype == PUFFS_VN_INACTIVE) {
			park->park_preq->preq_opclass |= PUFFSOPFLAG_FAF;
			park->park_flags &= ~PARKFLAG_WANTREPLY;
			DPRINTF(("puffs touser: converted to FAF %p\n", park));
			rv = EINTR;
		} else {
			return EINTR;
		}
	}

	/*
	 * test for suspension lock.
	 *
	 * Note that we *DO NOT* keep the lock, since that might block
	 * other lock acquisitions PLUS it would give userlandia control
	 * over the lock.  The operation queue enforces a strict ordering:
	 * when the fs server gets in the op stream, it knows things
	 * are in order.  The kernel locks can't guarantee that for
	 * userspace, in any case.
	 *
	 * BUT: this presents a problem for ops which have a consistency
	 * clause based on more than one operation.  Unfortunately such
	 * operations (read, write) do not reliably work yet.
	 *
	 * Ya, Ya, it's wrong wong wrong, me be fixink this someday.
	 *
	 * XXX: and there is one more problem.  We sometimes need to
	 * take a lazy lock in case the fs is suspending and we are
	 * executing as the fs server context.  This might happen
	 * e.g. in the case that the user server triggers a reclaim
	 * in the kernel while the fs is suspending.  It's not a very
	 * likely event, but it needs to be fixed some day.
	 */

	/*
	 * MOREXXX: once PUFFS_WCACHEINFO is enabled, we can't take
	 * the mutex here, since getpages() might be called locked.
	 */
	fstrans_start(mp, FSTRANS_NORMAL);
	mutex_enter(&pmp->pmp_lock);
	fstrans_done(mp);

	if (pmp->pmp_status != PUFFSTAT_RUNNING) {
		mutex_exit(&pmp->pmp_lock);
		return ENXIO;
	}

#ifdef PUFFSDEBUG
	parkqdump(&pmp->pmp_msg_touser, puffsdebug > 1);
	parkqdump(&pmp->pmp_msg_replywait, puffsdebug > 1);
#endif

	mutex_enter(&park->park_mtx);
	TAILQ_INSERT_TAIL(&pmp->pmp_msg_touser, park, park_entries);
	park->park_flags |= PARKFLAG_ONQUEUE1;
	puffs_mp_reference(pmp);
	pmp->pmp_msg_touser_count++;
	mutex_exit(&pmp->pmp_lock);

	DPRINTF(("touser: req %" PRIu64 ", preq: %p, park: %p, "
	    "c/t: 0x%x/0x%x, f: 0x%x\n", preq->preq_id, preq, park,
	    preq->preq_opclass, preq->preq_optype, park->park_flags));

	cv_broadcast(&pmp->pmp_msg_waiter_cv);
	putter_notify(pmp->pmp_pi);

	if ((park->park_flags & PARKFLAG_WANTREPLY)
	    && (park->park_flags & PARKFLAG_CALL) == 0) {
		int error;

		error = cv_wait_sig(&park->park_cv, &park->park_mtx);
		DPRINTF(("puffs_touser: waiter for %p woke up with %d\n",
		    park, error));
		if (error) {
			park->park_flags |= PARKFLAG_WAITERGONE;
			if (park->park_flags & PARKFLAG_DONE) {
				rv = preq->preq_rv;
			} else {
				/*
				 * ok, we marked it as going away, but
				 * still need to do queue ops.  take locks
				 * in correct order.
				 *
				 * We don't want to release our reference
				 * if the park is on the replywait queue,
				 * since that could error out the file
				 * server.  The putop() code will DTRT.
				 */
				mutex_exit(&park->park_mtx);
				mutex_enter(&pmp->pmp_lock);
				mutex_enter(&park->park_mtx);

				/* remove from queue1 */
				if (park->park_flags & PARKFLAG_ONQUEUE1) {
					TAILQ_REMOVE(&pmp->pmp_msg_touser,
					    park, park_entries);
					pmp->pmp_msg_touser_count--;
					park->park_flags &= ~PARKFLAG_ONQUEUE1;
				}

				/*
				 * If it's waiting for a response already,
				 * boost the reference count.  The park will
				 * get nuked once the response arrives from
				 * the file server.
				 */
				if (park->park_flags & PARKFLAG_ONQUEUE2)
					puffs_msgpark_reference(park);

				mutex_exit(&pmp->pmp_lock);

				rv = error;
			}
		} else {
			rv = preq->preq_rv;
		}

		/*
		 * retake the lock and release.  This makes sure (haha,
		 * I'm humorous) that we don't process the same vnode in
		 * multiple threads due to the lock hacks we have in
		 * puffs_lock().  In reality this is well protected by
		 * the biglock, but once that's gone, well, hopefully
		 * this will be fixed for real.  (and when you read this
		 * comment in 2017 and subsequently barf, my condolences ;).
		 */
		if (rv == 0 && !fstrans_is_owner(mp)) {
			fstrans_start(mp, FSTRANS_NORMAL);
			fstrans_done(mp);
		}

	} else {
		/*
		 * Take an extra reference for FAF, i.e. don't free us
		 * immediately upon return to the caller, but rather
		 * only when the message has been transported.
		 */
		puffs_msgpark_reference(park);
	}

	mutex_exit(&park->park_mtx);

	mutex_enter(&pmp->pmp_lock);
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	return rv;
}

/*
 * Get the next request in the outgoing queue.  "maxsize" controls the
 * size the caller can accommodate and "nonblock" signals whether this
 * call should block while waiting for input.  Handles all locking
 * internally.
 */
int
puffs_msgif_getout(void *this, size_t maxsize, int nonblock,
	uint8_t **data, size_t *dlen, void **parkptr)
{
	struct puffs_mount *pmp = this;
	struct puffs_msgpark *park;
	struct puffs_req *preq;
	int error;

	error = 0;
	mutex_enter(&pmp->pmp_lock);
	puffs_mp_reference(pmp);
	for (;;) {
		/* RIP? */
		if (pmp->pmp_status != PUFFSTAT_RUNNING) {
			error = ENXIO;
			break;
		}

		/* need platinum yendorian express card? */
		if (TAILQ_EMPTY(&pmp->pmp_msg_touser)) {
			DPRINTF(("puffs_getout: no outgoing op, "));
			if (nonblock) {
				DPRINTF(("returning EWOULDBLOCK\n"));
				error = EWOULDBLOCK;
				break;
			}
			DPRINTF(("waiting ...\n"));

			error = cv_wait_sig(&pmp->pmp_msg_waiter_cv,
			    &pmp->pmp_lock);
			if (error)
				break;
			else
				continue;
		}

		park = TAILQ_FIRST(&pmp->pmp_msg_touser);
		if (park == NULL)
			continue;

		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);

		DPRINTF(("puffs_getout: found park at %p, ", park));

		/* If it's a goner, don't process it any further */
		if (park->park_flags & PARKFLAG_WAITERGONE) {
			DPRINTF(("waitergone!\n"));
			puffs_msgpark_release(park);
			continue;
		}
		preq = park->park_preq;

#if 0
		/* check size */
		/*
		 * XXX: this check is not valid for now, we don't know
		 * the size of the caller's input buffer.  i.e. this
		 * will most likely go away
		 */
		if (maxsize < preq->preq_frhdr.pfr_len) {
			DPRINTF(("buffer too small\n"));
			puffs_msgpark_release(park);
			error = E2BIG;
			break;
		}
#endif

		DPRINTF(("returning\n"));

		/*
		 * Ok, we found what we came for.  Release it from the
		 * outgoing queue but do not unlock.  We will unlock
		 * only after we "releaseout" it to avoid complications:
		 * otherwise it is (theoretically) possible for userland
		 * to race us into "put" before we have a chance to put
		 * this baby on the receiving queue.
		 */
		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
		park->park_flags &= ~PARKFLAG_ONQUEUE1;
		mutex_exit(&park->park_mtx);

		pmp->pmp_msg_touser_count--;
		KASSERT(pmp->pmp_msg_touser_count >= 0);

		break;
	}
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	if (error == 0) {
		*data = (uint8_t *)preq;
		preq->preq_pth.pth_framelen = park->park_copylen;
		*dlen = preq->preq_pth.pth_framelen;
		*parkptr = park;
	}

	return error;
}

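/*
 * A sketch of the consumer side (illustrative; the real driver lives
 * in the putter code, and xfer_to_server is hypothetical): the
 * transport pairs getout with the releaseout routine below, reporting
 * whether the frame actually made it out:
 *
 *	while (puffs_msgif_getout(pmp, maxsize, 0,
 *	    &data, &dlen, &park) == 0) {
 *		error = xfer_to_server(data, dlen);
 *		puffs_msgif_releaseout(pmp, park, error);
 *	}
 *
 * On success the park moves onto the replywait queue; on failure the
 * waiter is woken immediately with the error code.
 */
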
/*
 * Release outgoing structure.  Now, depending on the success of the
 * outgoing send, it is either going onto the result waiting queue
 * or the death chamber.
 */
void
puffs_msgif_releaseout(void *this, void *parkptr, int status)
{
	struct puffs_mount *pmp = this;
	struct puffs_msgpark *park = parkptr;

	DPRINTF(("puffs_releaseout: returning park %p, errno %d: ",
	    park, status));
	mutex_enter(&pmp->pmp_lock);
	mutex_enter(&park->park_mtx);
	if (park->park_flags & PARKFLAG_WANTREPLY) {
		if (status == 0) {
			DPRINTF(("enqueue replywait\n"));
			TAILQ_INSERT_TAIL(&pmp->pmp_msg_replywait, park,
			    park_entries);
			park->park_flags |= PARKFLAG_ONQUEUE2;
		} else {
			DPRINTF(("error path!\n"));
			park->park_preq->preq_rv = status;
			park->park_flags |= PARKFLAG_DONE;
			cv_signal(&park->park_cv);
		}
		puffs_msgpark_release(park);
	} else {
		DPRINTF(("release\n"));
		puffs_msgpark_release1(park, 2);
	}
	mutex_exit(&pmp->pmp_lock);
}

size_t
puffs_msgif_waitcount(void *this)
{
	struct puffs_mount *pmp = this;
	size_t rv;

	mutex_enter(&pmp->pmp_lock);
	rv = pmp->pmp_msg_touser_count;
	mutex_exit(&pmp->pmp_lock);

	return rv;
}

/*
 * XXX: locking with this one?
 */
static void
puffs_msgif_incoming(void *this, struct puffs_req *preq)
{
	struct puffs_mount *pmp = this;
	struct putter_hdr *pth = &preq->preq_pth;
	struct puffs_msgpark *park;
	int release, wgone;

	/* XXX */
	if (PUFFSOP_OPCLASS(preq->preq_opclass) != PUFFSOP_VN
	    && PUFFSOP_OPCLASS(preq->preq_opclass) != PUFFSOP_VFS)
		return;

	mutex_enter(&pmp->pmp_lock);

	/* Locate waiter */
	TAILQ_FOREACH(park, &pmp->pmp_msg_replywait, park_entries) {
		if (park->park_preq->preq_id == preq->preq_id)
			break;
	}
	if (park == NULL) {
		DPRINTF(("puffs_msgif_incoming: no request: %" PRIu64 "\n",
		    preq->preq_id));
		mutex_exit(&pmp->pmp_lock);
		return; /* XXX send error */
	}

	mutex_enter(&park->park_mtx);
	puffs_msgpark_reference(park);
	if (pth->pth_framelen > park->park_maxlen) {
		DPRINTF(("puffs_msgif_incoming: invalid buffer length: "
		    "%" PRIu64 " (req %" PRIu64 ")\n", pth->pth_framelen,
		    preq->preq_id));
		park->park_preq->preq_rv = EPROTO;
		cv_signal(&park->park_cv);
		puffs_msgpark_release(park);
		mutex_exit(&pmp->pmp_lock);
		return; /* XXX: error */
	}
	wgone = park->park_flags & PARKFLAG_WAITERGONE;

	KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
	TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
	park->park_flags &= ~PARKFLAG_ONQUEUE2;
	mutex_exit(&pmp->pmp_lock);

	if (wgone) {
		DPRINTF(("puffs_msgif_incoming: bad service - waiter gone for "
		    "park %p\n", park));
		release = 2;
	} else {
		if (park->park_flags & PARKFLAG_CALL) {
			DPRINTF(("puffs_msgif_incoming: call for %p, arg %p\n",
			    park->park_preq, park->park_donearg));
			park->park_done(pmp, preq, park->park_donearg);
			release = 2;
		} else {
			/* XXX: yes, I know */
			memcpy(park->park_preq, preq, pth->pth_framelen);
			release = 1;
		}
	}

	if (!wgone) {
		DPRINTF(("puffs_msgif_incoming: flagging done for "
		    "park %p\n", park));
		cv_signal(&park->park_cv);
	}

	park->park_flags |= PARKFLAG_DONE;
	puffs_msgpark_release1(park, release);
}

/*
 * helpers
 */
static void
dosuspendresume(void *arg)
{
	struct puffs_mount *pmp = arg;
	struct mount *mp;
	int rv;

	mp = PMPTOMP(pmp);
	/*
	 * XXX?  does this really do any good or is it just
	 * paranoid stupidity?  or stupid paranoia?
	 */
	if (mp->mnt_iflag & IMNT_UNMOUNT) {
		printf("puffs dosuspendresume(): detected suspend on "
		    "unmounting fs\n");
		goto out;
	}

	/* Do the dance.  Allow only one concurrent suspend */
	rv = vfs_suspend(PMPTOMP(pmp), 1);
	if (rv == 0)
		vfs_resume(PMPTOMP(pmp));

 out:
	mutex_enter(&pmp->pmp_lock);
	KASSERT(pmp->pmp_suspend == 1);
	pmp->pmp_suspend = 0;
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	kthread_exit(0);
}

static void
puffsop_suspend(struct puffs_mount *pmp)
{
	int rv = 0;

	mutex_enter(&pmp->pmp_lock);
	if (pmp->pmp_suspend || pmp->pmp_status != PUFFSTAT_RUNNING) {
		rv = EBUSY;
	} else {
		puffs_mp_reference(pmp);
		pmp->pmp_suspend = 1;
	}
	mutex_exit(&pmp->pmp_lock);
	if (rv)
		return;
	rv = kthread_create(PRI_NONE, 0, NULL, dosuspendresume,
	    pmp, NULL, "puffsusp");

	/* XXX: "return" rv */
}

static int
puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
{
	struct vnode *vp;
	voff_t offlo, offhi;
	int rv, flags = 0;

	/* XXX: slurry */
	if (pf->pf_op == PUFFS_INVAL_NAMECACHE_ALL) {
		cache_purgevfs(PMPTOMP(pmp));
		return 0;
	}

	/*
	 * Get vnode, don't lock it.  Namecache is protected by its own lock
	 * and we have a reference to protect against premature harvesting.
	 *
	 * The node we want here might be locked and the op is in
	 * userspace waiting for us to complete ==> deadlock.  Another
	 * reason we need to eventually bump locking to userspace, as we
	 * will need to lock the node if we wish to do flushes.
	 */
	rv = puffs_cookie2vnode(pmp, pf->pf_cookie, 0, 0, &vp);
	if (rv) {
		if (rv == PUFFS_NOSUCHCOOKIE)
			return ENOENT;
		return rv;
	}

	switch (pf->pf_op) {
#if 0
	/* not quite ready, yet */
	case PUFFS_INVAL_NAMECACHE_NODE:
	struct componentname *pf_cn;
	char *name;
		/* get comfortab^Wcomponentname */
		MALLOC(pf_cn, struct componentname *,
		    sizeof(struct componentname), M_PUFFS, M_WAITOK | M_ZERO);
		memset(pf_cn, 0, sizeof(struct componentname));
		break;

#endif
	case PUFFS_INVAL_NAMECACHE_DIR:
		if (vp->v_type != VDIR) {
			rv = EINVAL;
			break;
		}
		cache_purge1(vp, NULL, PURGE_CHILDREN);
		break;

	case PUFFS_INVAL_PAGECACHE_NODE_RANGE:
		flags = PGO_FREE;
		/*FALLTHROUGH*/
	case PUFFS_FLUSH_PAGECACHE_NODE_RANGE:
		if (flags == 0)
			flags = PGO_CLEANIT;

		if (pf->pf_end > vp->v_size || vp->v_type != VREG) {
			rv = EINVAL;
			break;
		}

		offlo = trunc_page(pf->pf_start);
		offhi = round_page(pf->pf_end);
		if (offhi != 0 && offlo >= offhi) {
			rv = EINVAL;
			break;
		}

		simple_lock(&vp->v_uobj.vmobjlock);
		rv = VOP_PUTPAGES(vp, offlo, offhi, flags);
		break;

	default:
		rv = EINVAL;
	}

	vrele(vp);

	return rv;
}

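/*
 * A sketch of the server side (an assumption about usage; node_cookie
 * is hypothetical): the file server reaches the handler above by
 * sending a PUFFSOP_FLUSH frame.  Invalidating the entire page cache
 * of one node might be requested like this; pf_end == 0 passes the
 * range checks above, and VOP_PUTPAGES() treats a zero high offset as
 * "to the end":
 *
 *	struct puffs_flush pf;
 *
 *	memset(&pf, 0, sizeof(pf));
 *	pf.pf_op = PUFFS_INVAL_PAGECACHE_NODE_RANGE;
 *	pf.pf_cookie = node_cookie;
 *	pf.pf_start = 0;
 *	pf.pf_end = 0;
 */
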
int
puffs_msgif_dispatch(void *this, uint8_t *buf)
{
	struct puffs_mount *pmp = this;
	struct puffs_req *preq = (struct puffs_req *)buf;

	switch (PUFFSOP_OPCLASS(preq->preq_opclass)) {
	case PUFFSOP_VN:
	case PUFFSOP_VFS:
		puffs_msgif_incoming(pmp, preq);
		break;
	case PUFFSOP_FLUSH:
		puffsop_flush(pmp, (void *)buf);
		break;
	case PUFFSOP_SUSPEND:
		puffsop_suspend(pmp);
		break;
	default:
		/* XXX: send error */
		break;
	}

	return 0;
}

int
puffs_msgif_close(void *this)
{
	struct puffs_mount *pmp = this;
	struct mount *mp = PMPTOMP(pmp);
	int gone, rv;

	mutex_enter(&pmp->pmp_lock);
	puffs_mp_reference(pmp);

	/*
	 * Free the waiting callers before proceeding any further.
	 * The syncer might be jogging around in this file system
	 * currently.  If we allow it to go to the userspace of no
	 * return while trying to get the syncer lock, well ...
	 * synclk: I feel happy, I feel fine.
	 * lockmgr: You're not fooling anyone, you know.
	 */
	puffs_userdead(pmp);

	/*
	 * Make sure someone from puffs_unmount() isn't currently in
	 * userspace.  If we don't take this precautionary step,
	 * they might notice that the mountpoint has disappeared
	 * from under them once they return.  Especially note that we
	 * cannot simply test for an unmounter before calling
	 * dounmount(), since it might be possible that that particular
	 * invocation of unmount was called without MNT_FORCE.  Here we
	 * *must* make sure unmount succeeds.  Also, restart is necessary
	 * since pmp isn't locked.  We might end up with PUTTER_DEAD after
	 * restart and exit from there.
	 */
	if (pmp->pmp_unmounting) {
		cv_wait(&pmp->pmp_unmounting_cv, &pmp->pmp_lock);
		puffs_mp_release(pmp);
		mutex_exit(&pmp->pmp_lock);
		DPRINTF(("puffs_fop_close: unmount was in progress for pmp %p, "
		    "restart\n", pmp));
		return ERESTART;
	}

	/* Won't access pmp from here anymore */
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	/*
	 * Detach from VFS.  First do the necessary XXX-dance (from
	 * sys_unmount() & other callers of dounmount()).
	 *
	 * XXX Freeze syncer.  Must do this before locking the
	 * mount point.  See dounmount() for details.
	 *
	 * XXX2: take a reference to the mountpoint before starting to
	 * wait for syncer_mutex.  Otherwise the mountpoint can be
	 * wiped out while we wait.
	 */
	simple_lock(&mp->mnt_slock);
	mp->mnt_wcnt++;
	simple_unlock(&mp->mnt_slock);

	mutex_enter(&syncer_mutex);

	simple_lock(&mp->mnt_slock);
	mp->mnt_wcnt--;
	if (mp->mnt_wcnt == 0)
		wakeup(&mp->mnt_wcnt);
	gone = mp->mnt_iflag & IMNT_GONE;
	simple_unlock(&mp->mnt_slock);
	if (gone) {
		mutex_exit(&syncer_mutex);
		return 0;
	}

	/*
	 * microscopic race condition here (although not with the current
	 * kernel), but can't really fix it without starting a crusade
	 * against vfs_busy(), so let it be, let it be, let it be
	 */

	/*
	 * The only way vfs_busy() will fail for us is if the filesystem
	 * is already a goner.
	 * XXX: skating on the thin ice of modern calling conventions ...
	 */
	if (vfs_busy(mp, 0, 0)) {
		mutex_exit(&syncer_mutex);
		return 0;
	}

	/*
	 * Once we have the mount point, unmount() can't interfere ...
	 * or at least in theory it shouldn't.  dounmount() reentrancy
	 * might require some visiting at some point.
	 */
	rv = dounmount(mp, MNT_FORCE, curlwp);
	KASSERT(rv == 0);

	return 0;
}

/*
 * We're dead, kaput, RIP, slightly more than merely pining for the
 * fjords, belly-up, fallen, lifeless, finished, expired, gone to meet
 * our maker, ceased to be, etcetc.  YASD.  It's a dead FS!
 *
 * Caller must hold puffs mutex.
 */
void
puffs_userdead(struct puffs_mount *pmp)
{
	struct puffs_msgpark *park, *park_next;

	/*
	 * Mark filesystem status as dying so that operations don't
	 * attempt to march to userspace any longer.
	 */
	pmp->pmp_status = PUFFSTAT_DYING;

	/* signal waiters on REQUEST TO file server queue */
	for (park = TAILQ_FIRST(&pmp->pmp_msg_touser); park;
	    park = park_next) {
		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);
		park_next = TAILQ_NEXT(park, park_entries);

		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
		park->park_flags &= ~PARKFLAG_ONQUEUE1;
		pmp->pmp_msg_touser_count--;

		/*
		 * Even though waiters on QUEUE1 are removed in touser()
		 * in case of WAITERGONE, it is still possible for us to
		 * get raced here due to having to retake locks in said
		 * touser().  In the race case simply "ignore" the item
		 * on the queue and move on to the next one.
		 */
		if (park->park_flags & PARKFLAG_WAITERGONE) {
			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
			KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
			puffs_msgpark_release(park);

		} else {
			park->park_preq->preq_rv = ENXIO;

			if (park->park_flags & PARKFLAG_CALL) {
				park->park_done(pmp, park->park_preq,
				    park->park_donearg);
				puffs_msgpark_release1(park, 2);
			} else if ((park->park_flags & PARKFLAG_WANTREPLY)
			    == 0) {
				puffs_msgpark_release1(park, 2);
			} else {
				cv_signal(&park->park_cv);
				puffs_msgpark_release(park);
			}
		}
	}

	/* signal waiters on RESPONSE FROM file server queue */
	for (park = TAILQ_FIRST(&pmp->pmp_msg_replywait); park;
	    park = park_next) {
		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);
		park_next = TAILQ_NEXT(park, park_entries);

		KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
		KASSERT(park->park_flags & PARKFLAG_WANTREPLY);

		TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
		park->park_flags &= ~PARKFLAG_ONQUEUE2;

		if (park->park_flags & PARKFLAG_WAITERGONE) {
			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
			puffs_msgpark_release(park);
		} else {
			park->park_preq->preq_rv = ENXIO;
			if (park->park_flags & PARKFLAG_CALL) {
				park->park_done(pmp, park->park_preq,
				    park->park_donearg);
				puffs_msgpark_release1(park, 2);
			} else {
				cv_signal(&park->park_cv);
				puffs_msgpark_release(park);
			}
		}
	}

	cv_broadcast(&pmp->pmp_msg_waiter_cv);
}