Home | History | Annotate | Line # | Download | only in librumpuser
rumpuser_sp.c revision 1.27
      1 /*      $NetBSD: rumpuser_sp.c,v 1.27 2010/12/16 17:05:44 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Sysproxy routines.  This provides system RPC support over host sockets.
     30  * The most notable limitation is that the client and server must share
     31  * the same ABI.  This does not mean that they have to be the same
     32  * machine or that they need to run the same version of the host OS,
     33  * just that they must agree on the data structures.  This even *might*
     34  * work correctly from one hardware architecture to another.
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.27 2010/12/16 17:05:44 pooka Exp $");
     39 
     40 #include <sys/types.h>
     41 #include <sys/atomic.h>
     42 #include <sys/mman.h>
     43 #include <sys/socket.h>
     44 
     45 #include <arpa/inet.h>
     46 #include <netinet/in.h>
     47 #include <netinet/tcp.h>
     48 
     49 #include <assert.h>
     50 #include <errno.h>
     51 #include <fcntl.h>
     52 #include <poll.h>
     53 #include <pthread.h>
     54 #include <stdarg.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 
     60 #include <rump/rumpuser.h>
     61 #include "rumpuser_int.h"
     62 
     63 #include "sp_common.c"
     64 
/*
 * Compile-time defaults.  Each may be overridden by defining the macro
 * before this point (e.g. on the compiler command line).
 */
/* Number of slots in pfdlist[]/spclist[]; also used as the listen() backlog. */
#ifndef MAXCLI
#define MAXCLI 256
#endif
/* Default for rumpsp_maxworker. */
#ifndef MAXWORKER
#define MAXWORKER 128
#endif
/* Default for rumpsp_idleworker. */
#ifndef IDLEWORKER
#define IDLEWORKER 16
#endif
/* handlereq() creates a new worker thread only while nworker is below this. */
int rumpsp_maxworker = MAXWORKER;
/* A worker exits instead of idling once this many workers are already idle. */
int rumpsp_idleworker = IDLEWORKER;

/*
 * Parallel arrays indexed by connection slot.  Slot 0 is set up by
 * spserver() to hold the listening socket.
 */
static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
/* Incremented by spcrelease(); read and reset to 0 by spserver(). */
static unsigned int disco;
/* Set by rumpuser_sp_fini(); checked by spserver() on listening socket activity. */
static volatile int spfini;

/* Copy of the hook table handed to rumpuser_sp_init(). */
static struct rumpuser_sp_ops spops;

/* Formatted by rumpuser_sp_init() and written to every accepted connection. */
static char banner[MAXBANNER];

/* Protocol version numbers embedded in the banner. */
#define PROTOMAJOR 0
#define PROTOMINOR 0
     88 
     89 /*
     90  * Manual wrappers, since librump does not have access to the
     91  * user namespace wrapped interfaces.
     92  */
     93 
/*
 * Invoke the spop_lwproc_switch hook on `l', bracketed by the
 * spop_schedule/spop_unschedule hooks like the other wrappers here.
 */
static void
lwproc_switch(struct lwp *l)
{

	spops.spop_schedule();
	spops.spop_lwproc_switch(l);
	spops.spop_unschedule();
}
    102 
/*
 * Invoke the spop_lwproc_release hook, bracketed by the
 * spop_schedule/spop_unschedule hooks.
 */
static void
lwproc_release(void)
{

	spops.spop_schedule();
	spops.spop_lwproc_release();
	spops.spop_unschedule();
}
    111 
    112 static int
    113 lwproc_newproc(struct spclient *spc)
    114 {
    115 	int rv;
    116 
    117 	spops.spop_schedule();
    118 	rv = spops.spop_lwproc_newproc(spc);
    119 	spops.spop_unschedule();
    120 
    121 	return rv;
    122 }
    123 
    124 static int
    125 lwproc_newlwp(pid_t pid)
    126 {
    127 	int rv;
    128 
    129 	spops.spop_schedule();
    130 	rv = spops.spop_lwproc_newlwp(pid);
    131 	spops.spop_unschedule();
    132 
    133 	return rv;
    134 }
    135 
    136 static struct lwp *
    137 lwproc_curlwp(void)
    138 {
    139 	struct lwp *l;
    140 
    141 	spops.spop_schedule();
    142 	l = spops.spop_lwproc_curlwp();
    143 	spops.spop_unschedule();
    144 
    145 	return l;
    146 }
    147 
    148 static pid_t
    149 lwproc_getpid(void)
    150 {
    151 	pid_t p;
    152 
    153 	spops.spop_schedule();
    154 	p = spops.spop_getpid();
    155 	spops.spop_unschedule();
    156 
    157 	return p;
    158 }
    159 
    160 static int
    161 rumpsyscall(int sysnum, void *data, register_t *retval)
    162 {
    163 	int rv;
    164 
    165 	spops.spop_schedule();
    166 	rv = spops.spop_syscall(sysnum, data, retval);
    167 	spops.spop_unschedule();
    168 
    169 	return rv;
    170 }
    171 
    172 static uint64_t
    173 nextreq(struct spclient *spc)
    174 {
    175 	uint64_t nw;
    176 
    177 	pthread_mutex_lock(&spc->spc_mtx);
    178 	nw = spc->spc_nextreq++;
    179 	pthread_mutex_unlock(&spc->spc_mtx);
    180 
    181 	return nw;
    182 }
    183 
    184 static void
    185 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
    186 {
    187 	struct rsp_hdr rhdr;
    188 
    189 	rhdr.rsp_len = sizeof(rhdr);
    190 	rhdr.rsp_reqno = reqno;
    191 	rhdr.rsp_class = RUMPSP_ERROR;
    192 	rhdr.rsp_type = 0;
    193 	rhdr.rsp_error = error;
    194 
    195 	sendlock(spc);
    196 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    197 	sendunlock(spc);
    198 }
    199 
    200 static int
    201 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
    202 {
    203 	struct rsp_hdr rhdr;
    204 	int rv;
    205 
    206 	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
    207 	rhdr.rsp_reqno = reqno;
    208 	rhdr.rsp_class = RUMPSP_RESP;
    209 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    210 	rhdr.rsp_error = 0;
    211 
    212 	sendlock(spc);
    213 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    214 	rv = dosend(spc, &error, sizeof(error));
    215 	sendunlock(spc);
    216 
    217 	return rv;
    218 }
    219 
    220 static int
    221 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
    222 	register_t *retval)
    223 {
    224 	struct rsp_hdr rhdr;
    225 	struct rsp_sysresp sysresp;
    226 	int rv;
    227 
    228 	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
    229 	rhdr.rsp_reqno = reqno;
    230 	rhdr.rsp_class = RUMPSP_RESP;
    231 	rhdr.rsp_type = RUMPSP_SYSCALL;
    232 	rhdr.rsp_sysnum = 0;
    233 
    234 	sysresp.rsys_error = error;
    235 	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
    236 
    237 	sendlock(spc);
    238 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    239 	rv = dosend(spc, &sysresp, sizeof(sysresp));
    240 	sendunlock(spc);
    241 
    242 	return rv;
    243 }
    244 
    245 static int
    246 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
    247 	int wantstr, void **resp)
    248 {
    249 	struct rsp_hdr rhdr;
    250 	struct rsp_copydata copydata;
    251 	struct respwait rw;
    252 	int rv;
    253 
    254 	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
    255 
    256 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
    257 	rhdr.rsp_class = RUMPSP_REQ;
    258 	if (wantstr)
    259 		rhdr.rsp_type = RUMPSP_COPYINSTR;
    260 	else
    261 		rhdr.rsp_type = RUMPSP_COPYIN;
    262 	rhdr.rsp_sysnum = 0;
    263 
    264 	copydata.rcp_addr = __UNCONST(remaddr);
    265 	copydata.rcp_len = *dlen;
    266 
    267 	putwait(spc, &rw, &rhdr);
    268 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    269 	rv = dosend(spc, &copydata, sizeof(copydata));
    270 	if (rv) {
    271 		unputwait(spc, &rw);
    272 		return rv;
    273 	}
    274 
    275 	rv = waitresp(spc, &rw);
    276 
    277 	DPRINTF(("copyin: response %d\n", rv));
    278 
    279 	*resp = rw.rw_data;
    280 	if (wantstr)
    281 		*dlen = rw.rw_dlen;
    282 
    283 	return rv;
    284 
    285 }
    286 
    287 static int
    288 send_copyout_req(struct spclient *spc, const void *remaddr,
    289 	const void *data, size_t dlen)
    290 {
    291 	struct rsp_hdr rhdr;
    292 	struct rsp_copydata copydata;
    293 	int rv;
    294 
    295 	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
    296 
    297 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
    298 	rhdr.rsp_reqno = nextreq(spc);
    299 	rhdr.rsp_class = RUMPSP_REQ;
    300 	rhdr.rsp_type = RUMPSP_COPYOUT;
    301 	rhdr.rsp_sysnum = 0;
    302 
    303 	copydata.rcp_addr = __UNCONST(remaddr);
    304 	copydata.rcp_len = dlen;
    305 
    306 	sendlock(spc);
    307 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    308 	rv = dosend(spc, &copydata, sizeof(copydata));
    309 	rv = dosend(spc, data, dlen);
    310 	sendunlock(spc);
    311 
    312 	return rv;
    313 }
    314 
    315 static int
    316 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
    317 {
    318 	struct rsp_hdr rhdr;
    319 	struct respwait rw;
    320 	int rv;
    321 
    322 	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
    323 
    324 	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
    325 	rhdr.rsp_class = RUMPSP_REQ;
    326 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    327 	rhdr.rsp_sysnum = 0;
    328 
    329 	putwait(spc, &rw, &rhdr);
    330 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    331 	rv = dosend(spc, &howmuch, sizeof(howmuch));
    332 	if (rv) {
    333 		unputwait(spc, &rw);
    334 		return rv;
    335 	}
    336 
    337 	rv = waitresp(spc, &rw);
    338 
    339 	*resp = rw.rw_data;
    340 
    341 	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
    342 
    343 	return rv;
    344 }
    345 
    346 static void
    347 spcref(struct spclient *spc)
    348 {
    349 
    350 	pthread_mutex_lock(&spc->spc_mtx);
    351 	spc->spc_refcnt++;
    352 	pthread_mutex_unlock(&spc->spc_mtx);
    353 }
    354 
    355 static void
    356 spcrelease(struct spclient *spc)
    357 {
    358 	int ref;
    359 
    360 	pthread_mutex_lock(&spc->spc_mtx);
    361 	ref = --spc->spc_refcnt;
    362 	pthread_mutex_unlock(&spc->spc_mtx);
    363 
    364 	if (ref > 0)
    365 		return;
    366 
    367 	DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
    368 
    369 	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
    370 	_DIAGASSERT(spc->spc_buf == NULL);
    371 
    372 	lwproc_switch(spc->spc_mainlwp);
    373 	lwproc_release();
    374 	spc->spc_mainlwp = NULL;
    375 
    376 	close(spc->spc_fd);
    377 	spc->spc_fd = -1;
    378 	spc->spc_state = SPCSTATE_NEW;
    379 
    380 	atomic_inc_uint(&disco);
    381 }
    382 
    383 static void
    384 serv_handledisco(unsigned int idx)
    385 {
    386 	struct spclient *spc = &spclist[idx];
    387 
    388 	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
    389 
    390 	pfdlist[idx].fd = -1;
    391 	pfdlist[idx].revents = 0;
    392 	pthread_mutex_lock(&spc->spc_mtx);
    393 	spc->spc_state = SPCSTATE_DYING;
    394 	kickall(spc);
    395 	pthread_mutex_unlock(&spc->spc_mtx);
    396 
    397 	/*
    398 	 * Nobody's going to attempt to send/receive anymore,
    399 	 * so reinit info relevant to that.
    400 	 */
    401 	/*LINTED:pointer casts may be ok*/
    402 	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
    403 
    404 	spcrelease(spc);
    405 }
    406 
    407 static void
    408 serv_shutdown(void)
    409 {
    410 	struct spclient *spc;
    411 	unsigned int i;
    412 
    413 	for (i = 1; i < MAXCLI; i++) {
    414 		spc = &spclist[i];
    415 		if (spc->spc_fd == -1)
    416 			continue;
    417 
    418 		shutdown(spc->spc_fd, SHUT_RDWR);
    419 		serv_handledisco(i);
    420 
    421 		spcrelease(spc);
    422 	}
    423 }
    424 
    425 static unsigned
    426 serv_handleconn(int fd, connecthook_fn connhook, int busy)
    427 {
    428 	struct sockaddr_storage ss;
    429 	socklen_t sl = sizeof(ss);
    430 	int newfd, flags;
    431 	unsigned i;
    432 
    433 	/*LINTED: cast ok */
    434 	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
    435 	if (newfd == -1)
    436 		return 0;
    437 
    438 	if (busy) {
    439 		close(newfd); /* EBUSY */
    440 		return 0;
    441 	}
    442 
    443 	flags = fcntl(newfd, F_GETFL, 0);
    444 	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
    445 		close(newfd);
    446 		return 0;
    447 	}
    448 
    449 	if (connhook(newfd) != 0) {
    450 		close(newfd);
    451 		return 0;
    452 	}
    453 
    454 	/* write out a banner for the client */
    455 	if (write(newfd, banner, strlen(banner)) != (ssize_t)strlen(banner)) {
    456 		close(newfd);
    457 		return 0;
    458 	}
    459 
    460 	/* find empty slot the simple way */
    461 	for (i = 0; i < MAXCLI; i++) {
    462 		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
    463 			break;
    464 	}
    465 
    466 	if (lwproc_newproc(&spclist[i]) != 0) {
    467 		close(newfd);
    468 		return 0;
    469 	}
    470 
    471 	assert(i < MAXCLI);
    472 
    473 	pfdlist[i].fd = newfd;
    474 	spclist[i].spc_fd = newfd;
    475 	spclist[i].spc_mainlwp = lwproc_curlwp();
    476 	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
    477 	spclist[i].spc_pid = lwproc_getpid();
    478 	spclist[i].spc_refcnt = 1;
    479 
    480 	TAILQ_INIT(&spclist[i].spc_respwait);
    481 
    482 	DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
    483 	    newfd, i, lwproc_getpid()));
    484 
    485 	lwproc_switch(NULL);
    486 
    487 	return i;
    488 }
    489 
    490 static void
    491 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
    492 {
    493 	register_t retval[2] = {0, 0};
    494 	int rv, sysnum;
    495 
    496 	sysnum = (int)rhdr->rsp_sysnum;
    497 	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
    498 	    sysnum, 0));
    499 
    500 	lwproc_newlwp(spc->spc_pid);
    501 	rv = rumpsyscall(sysnum, data, retval);
    502 	lwproc_release();
    503 
    504 	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
    505 	    rv, retval[0], retval[1]));
    506 
    507 	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
    508 }
    509 
/*
 * One queued syscall request.  Allocated and filled in by handlereq(),
 * consumed and freed (together with sba_data) by a worker thread in
 * serv_syscallbouncer().
 */
struct sysbouncearg {
	struct spclient *sba_spc;	/* client that sent the request */
	struct rsp_hdr sba_hdr;		/* copy of the request header */
	uint8_t *sba_data;		/* request buffer taken over from the client */

	TAILQ_ENTRY(sysbouncearg) sba_entries;	/* linkage on syslist */
};
/* sbamtx protects syslist and the worker counters; sbacv is waited on by idle workers. */
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
/* nworker: worker threads in existence; idleworker: workers waiting for a request. */
static int nworker, idleworker;
/* Queue of requests waiting for a worker. */
static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);
    521 
    522 /*ARGSUSED*/
    523 static void *
    524 serv_syscallbouncer(void *arg)
    525 {
    526 	struct sysbouncearg *sba;
    527 
    528 	for (;;) {
    529 		pthread_mutex_lock(&sbamtx);
    530 		if (idleworker >= rumpsp_idleworker) {
    531 			nworker--;
    532 			pthread_mutex_unlock(&sbamtx);
    533 			break;
    534 		}
    535 		idleworker++;
    536 		while (TAILQ_EMPTY(&syslist)) {
    537 			pthread_cond_wait(&sbacv, &sbamtx);
    538 		}
    539 
    540 		sba = TAILQ_FIRST(&syslist);
    541 		TAILQ_REMOVE(&syslist, sba, sba_entries);
    542 		idleworker--;
    543 		pthread_mutex_unlock(&sbamtx);
    544 
    545 		serv_handlesyscall(sba->sba_spc,
    546 		    &sba->sba_hdr, sba->sba_data);
    547 		spcrelease(sba->sba_spc);
    548 		free(sba->sba_data);
    549 		free(sba);
    550 	}
    551 
    552 	return NULL;
    553 }
    554 
/*
 * Common code for copyin and copyinstr.  Requests the data at client
 * address `raddr' via copyin_req(), copies `*len' bytes of the reply
 * to `laddr' and frees the reply.  rumpuser__kunlock() is called
 * before the request and rumpuser__klock() after it.
 *
 * Returns 0 on success and EFAULT if the request failed.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *reply = NULL; /* XXXuninit */
	int error, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	error = copyin_req(spc, raddr, len, wantstr, &reply);
	if (error == 0) {
		memcpy(laddr, reply, *len);
		free(reply);
	}

	rumpuser__klock(nlocks, NULL);

	return error ? EFAULT : 0;
}
    577 
/*
 * Copy `len' bytes from client address `raddr' to local address
 * `laddr'.  `arg' is the client (struct spclient).  Returns 0 or
 * EFAULT.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	size_t nbytes = len;

	return sp_copyin(arg, raddr, laddr, &nbytes, 0);
}
    584 
/*
 * Copy a string from client address `raddr' to local address `laddr'.
 * `*len' is passed on as the request length and is updated by the
 * copyin code to the length of the reply.  Returns 0 or EFAULT.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	const int wantstr = 1;

	return sp_copyin(arg, raddr, laddr, len, wantstr);
}
    591 
/*
 * Common code for copyout and copyoutstr.  Sends `dlen' bytes from
 * local address `laddr' to the client for storing at `raddr'.
 * rumpuser__kunlock() is called before the send and rumpuser__klock()
 * after it.
 *
 * Returns 0 on success and EFAULT if sending the request failed.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int error, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);
	error = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return error != 0 ? EFAULT : 0;
}
    606 
/*
 * Copy `dlen' bytes from local address `laddr' to client address
 * `raddr'.  Returns 0 or EFAULT.
 */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
    613 
/*
 * String flavour of copyout: sends `*dlen' bytes from `laddr' to
 * client address `raddr'.  `*dlen' is only read, never updated.
 * Returns 0 or EFAULT.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
    620 
/*
 * Have the client create an anonymous mapping of `howmuch' bytes.
 * rumpuser__kunlock() is called before the request and
 * rumpuser__klock() after it.
 *
 * Returns 0 with the address reported by the client in `*addr',
 * ENOMEM if the client reported NULL (`*addr' is set to NULL then),
 * or EFAULT if the request itself failed (`*addr' is untouched).
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *mapped, *reply;
	int error, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &reply) != 0) {
		error = EFAULT;
	} else {
		/* the reply carries the address of the mapping */
		mapped = *(void **)reply;
		free(reply);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
    652 
    653 /*
    654  *
    655  * Startup routines and mainloop for server.
    656  *
    657  */
    658 
/*
 * Startup argument for the server thread; allocated and filled in by
 * rumpuser_sp_init() and passed to spserver().
 */
struct spservarg {
	int sps_sock;			/* bound, listening socket */
	connecthook_fn sps_connhook;	/* called on every accepted descriptor */
};

/* Attributes for worker threads; set up (detached) by spserver(). */
static pthread_attr_t pattr_detached;
    665 static void
    666 handlereq(struct spclient *spc)
    667 {
    668 	struct sysbouncearg *sba;
    669 	pthread_t pt;
    670 	int retries, rv;
    671 
    672 	if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
    673 		if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
    674 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH);
    675 			spcfreebuf(spc);
    676 			return;
    677 		}
    678 
    679 		rv = send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0);
    680 		spcfreebuf(spc);
    681 		if (rv) {
    682 			shutdown(spc->spc_fd, SHUT_RDWR);
    683 			return;
    684 		}
    685 		spc->spc_state = SPCSTATE_RUNNING;
    686 		return;
    687 	}
    688 
    689 	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
    690 		send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL);
    691 		spcfreebuf(spc);
    692 		return;
    693 	}
    694 
    695 	retries = 0;
    696 	while ((sba = malloc(sizeof(*sba))) == NULL) {
    697 		if (nworker == 0 || retries > 10) {
    698 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN);
    699 			spcfreebuf(spc);
    700 			return;
    701 		}
    702 		/* slim chance of more memory? */
    703 		usleep(10000);
    704 	}
    705 
    706 	sba->sba_spc = spc;
    707 	sba->sba_hdr = spc->spc_hdr;
    708 	sba->sba_data = spc->spc_buf;
    709 	spcresetbuf(spc);
    710 
    711 	spcref(spc);
    712 
    713 	pthread_mutex_lock(&sbamtx);
    714 	TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
    715 	if (idleworker > 0) {
    716 		/* do we have a daemon's tool (i.e. idle threads)? */
    717 		pthread_cond_signal(&sbacv);
    718 	} else if (nworker < rumpsp_maxworker) {
    719 		/*
    720 		 * Else, need to create one
    721 		 * (if we can, otherwise just expect another
    722 		 * worker to pick up the syscall)
    723 		 */
    724 		if (pthread_create(&pt, &pattr_detached,
    725 		    serv_syscallbouncer, NULL) == 0)
    726 			nworker++;
    727 	}
    728 	pthread_mutex_unlock(&sbamtx);
    729 }
    730 
/*
 * Server thread.  `arg' is a struct spservarg with the listening
 * socket and the connect hook.
 *
 * Initializes the pfdlist[]/spclist[] slots, puts the listening
 * socket into slot 0 and then loops in poll().  Activity on slot 0 is
 * a new connection (or, if spfini is set, the signal to shut down);
 * activity on any other slot is a frame from that client, which is
 * read and dispatched by class.  Always returns NULL; the thread ends
 * on shutdown or on a poll() error other than EINTR.
 */
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	/* mark every slot unused and set up its mutex and condvar */
	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
		spc->spc_fd = -1;
	}
	/* slot 0 is the listening socket */
	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	/* nfds: slots in use; maxidx: highest slot index handed to poll() */
	nfds = 1;
	maxidx = 0;

	/* attributes used by handlereq() when creating worker threads */
	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
	/* XXX: doesn't stacksize currently work on NetBSD */
	pthread_attr_setstacksize(&pattr_detached, 32*1024);

	pthread_mutex_init(&sbamtx, NULL);
	pthread_cond_init(&sbacv, NULL);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		int discoed;

		/* g/c hangarounds (eventually) */
		/*
		 * For every disconnect counted by spcrelease() since the
		 * last round, give back one slot and lower maxidx to the
		 * highest client slot still in use.  If no client slot is
		 * in use, maxidx is left as it was.
		 */
		discoed = atomic_swap_uint(&disco, 0);
		while (discoed--) {
			nfds--;
			idx = maxidx;
			while (idx) {
				if (pfdlist[idx].fd != -1) {
					maxidx = idx;
					break;
				}
				idx--;
			}
			DPRINTF(("rump_sp: set maxidx to [%u]\n",
			    maxidx));
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		/* visit slots until all `rv' ready descriptors have been seen */
		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				/*
				 * readframe(): 0 means nothing to dispatch
				 * (presumably the frame is not complete
				 * yet), -1 is treated as a disconnect, any
				 * other value means a frame is ready.
				 */
				switch (readframe(spc)) {
				case 0:
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						/* reply to a request we sent */
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						/* unknown frame class */
						send_error_resp(spc,
						    spc->spc_hdr.rsp_reqno,
						    ENOENT);
						spcfreebuf(spc);
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				/*
				 * rumpuser_sp_fini() sets spfini and shuts
				 * down the listening socket.
				 */
				if (__predict_false(spfini)) {
					close(spclist[0].spc_fd);
					serv_shutdown();
					goto out;
				}

				/*
				 * NOTE(review): this overwrites the loop
				 * variable, so the scan continues after the
				 * slot just taken; lower slots with pending
				 * input wait for the next poll() round.
				 */
				idx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (idx)
					nfds++;
				if (idx > maxidx)
					maxidx = idx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

	/* NOTE(review): `sarg' is not freed on either exit path. */
 out:
	return NULL;
}
    854 
    855 static unsigned cleanupidx;
    856 static struct sockaddr *cleanupsa;
    857 int
    858 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
    859 	const char *ostype, const char *osrelease, const char *machine)
    860 {
    861 	pthread_t pt;
    862 	struct spservarg *sarg;
    863 	struct sockaddr *sap;
    864 	char *p;
    865 	unsigned idx;
    866 	int error, s;
    867 
    868 	p = strdup(url);
    869 	if (p == NULL)
    870 		return ENOMEM;
    871 	error = parseurl(p, &sap, &idx, 1);
    872 	free(p);
    873 	if (error)
    874 		return error;
    875 
    876 	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
    877 	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
    878 
    879 	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
    880 	if (s == -1)
    881 		return errno;
    882 
    883 	spops = *spopsp;
    884 	sarg = malloc(sizeof(*sarg));
    885 	if (sarg == NULL) {
    886 		close(s);
    887 		return ENOMEM;
    888 	}
    889 
    890 	sarg->sps_sock = s;
    891 	sarg->sps_connhook = parsetab[idx].connhook;
    892 
    893 	cleanupidx = idx;
    894 	cleanupsa = sap;
    895 
    896 	/* sloppy error recovery */
    897 
    898 	/*LINTED*/
    899 	if (bind(s, sap, sap->sa_len) == -1) {
    900 		fprintf(stderr, "rump_sp: server bind failed\n");
    901 		return errno;
    902 	}
    903 
    904 	if (listen(s, MAXCLI) == -1) {
    905 		fprintf(stderr, "rump_sp: server listen failed\n");
    906 		return errno;
    907 	}
    908 
    909 	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
    910 		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
    911 		return errno;
    912 	}
    913 	pthread_detach(pt);
    914 
    915 	return 0;
    916 }
    917 
    918 void
    919 rumpuser_sp_fini()
    920 {
    921 
    922 	if (spclist[0].spc_fd) {
    923 		parsetab[cleanupidx].cleanup(cleanupsa);
    924 		shutdown(spclist[0].spc_fd, SHUT_RDWR);
    925 		spfini = 1;
    926 	}
    927 }
    928