Home | History | Annotate | Line # | Download | only in librumpuser
rumpuser_sp.c revision 1.28
      1 /*      $NetBSD: rumpuser_sp.c,v 1.28 2011/01/02 13:01:45 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Sysproxy routines.  This provides system RPC support over host sockets.
     30  * The most notable limitation is that the client and server must share
     31  * the same ABI.  This does not mean that they have to be the same
     32  * machine or that they need to run the same version of the host OS,
     33  * just that they must agree on the data structures.  This even *might*
     34  * work correctly from one hardware architecture to another.
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.28 2011/01/02 13:01:45 pooka Exp $");
     39 
     40 #include <sys/types.h>
     41 #include <sys/atomic.h>
     42 #include <sys/mman.h>
     43 #include <sys/socket.h>
     44 
     45 #include <arpa/inet.h>
     46 #include <netinet/in.h>
     47 #include <netinet/tcp.h>
     48 
     49 #include <assert.h>
     50 #include <errno.h>
     51 #include <fcntl.h>
     52 #include <poll.h>
     53 #include <pthread.h>
     54 #include <stdarg.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 
     60 #include <rump/rump.h> /* XXX: for rfork flags */
     61 #include <rump/rumpuser.h>
     62 #include "rumpuser_int.h"
     63 
     64 #include "sp_common.c"
     65 
     66 #ifndef MAXCLI
     67 #define MAXCLI 256
     68 #endif
     69 #ifndef MAXWORKER
     70 #define MAXWORKER 128
     71 #endif
     72 #ifndef IDLEWORKER
     73 #define IDLEWORKER 16
     74 #endif
     75 int rumpsp_maxworker = MAXWORKER;
     76 int rumpsp_idleworker = IDLEWORKER;
     77 
     78 static struct pollfd pfdlist[MAXCLI];
     79 static struct spclient spclist[MAXCLI];
     80 static unsigned int disco;
     81 static volatile int spfini;
     82 
     83 static struct rumpuser_sp_ops spops;
     84 
     85 static char banner[MAXBANNER];
     86 
     87 #define PROTOMAJOR 0
     88 #define PROTOMINOR 0
     89 
     90 /*
     91  * Manual wrappers, since librump does not have access to the
     92  * user namespace wrapped interfaces.
     93  */
     94 
     95 static void
     96 lwproc_switch(struct lwp *l)
     97 {
     98 
     99 	spops.spop_schedule();
    100 	spops.spop_lwproc_switch(l);
    101 	spops.spop_unschedule();
    102 }
    103 
    104 static void
    105 lwproc_release(void)
    106 {
    107 
    108 	spops.spop_schedule();
    109 	spops.spop_lwproc_release();
    110 	spops.spop_unschedule();
    111 }
    112 
    113 static int
    114 lwproc_rfork(struct spclient *spc, int flags)
    115 {
    116 	int rv;
    117 
    118 	spops.spop_schedule();
    119 	rv = spops.spop_lwproc_rfork(spc, flags);
    120 	spops.spop_unschedule();
    121 
    122 	return rv;
    123 }
    124 
    125 static int
    126 lwproc_newlwp(pid_t pid)
    127 {
    128 	int rv;
    129 
    130 	spops.spop_schedule();
    131 	rv = spops.spop_lwproc_newlwp(pid);
    132 	spops.spop_unschedule();
    133 
    134 	return rv;
    135 }
    136 
    137 static struct lwp *
    138 lwproc_curlwp(void)
    139 {
    140 	struct lwp *l;
    141 
    142 	spops.spop_schedule();
    143 	l = spops.spop_lwproc_curlwp();
    144 	spops.spop_unschedule();
    145 
    146 	return l;
    147 }
    148 
    149 static pid_t
    150 lwproc_getpid(void)
    151 {
    152 	pid_t p;
    153 
    154 	spops.spop_schedule();
    155 	p = spops.spop_getpid();
    156 	spops.spop_unschedule();
    157 
    158 	return p;
    159 }
    160 
    161 static int
    162 rumpsyscall(int sysnum, void *data, register_t *retval)
    163 {
    164 	int rv;
    165 
    166 	spops.spop_schedule();
    167 	rv = spops.spop_syscall(sysnum, data, retval);
    168 	spops.spop_unschedule();
    169 
    170 	return rv;
    171 }
    172 
    173 static uint64_t
    174 nextreq(struct spclient *spc)
    175 {
    176 	uint64_t nw;
    177 
    178 	pthread_mutex_lock(&spc->spc_mtx);
    179 	nw = spc->spc_nextreq++;
    180 	pthread_mutex_unlock(&spc->spc_mtx);
    181 
    182 	return nw;
    183 }
    184 
    185 static void
    186 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
    187 {
    188 	struct rsp_hdr rhdr;
    189 
    190 	rhdr.rsp_len = sizeof(rhdr);
    191 	rhdr.rsp_reqno = reqno;
    192 	rhdr.rsp_class = RUMPSP_ERROR;
    193 	rhdr.rsp_type = 0;
    194 	rhdr.rsp_error = error;
    195 
    196 	sendlock(spc);
    197 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    198 	sendunlock(spc);
    199 }
    200 
    201 static int
    202 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
    203 {
    204 	struct rsp_hdr rhdr;
    205 	int rv;
    206 
    207 	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
    208 	rhdr.rsp_reqno = reqno;
    209 	rhdr.rsp_class = RUMPSP_RESP;
    210 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    211 	rhdr.rsp_error = 0;
    212 
    213 	sendlock(spc);
    214 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    215 	rv = dosend(spc, &error, sizeof(error));
    216 	sendunlock(spc);
    217 
    218 	return rv;
    219 }
    220 
    221 static int
    222 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
    223 	register_t *retval)
    224 {
    225 	struct rsp_hdr rhdr;
    226 	struct rsp_sysresp sysresp;
    227 	int rv;
    228 
    229 	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
    230 	rhdr.rsp_reqno = reqno;
    231 	rhdr.rsp_class = RUMPSP_RESP;
    232 	rhdr.rsp_type = RUMPSP_SYSCALL;
    233 	rhdr.rsp_sysnum = 0;
    234 
    235 	sysresp.rsys_error = error;
    236 	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
    237 
    238 	sendlock(spc);
    239 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    240 	rv = dosend(spc, &sysresp, sizeof(sysresp));
    241 	sendunlock(spc);
    242 
    243 	return rv;
    244 }
    245 
    246 static int
    247 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
    248 	int wantstr, void **resp)
    249 {
    250 	struct rsp_hdr rhdr;
    251 	struct rsp_copydata copydata;
    252 	struct respwait rw;
    253 	int rv;
    254 
    255 	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
    256 
    257 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
    258 	rhdr.rsp_class = RUMPSP_REQ;
    259 	if (wantstr)
    260 		rhdr.rsp_type = RUMPSP_COPYINSTR;
    261 	else
    262 		rhdr.rsp_type = RUMPSP_COPYIN;
    263 	rhdr.rsp_sysnum = 0;
    264 
    265 	copydata.rcp_addr = __UNCONST(remaddr);
    266 	copydata.rcp_len = *dlen;
    267 
    268 	putwait(spc, &rw, &rhdr);
    269 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    270 	rv = dosend(spc, &copydata, sizeof(copydata));
    271 	if (rv) {
    272 		unputwait(spc, &rw);
    273 		return rv;
    274 	}
    275 
    276 	rv = waitresp(spc, &rw);
    277 
    278 	DPRINTF(("copyin: response %d\n", rv));
    279 
    280 	*resp = rw.rw_data;
    281 	if (wantstr)
    282 		*dlen = rw.rw_dlen;
    283 
    284 	return rv;
    285 
    286 }
    287 
    288 static int
    289 send_copyout_req(struct spclient *spc, const void *remaddr,
    290 	const void *data, size_t dlen)
    291 {
    292 	struct rsp_hdr rhdr;
    293 	struct rsp_copydata copydata;
    294 	int rv;
    295 
    296 	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
    297 
    298 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
    299 	rhdr.rsp_reqno = nextreq(spc);
    300 	rhdr.rsp_class = RUMPSP_REQ;
    301 	rhdr.rsp_type = RUMPSP_COPYOUT;
    302 	rhdr.rsp_sysnum = 0;
    303 
    304 	copydata.rcp_addr = __UNCONST(remaddr);
    305 	copydata.rcp_len = dlen;
    306 
    307 	sendlock(spc);
    308 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    309 	rv = dosend(spc, &copydata, sizeof(copydata));
    310 	rv = dosend(spc, data, dlen);
    311 	sendunlock(spc);
    312 
    313 	return rv;
    314 }
    315 
    316 static int
    317 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
    318 {
    319 	struct rsp_hdr rhdr;
    320 	struct respwait rw;
    321 	int rv;
    322 
    323 	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
    324 
    325 	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
    326 	rhdr.rsp_class = RUMPSP_REQ;
    327 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    328 	rhdr.rsp_sysnum = 0;
    329 
    330 	putwait(spc, &rw, &rhdr);
    331 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    332 	rv = dosend(spc, &howmuch, sizeof(howmuch));
    333 	if (rv) {
    334 		unputwait(spc, &rw);
    335 		return rv;
    336 	}
    337 
    338 	rv = waitresp(spc, &rw);
    339 
    340 	*resp = rw.rw_data;
    341 
    342 	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
    343 
    344 	return rv;
    345 }
    346 
    347 static void
    348 spcref(struct spclient *spc)
    349 {
    350 
    351 	pthread_mutex_lock(&spc->spc_mtx);
    352 	spc->spc_refcnt++;
    353 	pthread_mutex_unlock(&spc->spc_mtx);
    354 }
    355 
    356 static void
    357 spcrelease(struct spclient *spc)
    358 {
    359 	int ref;
    360 
    361 	pthread_mutex_lock(&spc->spc_mtx);
    362 	ref = --spc->spc_refcnt;
    363 	pthread_mutex_unlock(&spc->spc_mtx);
    364 
    365 	if (ref > 0)
    366 		return;
    367 
    368 	DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
    369 
    370 	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
    371 	_DIAGASSERT(spc->spc_buf == NULL);
    372 
    373 	lwproc_switch(spc->spc_mainlwp);
    374 	lwproc_release();
    375 	spc->spc_mainlwp = NULL;
    376 
    377 	close(spc->spc_fd);
    378 	spc->spc_fd = -1;
    379 	spc->spc_state = SPCSTATE_NEW;
    380 
    381 	atomic_inc_uint(&disco);
    382 }
    383 
    384 static void
    385 serv_handledisco(unsigned int idx)
    386 {
    387 	struct spclient *spc = &spclist[idx];
    388 
    389 	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
    390 
    391 	pfdlist[idx].fd = -1;
    392 	pfdlist[idx].revents = 0;
    393 	pthread_mutex_lock(&spc->spc_mtx);
    394 	spc->spc_state = SPCSTATE_DYING;
    395 	kickall(spc);
    396 	pthread_mutex_unlock(&spc->spc_mtx);
    397 
    398 	/*
    399 	 * Nobody's going to attempt to send/receive anymore,
    400 	 * so reinit info relevant to that.
    401 	 */
    402 	/*LINTED:pointer casts may be ok*/
    403 	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
    404 
    405 	spcrelease(spc);
    406 }
    407 
    408 static void
    409 serv_shutdown(void)
    410 {
    411 	struct spclient *spc;
    412 	unsigned int i;
    413 
    414 	for (i = 1; i < MAXCLI; i++) {
    415 		spc = &spclist[i];
    416 		if (spc->spc_fd == -1)
    417 			continue;
    418 
    419 		shutdown(spc->spc_fd, SHUT_RDWR);
    420 		serv_handledisco(i);
    421 
    422 		spcrelease(spc);
    423 	}
    424 }
    425 
    426 static unsigned
    427 serv_handleconn(int fd, connecthook_fn connhook, int busy)
    428 {
    429 	struct sockaddr_storage ss;
    430 	socklen_t sl = sizeof(ss);
    431 	int newfd, flags;
    432 	unsigned i;
    433 
    434 	/*LINTED: cast ok */
    435 	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
    436 	if (newfd == -1)
    437 		return 0;
    438 
    439 	if (busy) {
    440 		close(newfd); /* EBUSY */
    441 		return 0;
    442 	}
    443 
    444 	flags = fcntl(newfd, F_GETFL, 0);
    445 	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
    446 		close(newfd);
    447 		return 0;
    448 	}
    449 
    450 	if (connhook(newfd) != 0) {
    451 		close(newfd);
    452 		return 0;
    453 	}
    454 
    455 	/* write out a banner for the client */
    456 	if (write(newfd, banner, strlen(banner)) != (ssize_t)strlen(banner)) {
    457 		close(newfd);
    458 		return 0;
    459 	}
    460 
    461 	/* find empty slot the simple way */
    462 	for (i = 0; i < MAXCLI; i++) {
    463 		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
    464 			break;
    465 	}
    466 
    467 	if (lwproc_rfork(&spclist[i], RUMP_RFCFDG) != 0) {
    468 		close(newfd);
    469 		return 0;
    470 	}
    471 
    472 	assert(i < MAXCLI);
    473 
    474 	pfdlist[i].fd = newfd;
    475 	spclist[i].spc_fd = newfd;
    476 	spclist[i].spc_mainlwp = lwproc_curlwp();
    477 	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
    478 	spclist[i].spc_pid = lwproc_getpid();
    479 	spclist[i].spc_refcnt = 1;
    480 
    481 	TAILQ_INIT(&spclist[i].spc_respwait);
    482 
    483 	DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
    484 	    newfd, i, lwproc_getpid()));
    485 
    486 	lwproc_switch(NULL);
    487 
    488 	return i;
    489 }
    490 
    491 static void
    492 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
    493 {
    494 	register_t retval[2] = {0, 0};
    495 	int rv, sysnum;
    496 
    497 	sysnum = (int)rhdr->rsp_sysnum;
    498 	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
    499 	    sysnum, 0));
    500 
    501 	lwproc_newlwp(spc->spc_pid);
    502 	rv = rumpsyscall(sysnum, data, retval);
    503 	lwproc_release();
    504 
    505 	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
    506 	    rv, retval[0], retval[1]));
    507 
    508 	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
    509 }
    510 
    511 struct sysbouncearg {
    512 	struct spclient *sba_spc;
    513 	struct rsp_hdr sba_hdr;
    514 	uint8_t *sba_data;
    515 
    516 	TAILQ_ENTRY(sysbouncearg) sba_entries;
    517 };
    518 static pthread_mutex_t sbamtx;
    519 static pthread_cond_t sbacv;
    520 static int nworker, idleworker;
    521 static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);
    522 
    523 /*ARGSUSED*/
    524 static void *
    525 serv_syscallbouncer(void *arg)
    526 {
    527 	struct sysbouncearg *sba;
    528 
    529 	for (;;) {
    530 		pthread_mutex_lock(&sbamtx);
    531 		if (idleworker >= rumpsp_idleworker) {
    532 			nworker--;
    533 			pthread_mutex_unlock(&sbamtx);
    534 			break;
    535 		}
    536 		idleworker++;
    537 		while (TAILQ_EMPTY(&syslist)) {
    538 			pthread_cond_wait(&sbacv, &sbamtx);
    539 		}
    540 
    541 		sba = TAILQ_FIRST(&syslist);
    542 		TAILQ_REMOVE(&syslist, sba, sba_entries);
    543 		idleworker--;
    544 		pthread_mutex_unlock(&sbamtx);
    545 
    546 		serv_handlesyscall(sba->sba_spc,
    547 		    &sba->sba_hdr, sba->sba_data);
    548 		spcrelease(sba->sba_spc);
    549 		free(sba->sba_data);
    550 		free(sba);
    551 	}
    552 
    553 	return NULL;
    554 }
    555 
/*
 * Common backend for copyin and copyinstr: fetch data from the
 * client's address raddr into the local buffer laddr.
 *
 * On entry *len is the size of the request (and of laddr).  For
 * string requests (wantstr != 0) copyin_req() replaces *len with the
 * length of the data the client sent back.
 *
 * The kernel lock is dropped with rumpuser__kunlock() for the
 * duration of the exchange and retaken before returning.
 *
 * Returns 0 on success and EFAULT on any failure.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	/*
	 * For strings the length comes from the peer.  Never copy
	 * more than the caller said laddr can hold.
	 */
	if (*len > maxlen)
		rv = EFAULT;
	else
		memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
    578 
/*
 * Copy len bytes from client address raddr to laddr.
 * Thin front end for sp_copyin() in non-string mode.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	const int wantstr = 0;

	return sp_copyin(arg, raddr, laddr, &len, wantstr);
}
    585 
/*
 * Copy a string from client address raddr to laddr; *len is passed
 * through to sp_copyin() in string mode, which may update it.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	const int wantstr = 1;

	return sp_copyin(arg, raddr, laddr, len, wantstr);
}
    592 
/*
 * Common backend for copyout and copyoutstr: ship dlen bytes from
 * laddr to the client's address raddr.  The kernel lock is dropped
 * around the send.  Returns 0 on success and EFAULT if the send
 * failed.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);
	error = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return error ? EFAULT : 0;
}
    607 
/*
 * Copy dlen bytes from laddr to client address raddr.
 * Thin front end for sp_copyout().
 */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
    614 
/*
 * Copy *dlen bytes of string data from laddr to client address
 * raddr.  *dlen is only read, never updated.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
    621 
/*
 * Ask the client for an anonymous mapping of howmuch bytes.
 *
 * On a successful exchange *addr is set to the pointer value found
 * in the response.  Returns 0 on success, EFAULT if the request
 * failed (in which case *addr is untouched) and ENOMEM if the
 * client answered with a NULL address.  The kernel lock is dropped
 * for the duration of the exchange.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *mapped, *payload;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &payload) != 0) {
		error = EFAULT;
	} else {
		/* the response body is the address of the mapping */
		mapped = *(void **)payload;
		free(payload);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
    653 
    654 /*
    655  *
    656  * Startup routines and mainloop for server.
    657  *
    658  */
    659 
    660 struct spservarg {
    661 	int sps_sock;
    662 	connecthook_fn sps_connhook;
    663 };
    664 
    665 static pthread_attr_t pattr_detached;
    666 static void
    667 handlereq(struct spclient *spc)
    668 {
    669 	struct sysbouncearg *sba;
    670 	pthread_t pt;
    671 	int retries, rv;
    672 
    673 	if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
    674 		if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
    675 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH);
    676 			spcfreebuf(spc);
    677 			return;
    678 		}
    679 
    680 		rv = send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0);
    681 		spcfreebuf(spc);
    682 		if (rv) {
    683 			shutdown(spc->spc_fd, SHUT_RDWR);
    684 			return;
    685 		}
    686 		spc->spc_state = SPCSTATE_RUNNING;
    687 		return;
    688 	}
    689 
    690 	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
    691 		send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL);
    692 		spcfreebuf(spc);
    693 		return;
    694 	}
    695 
    696 	retries = 0;
    697 	while ((sba = malloc(sizeof(*sba))) == NULL) {
    698 		if (nworker == 0 || retries > 10) {
    699 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN);
    700 			spcfreebuf(spc);
    701 			return;
    702 		}
    703 		/* slim chance of more memory? */
    704 		usleep(10000);
    705 	}
    706 
    707 	sba->sba_spc = spc;
    708 	sba->sba_hdr = spc->spc_hdr;
    709 	sba->sba_data = spc->spc_buf;
    710 	spcresetbuf(spc);
    711 
    712 	spcref(spc);
    713 
    714 	pthread_mutex_lock(&sbamtx);
    715 	TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
    716 	if (idleworker > 0) {
    717 		/* do we have a daemon's tool (i.e. idle threads)? */
    718 		pthread_cond_signal(&sbacv);
    719 	} else if (nworker < rumpsp_maxworker) {
    720 		/*
    721 		 * Else, need to create one
    722 		 * (if we can, otherwise just expect another
    723 		 * worker to pick up the syscall)
    724 		 */
    725 		if (pthread_create(&pt, &pattr_detached,
    726 		    serv_syscallbouncer, NULL) == 0)
    727 			nworker++;
    728 	}
    729 	pthread_mutex_unlock(&sbamtx);
    730 }
    731 
    732 static void *
    733 spserver(void *arg)
    734 {
    735 	struct spservarg *sarg = arg;
    736 	struct spclient *spc;
    737 	unsigned idx;
    738 	int seen;
    739 	int rv;
    740 	unsigned int nfds, maxidx;
    741 
    742 	for (idx = 0; idx < MAXCLI; idx++) {
    743 		pfdlist[idx].fd = -1;
    744 		pfdlist[idx].events = POLLIN;
    745 
    746 		spc = &spclist[idx];
    747 		pthread_mutex_init(&spc->spc_mtx, NULL);
    748 		pthread_cond_init(&spc->spc_cv, NULL);
    749 		spc->spc_fd = -1;
    750 	}
    751 	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
    752 	pfdlist[0].events = POLLIN;
    753 	nfds = 1;
    754 	maxidx = 0;
    755 
    756 	pthread_attr_init(&pattr_detached);
    757 	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
    758 	/* XXX: doesn't stacksize currently work on NetBSD */
    759 	pthread_attr_setstacksize(&pattr_detached, 32*1024);
    760 
    761 	pthread_mutex_init(&sbamtx, NULL);
    762 	pthread_cond_init(&sbacv, NULL);
    763 
    764 	DPRINTF(("rump_sp: server mainloop\n"));
    765 
    766 	for (;;) {
    767 		int discoed;
    768 
    769 		/* g/c hangarounds (eventually) */
    770 		discoed = atomic_swap_uint(&disco, 0);
    771 		while (discoed--) {
    772 			nfds--;
    773 			idx = maxidx;
    774 			while (idx) {
    775 				if (pfdlist[idx].fd != -1) {
    776 					maxidx = idx;
    777 					break;
    778 				}
    779 				idx--;
    780 			}
    781 			DPRINTF(("rump_sp: set maxidx to [%u]\n",
    782 			    maxidx));
    783 		}
    784 
    785 		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
    786 		seen = 0;
    787 		rv = poll(pfdlist, maxidx+1, INFTIM);
    788 		assert(maxidx+1 <= MAXCLI);
    789 		assert(rv != 0);
    790 		if (rv == -1) {
    791 			if (errno == EINTR)
    792 				continue;
    793 			fprintf(stderr, "rump_spserver: poll returned %d\n",
    794 			    errno);
    795 			break;
    796 		}
    797 
    798 		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
    799 			if ((pfdlist[idx].revents & POLLIN) == 0)
    800 				continue;
    801 
    802 			seen++;
    803 			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
    804 			    idx, seen, rv));
    805 			if (idx > 0) {
    806 				spc = &spclist[idx];
    807 				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
    808 				switch (readframe(spc)) {
    809 				case 0:
    810 					break;
    811 				case -1:
    812 					serv_handledisco(idx);
    813 					break;
    814 				default:
    815 					switch (spc->spc_hdr.rsp_class) {
    816 					case RUMPSP_RESP:
    817 						kickwaiter(spc);
    818 						break;
    819 					case RUMPSP_REQ:
    820 						handlereq(spc);
    821 						break;
    822 					default:
    823 						send_error_resp(spc,
    824 						    spc->spc_hdr.rsp_reqno,
    825 						    ENOENT);
    826 						spcfreebuf(spc);
    827 						break;
    828 					}
    829 					break;
    830 				}
    831 
    832 			} else {
    833 				DPRINTF(("rump_sp: mainloop new connection\n"));
    834 
    835 				if (__predict_false(spfini)) {
    836 					close(spclist[0].spc_fd);
    837 					serv_shutdown();
    838 					goto out;
    839 				}
    840 
    841 				idx = serv_handleconn(pfdlist[0].fd,
    842 				    sarg->sps_connhook, nfds == MAXCLI);
    843 				if (idx)
    844 					nfds++;
    845 				if (idx > maxidx)
    846 					maxidx = idx;
    847 				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
    848 			}
    849 		}
    850 	}
    851 
    852  out:
    853 	return NULL;
    854 }
    855 
    856 static unsigned cleanupidx;
    857 static struct sockaddr *cleanupsa;
    858 int
    859 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
    860 	const char *ostype, const char *osrelease, const char *machine)
    861 {
    862 	pthread_t pt;
    863 	struct spservarg *sarg;
    864 	struct sockaddr *sap;
    865 	char *p;
    866 	unsigned idx;
    867 	int error, s;
    868 
    869 	p = strdup(url);
    870 	if (p == NULL)
    871 		return ENOMEM;
    872 	error = parseurl(p, &sap, &idx, 1);
    873 	free(p);
    874 	if (error)
    875 		return error;
    876 
    877 	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
    878 	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
    879 
    880 	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
    881 	if (s == -1)
    882 		return errno;
    883 
    884 	spops = *spopsp;
    885 	sarg = malloc(sizeof(*sarg));
    886 	if (sarg == NULL) {
    887 		close(s);
    888 		return ENOMEM;
    889 	}
    890 
    891 	sarg->sps_sock = s;
    892 	sarg->sps_connhook = parsetab[idx].connhook;
    893 
    894 	cleanupidx = idx;
    895 	cleanupsa = sap;
    896 
    897 	/* sloppy error recovery */
    898 
    899 	/*LINTED*/
    900 	if (bind(s, sap, sap->sa_len) == -1) {
    901 		fprintf(stderr, "rump_sp: server bind failed\n");
    902 		return errno;
    903 	}
    904 
    905 	if (listen(s, MAXCLI) == -1) {
    906 		fprintf(stderr, "rump_sp: server listen failed\n");
    907 		return errno;
    908 	}
    909 
    910 	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
    911 		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
    912 		return errno;
    913 	}
    914 	pthread_detach(pt);
    915 
    916 	return 0;
    917 }
    918 
    919 void
    920 rumpuser_sp_fini()
    921 {
    922 
    923 	if (spclist[0].spc_fd) {
    924 		parsetab[cleanupidx].cleanup(cleanupsa);
    925 		shutdown(spclist[0].spc_fd, SHUT_RDWR);
    926 		spfini = 1;
    927 	}
    928 }
    929