Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.42
      1 /*      $NetBSD: rumpclient.c,v 1.42 2011/03/08 18:28:01 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __RCSID("$NetBSD: rumpclient.c,v 1.42 2011/03/08 18:28:01 pooka Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/event.h>
     37 #include <sys/mman.h>
     38 #include <sys/socket.h>
     39 
     40 #include <arpa/inet.h>
     41 #include <netinet/in.h>
     42 #include <netinet/tcp.h>
     43 
     44 #include <assert.h>
     45 #include <dlfcn.h>
     46 #include <err.h>
     47 #include <errno.h>
     48 #include <fcntl.h>
     49 #include <link.h>
     50 #include <poll.h>
     51 #include <pthread.h>
     52 #include <signal.h>
     53 #include <stdarg.h>
     54 #include <stdbool.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 
     60 #include <rump/rumpclient.h>
     61 
/*
 * Host system call entry points.
 *
 * Most of these pointers are resolved by rumpclient_init() through its
 * FINDSYM/FINDSYM2 lookups; code in this file calls the host through
 * them (e.g. host_close(), host_kevent(), host_execve()) rather than
 * through the plain libc symbol.
 *
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c, which is
 * included right below -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue is used to wait for socket activity and signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
     78 
     79 #include "sp_common.c"
     80 
/*
 * State of the (single) connection to the server.  spc_fd is -1
 * whenever no socket is open.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor set up by doconnect(); -1 when there is none */
static int kq = -1;
/* signal set filled with sigfillset() in rumpclient_init() */
static sigset_t fullset;

static int doconnect(bool);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Reconnect policy used by send_with_recon(); changed with
 * rumpclient_setconnretry().
 *
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;
     96 
     97 static int
     98 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
     99 {
    100 	struct timeval starttime, curtime;
    101 	time_t prevreconmsg;
    102 	unsigned reconretries;
    103 	int rv;
    104 
    105 	for (prevreconmsg = 0, reconretries = 0;;) {
    106 		rv = dosend(spc, iov, iovlen);
    107 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    108 			/* no persistent connections */
    109 			if (retrytimo == 0) {
    110 				rv = ENOTCONN;
    111 				break;
    112 			}
    113 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    114 				exit(1);
    115 
    116 			if (!prevreconmsg) {
    117 				prevreconmsg = time(NULL);
    118 				gettimeofday(&starttime, NULL);
    119 			}
    120 			if (reconretries == 1) {
    121 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    122 					rv = ENOTCONN;
    123 					break;
    124 				}
    125 				fprintf(stderr, "rump_sp: connection to "
    126 				    "kernel lost, trying to reconnect ...\n");
    127 			} else if (time(NULL) - prevreconmsg > 120) {
    128 				fprintf(stderr, "rump_sp: still trying to "
    129 				    "reconnect ...\n");
    130 				prevreconmsg = time(NULL);
    131 			}
    132 
    133 			/* check that we aren't over the limit */
    134 			if (retrytimo > 0) {
    135 				struct timeval tmp;
    136 
    137 				gettimeofday(&curtime, NULL);
    138 				timersub(&curtime, &starttime, &tmp);
    139 				if (tmp.tv_sec >= retrytimo) {
    140 					fprintf(stderr, "rump_sp: reconnect "
    141 					    "failed, %lld second timeout\n",
    142 					    (long long)retrytimo);
    143 					return ENOTCONN;
    144 				}
    145 			}
    146 
    147 			/* adhoc backoff timer */
    148 			if (reconretries < 10) {
    149 				usleep(100000 * reconretries);
    150 			} else {
    151 				sleep(MIN(10, reconretries-9));
    152 			}
    153 			reconretries++;
    154 
    155 			if ((rv = doconnect(false)) != 0)
    156 				continue;
    157 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    158 			    NULL, 0, true)) != 0)
    159 				continue;
    160 
    161 			/*
    162 			 * ok, reconnect succesful.  we need to return to
    163 			 * the upper layer to get the entire PDU resent.
    164 			 */
    165 			if (reconretries != 1)
    166 				fprintf(stderr, "rump_sp: reconnected!\n");
    167 			rv = EAGAIN;
    168 			break;
    169 		} else {
    170 			_DIAGASSERT(errno != EAGAIN);
    171 			break;
    172 		}
    173 	}
    174 
    175 	return rv;
    176 }
    177 
/*
 * Wait until the response for the request registered in rw has arrived.
 *
 * Only one thread at a time receives from the socket: whoever finds
 * spc_istatus == SPCSTATUS_FREE marks it BUSY and becomes the receiver,
 * blocking in kevent() and reading frames.  Everybody else marks the
 * status WANTED and sleeps on their own rw_cv.
 *
 * spc:      the connection
 * rw:       wait record for our request; removed from spc_respwait and
 *           its condition variable destroyed before returning
 * mask:     signal mask that is installed momentarily when the kqueue
 *           reported a signal event, after which fullset is put back
 * keeplock: when false, sendunlockl() is called right after taking
 *           spc_mtx (NOTE(review): presumably this releases the send
 *           lock held by the caller -- defined in sp_common.c)
 *
 * Returns ENOTCONN if the connection generation changed while waiting
 * or readframe() reported failure, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps spc_generation; a change means we reconnected */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			struct kevent kev[8];
			int gotresp, dosig, rv, i;

			/* become the receiver; spc_mtx is dropped while blocking */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error, let readframe() decide what is going on */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* count signal events; any of them ends this round */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet, wait for more */
					continue;
				case -1:
					/* reading failed: connection considered dead */
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop receiving once our own reply came in */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					/* request from the server (copyin etc.) */
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role and wake up anyone waiting */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Install the caller's mask for a moment and
				 * then go back to the full mask, with spc_mtx
				 * released while doing so.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving; sleep until kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    293 
    294 static int
    295 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    296 	const void *data, size_t dlen, void **resp)
    297 {
    298 	struct rsp_hdr rhdr;
    299 	struct respwait rw;
    300 	struct iovec iov[2];
    301 	int rv;
    302 
    303 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    304 	rhdr.rsp_class = RUMPSP_REQ;
    305 	rhdr.rsp_type = RUMPSP_SYSCALL;
    306 	rhdr.rsp_sysnum = sysnum;
    307 
    308 	IOVPUT(iov[0], rhdr);
    309 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    310 
    311 	do {
    312 		putwait(spc, &rw, &rhdr);
    313 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    314 			unputwait(spc, &rw);
    315 			continue;
    316 		}
    317 
    318 		rv = cliwaitresp(spc, &rw, omask, false);
    319 		if (rv == ENOTCONN)
    320 			rv = EAGAIN;
    321 	} while (rv == EAGAIN);
    322 
    323 	*resp = rw.rw_data;
    324 	return rv;
    325 }
    326 
    327 static int
    328 handshake_req(struct spclient *spc, int type, void *data,
    329 	int cancel, bool haslock)
    330 {
    331 	struct handshake_fork rf;
    332 	const char *myprogname;
    333 	struct rsp_hdr rhdr;
    334 	struct respwait rw;
    335 	sigset_t omask;
    336 	size_t bonus;
    337 	struct iovec iov[2];
    338 	int rv;
    339 
    340 	if (type == HANDSHAKE_FORK) {
    341 		bonus = sizeof(rf);
    342 	} else {
    343 		myprogname = getprogname();
    344 		bonus = strlen(myprogname)+1;
    345 	}
    346 
    347 	/* performs server handshake */
    348 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    349 	rhdr.rsp_class = RUMPSP_REQ;
    350 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    351 	rhdr.rsp_handshake = type;
    352 
    353 	IOVPUT(iov[0], rhdr);
    354 
    355 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    356 	if (haslock)
    357 		putwait_locked(spc, &rw, &rhdr);
    358 	else
    359 		putwait(spc, &rw, &rhdr);
    360 	if (type == HANDSHAKE_FORK) {
    361 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    362 		rf.rf_cancel = cancel;
    363 		IOVPUT(iov[1], rf);
    364 	} else {
    365 		IOVPUT_WITHSIZE(iov[1], __UNCONST(getprogname()), bonus);
    366 	}
    367 	rv = send_with_recon(spc, iov, __arraycount(iov));
    368 	if (rv || cancel) {
    369 		if (haslock)
    370 			unputwait_locked(spc, &rw);
    371 		else
    372 			unputwait(spc, &rw);
    373 		if (cancel) {
    374 			goto out;
    375 		}
    376 	} else {
    377 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    378 	}
    379 	if (rv)
    380 		goto out;
    381 
    382 	rv = *(int *)rw.rw_data;
    383 	free(rw.rw_data);
    384 
    385  out:
    386 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    387 	return rv;
    388 }
    389 
    390 static int
    391 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    392 {
    393 	struct rsp_hdr rhdr;
    394 	struct respwait rw;
    395 	struct iovec iov[1];
    396 	int rv;
    397 
    398 	rhdr.rsp_len = sizeof(rhdr);
    399 	rhdr.rsp_class = RUMPSP_REQ;
    400 	rhdr.rsp_type = RUMPSP_PREFORK;
    401 	rhdr.rsp_error = 0;
    402 
    403 	IOVPUT(iov[0], rhdr);
    404 
    405 	do {
    406 		putwait(spc, &rw, &rhdr);
    407 		rv = send_with_recon(spc, iov, __arraycount(iov));
    408 		if (rv != 0) {
    409 			unputwait(spc, &rw);
    410 			continue;
    411 		}
    412 
    413 		rv = cliwaitresp(spc, &rw, omask, false);
    414 		if (rv == ENOTCONN)
    415 			rv = EAGAIN;
    416 	} while (rv == EAGAIN);
    417 
    418 	*resp = rw.rw_data;
    419 	return rv;
    420 }
    421 
    422 /*
    423  * prevent response code from deadlocking with reconnect code
    424  */
    425 static int
    426 resp_sendlock(struct spclient *spc)
    427 {
    428 	int rv = 0;
    429 
    430 	pthread_mutex_lock(&spc->spc_mtx);
    431 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    432 		if (__predict_false(spc->spc_reconnecting)) {
    433 			rv = EBUSY;
    434 			goto out;
    435 		}
    436 		spc->spc_ostatus = SPCSTATUS_WANTED;
    437 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    438 	}
    439 	spc->spc_ostatus = SPCSTATUS_BUSY;
    440 
    441  out:
    442 	pthread_mutex_unlock(&spc->spc_mtx);
    443 	return rv;
    444 }
    445 
    446 static void
    447 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    448 	int wantstr)
    449 {
    450 	struct rsp_hdr rhdr;
    451 	struct iovec iov[2];
    452 
    453 	if (wantstr)
    454 		dlen = MIN(dlen, strlen(data)+1);
    455 
    456 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    457 	rhdr.rsp_reqno = reqno;
    458 	rhdr.rsp_class = RUMPSP_RESP;
    459 	rhdr.rsp_type = RUMPSP_COPYIN;
    460 	rhdr.rsp_sysnum = 0;
    461 
    462 	IOVPUT(iov[0], rhdr);
    463 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    464 
    465 	if (resp_sendlock(spc) != 0)
    466 		return;
    467 	(void)SENDIOV(spc, iov);
    468 	sendunlock(spc);
    469 }
    470 
    471 static void
    472 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    473 {
    474 	struct rsp_hdr rhdr;
    475 	struct iovec iov[2];
    476 
    477 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    478 	rhdr.rsp_reqno = reqno;
    479 	rhdr.rsp_class = RUMPSP_RESP;
    480 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    481 	rhdr.rsp_sysnum = 0;
    482 
    483 	IOVPUT(iov[0], rhdr);
    484 	IOVPUT(iov[1], addr);
    485 
    486 	if (resp_sendlock(spc) != 0)
    487 		return;
    488 	(void)SENDIOV(spc, iov);
    489 	sendunlock(spc);
    490 }
    491 
    492 int
    493 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    494 	register_t *retval)
    495 {
    496 	struct rsp_sysresp *resp;
    497 	sigset_t omask;
    498 	void *rdata;
    499 	int rv;
    500 
    501 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    502 
    503 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    504 	    sysnum, data, dlen));
    505 
    506 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    507 	if (rv)
    508 		goto out;
    509 
    510 	resp = rdata;
    511 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    512 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    513 
    514 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    515 	rv = resp->rsys_error;
    516 	free(rdata);
    517 
    518  out:
    519 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    520 	return rv;
    521 }
    522 
    523 static void
    524 handlereq(struct spclient *spc)
    525 {
    526 	struct rsp_copydata *copydata;
    527 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    528 	void *mapaddr;
    529 	size_t maplen;
    530 	int reqtype = spc->spc_hdr.rsp_type;
    531 
    532 	switch (reqtype) {
    533 	case RUMPSP_COPYIN:
    534 	case RUMPSP_COPYINSTR:
    535 		/*LINTED*/
    536 		copydata = (struct rsp_copydata *)spc->spc_buf;
    537 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    538 		    copydata->rcp_addr, copydata->rcp_len));
    539 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    540 		    copydata->rcp_addr, copydata->rcp_len,
    541 		    reqtype == RUMPSP_COPYINSTR);
    542 		break;
    543 	case RUMPSP_COPYOUT:
    544 	case RUMPSP_COPYOUTSTR:
    545 		/*LINTED*/
    546 		copydata = (struct rsp_copydata *)spc->spc_buf;
    547 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    548 		    copydata->rcp_addr, copydata->rcp_len));
    549 		/*LINTED*/
    550 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    551 		    copydata->rcp_len);
    552 		break;
    553 	case RUMPSP_ANONMMAP:
    554 		/*LINTED*/
    555 		maplen = *(size_t *)spc->spc_buf;
    556 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    557 		    MAP_ANON, -1, 0);
    558 		if (mapaddr == MAP_FAILED)
    559 			mapaddr = NULL;
    560 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    561 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    562 		break;
    563 	case RUMPSP_RAISE:
    564 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    565 		raise((int)rhdr->rsp_signo);
    566 		/*
    567 		 * We most likely have signals blocked, but the signal
    568 		 * will be handled soon enough when we return.
    569 		 */
    570 		break;
    571 	default:
    572 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    573 		abort();
    574 		break;
    575 	}
    576 
    577 	spcfreebuf(spc);
    578 }
    579 
/*
 * Server address and the index of the matching parsetab[] entry.  Both
 * are filled in by parseurl() in rumpclient_init() and used by
 * doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    582 
/*
 * dup until we get a "good" fd which does not collide with stdio
 *
 * myfd:       descriptor to start from (-1 is passed straight through)
 * mustchange: when non-zero, at least one dup is done even if myfd is
 *             already above 2, and the descriptor passed in is left
 *             open for the caller to deal with
 *
 * Every intermediate descriptor that was duplicated away from is
 * closed.  Returns the final descriptor, or -1 if myfd was -1 or
 * host_dup() failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int saved[4];
	int nsaved = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nsaved < __arraycount(saved));
		saved[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* do not count it: prevents closing the old fd */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	/* close what we stepped over, newest first */
	while (nsaved-- > 0) {
		host_close(saved[nsaved]);
	}

	return myfd;
}
    606 
    607 static int
    608 doconnect(bool noisy)
    609 {
    610 	struct respwait rw;
    611 	struct rsp_hdr rhdr;
    612 	struct kevent kev[NSIG+1];
    613 	char banner[MAXBANNER];
    614 	struct pollfd pfd;
    615 	int s, error, flags, i;
    616 	ssize_t n;
    617 
    618 	if (kq != -1)
    619 		host_close(kq);
    620 	kq = -1;
    621 	s = -1;
    622 
    623 	if (clispc.spc_fd != -1)
    624 		host_close(clispc.spc_fd);
    625 	clispc.spc_fd = -1;
    626 
    627 	/*
    628 	 * for reconnect, gate everyone out of the receiver code
    629 	 */
    630 	putwait_locked(&clispc, &rw, &rhdr);
    631 
    632 	pthread_mutex_lock(&clispc.spc_mtx);
    633 	clispc.spc_reconnecting = 1;
    634 	pthread_cond_broadcast(&clispc.spc_cv);
    635 	clispc.spc_generation++;
    636 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    637 		clispc.spc_istatus = SPCSTATUS_WANTED;
    638 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    639 	}
    640 	kickall(&clispc);
    641 
    642 	/*
    643 	 * we can release it already since we hold the
    644 	 * send lock during reconnect
    645 	 * XXX: assert it
    646 	 */
    647 	clispc.spc_istatus = SPCSTATUS_FREE;
    648 	pthread_mutex_unlock(&clispc.spc_mtx);
    649 	unputwait_locked(&clispc, &rw);
    650 
    651 	free(clispc.spc_buf);
    652 	clispc.spc_off = 0;
    653 
    654 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    655 	if (s == -1)
    656 		return -1;
    657 
    658 	pfd.fd = s;
    659 	pfd.events = POLLIN;
    660 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
    661 		if (errno == EINTR)
    662 			continue;
    663 		error = errno;
    664 		if (noisy)
    665 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
    666 			    strerror(errno));
    667 		errno = error;
    668 		return -1;
    669 	}
    670 
    671 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    672 		error = errno;
    673 		if (noisy)
    674 			fprintf(stderr, "rump_sp: connect hook failed\n");
    675 		errno = error;
    676 		return -1;
    677 	}
    678 
    679 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
    680 		error = errno;
    681 		if (noisy)
    682 			fprintf(stderr, "rump_sp: failed to read banner\n");
    683 		errno = error;
    684 		return -1;
    685 	}
    686 
    687 	if (banner[n-1] != '\n') {
    688 		if (noisy)
    689 			fprintf(stderr, "rump_sp: invalid banner\n");
    690 		errno = EINVAL;
    691 		return -1;
    692 	}
    693 	banner[n] = '\0';
    694 	/* parse the banner some day */
    695 
    696 	flags = host_fcntl(s, F_GETFL, 0);
    697 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    698 		if (noisy)
    699 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
    700 			    strerror(errno));
    701 		errno = EINVAL;
    702 		return -1;
    703 	}
    704 	clispc.spc_fd = s;
    705 	clispc.spc_state = SPCSTATE_RUNNING;
    706 	clispc.spc_reconnecting = 0;
    707 
    708 	/* setup kqueue, we want all signals and the fd */
    709 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    710 		error = errno;
    711 		if (noisy)
    712 			fprintf(stderr, "rump_sp: cannot setup kqueue");
    713 		errno = error;
    714 		return -1;
    715 	}
    716 
    717 	for (i = 0; i < NSIG; i++) {
    718 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    719 	}
    720 	EV_SET(&kev[NSIG], clispc.spc_fd,
    721 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    722 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    723 		error = errno;
    724 		if (noisy)
    725 			fprintf(stderr, "rump_sp: kevent() failed");
    726 		errno = error;
    727 		return -1;
    728 	}
    729 
    730 	return 0;
    731 }
    732 
    733 static int
    734 doinit(void)
    735 {
    736 
    737 	TAILQ_INIT(&clispc.spc_respwait);
    738 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    739 	pthread_cond_init(&clispc.spc_cv, NULL);
    740 
    741 	return 0;
    742 }
    743 
/*
 * Symbol lookup used by rumpclient_init().  rumphijack_dlsym is a weak
 * alias for rumpclient__dlsym, which simply calls dlsym().
 * rumpclient_init() compares the two to detect whether the alias still
 * points at this default implementation.
 *
 * NOTE(review): presumably librumphijack supplies a strong
 * rumphijack_dlsym that replaces the alias -- confirm there.
 */
void *rumpclient__dlsym(void *, const char *);
void *rumphijack_dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
    753 
    754 static pid_t init_done = 0;
    755 
    756 int
    757 rumpclient_init()
    758 {
    759 	char *p;
    760 	int error;
    761 	int rv = -1;
    762 	int hstype;
    763 	pid_t mypid;
    764 
    765 	/*
    766 	 * Make sure we're not riding the context of a previous
    767 	 * host fork.  Note: it's *possible* that after n>1 forks
    768 	 * we have the same pid as one of our exited parents, but
    769 	 * I'm pretty sure there are 0 practical implications, since
    770 	 * it means generations would have to skip rumpclient init.
    771 	 */
    772 	if (init_done == (mypid = getpid()))
    773 		return 0;
    774 
    775 	/* kq does not traverse fork() */
    776 	if (init_done != 0)
    777 		kq = -1;
    778 	init_done = mypid;
    779 
    780 	sigfillset(&fullset);
    781 
    782 	/*
    783 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
    784 	 * wann wird man je verstehen?  wann wird man je verstehen?
    785 	 */
    786 #define FINDSYM2(_name_,_syscall_)					\
    787 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    788 	    #_syscall_)) == NULL) {					\
    789 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    790 			host_##_name_ = _name_; /* static fallback */	\
    791 		if (host_##_name_ == NULL)				\
    792 			errx(1, "cannot find %s: %s", #_syscall_,	\
    793 			    dlerror());					\
    794 	}
    795 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    796 	FINDSYM2(socket,__socket30)
    797 	FINDSYM(close)
    798 	FINDSYM(connect)
    799 	FINDSYM(fcntl)
    800 	FINDSYM(poll)
    801 	FINDSYM(read)
    802 	FINDSYM(sendmsg)
    803 	FINDSYM(setsockopt)
    804 	FINDSYM(dup)
    805 	FINDSYM(kqueue)
    806 	FINDSYM(execve)
    807 #if !__NetBSD_Prereq__(5,99,7)
    808 	FINDSYM(kevent)
    809 #else
    810 	FINDSYM2(kevent,_sys___kevent50)
    811 #endif
    812 #undef	FINDSYM
    813 #undef	FINDSY2
    814 
    815 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    816 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    817 			errno = ENOENT;
    818 			goto out;
    819 		}
    820 	}
    821 
    822 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    823 		errno = error;
    824 		goto out;
    825 	}
    826 
    827 	if (doinit() == -1)
    828 		goto out;
    829 
    830 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    831 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    832 		unsetenv("RUMPCLIENT__EXECFD");
    833 		hstype = HANDSHAKE_EXEC;
    834 	} else {
    835 		if (doconnect(true) == -1)
    836 			goto out;
    837 		hstype = HANDSHAKE_GUEST;
    838 	}
    839 
    840 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    841 	if (error) {
    842 		pthread_mutex_destroy(&clispc.spc_mtx);
    843 		pthread_cond_destroy(&clispc.spc_cv);
    844 		if (clispc.spc_fd != -1)
    845 			host_close(clispc.spc_fd);
    846 		errno = error;
    847 		goto out;
    848 	}
    849 	rv = 0;
    850 
    851  out:
    852 	if (rv == -1)
    853 		init_done = 0;
    854 	return rv;
    855 }
    856 
/*
 * State captured by rumpclient_prefork() for use around a host fork.
 */
struct rumpclient_fork {
	/* copied from the prefork response; sent in the fork handshake */
	uint32_t fork_auth[AUTHLEN];
	/* copy of clispc at prefork time; restored by fork_vparent() */
	struct spclient fork_spc;
	/* value of kq at prefork time; restored by fork_vparent() */
	int fork_kq;
};
    862 
    863 struct rumpclient_fork *
    864 rumpclient_prefork(void)
    865 {
    866 	struct rumpclient_fork *rpf;
    867 	sigset_t omask;
    868 	void *resp;
    869 	int rv;
    870 
    871 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    872 	rpf = malloc(sizeof(*rpf));
    873 	if (rpf == NULL)
    874 		goto out;
    875 
    876 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    877 		free(rpf);
    878 		errno = rv;
    879 		rpf = NULL;
    880 		goto out;
    881 	}
    882 
    883 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    884 	free(resp);
    885 
    886 	rpf->fork_spc = clispc;
    887 	rpf->fork_kq = kq;
    888 
    889  out:
    890 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    891 	return rpf;
    892 }
    893 
    894 int
    895 rumpclient_fork_init(struct rumpclient_fork *rpf)
    896 {
    897 	int error;
    898 	int osock;
    899 
    900 	osock = clispc.spc_fd;
    901 	memset(&clispc, 0, sizeof(clispc));
    902 	clispc.spc_fd = osock;
    903 
    904 	kq = -1; /* kqueue descriptor is not copied over fork() */
    905 
    906 	if (doinit() == -1)
    907 		return -1;
    908 	if (doconnect(false) == -1)
    909 		return -1;
    910 
    911 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    912 	    0, false);
    913 	if (error) {
    914 		pthread_mutex_destroy(&clispc.spc_mtx);
    915 		pthread_cond_destroy(&clispc.spc_cv);
    916 		errno = error;
    917 		return -1;
    918 	}
    919 
    920 	return 0;
    921 }
    922 
/*
 * Called when a fork prepared with rumpclient_prefork() did not take
 * place.  Not implemented yet: the function does nothing, and in
 * particular it does not release rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
    930 
    931 void
    932 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
    933 {
    934 
    935 	clispc = rpf->fork_spc;
    936 	kq = rpf->fork_kq;
    937 }
    938 
    939 void
    940 rumpclient_setconnretry(time_t timeout)
    941 {
    942 
    943 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
    944 		return; /* gigo */
    945 
    946 	retrytimo = timeout;
    947 }
    948 
    949 int
    950 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
    951 {
    952 	int fd = *fdp;
    953 	int untilfd, rv;
    954 	int newfd;
    955 
    956 	switch (variant) {
    957 	case RUMPCLIENT_CLOSE_FCLOSEM:
    958 		untilfd = MAX(clispc.spc_fd, kq);
    959 		for (; fd <= untilfd; fd++) {
    960 			if (fd == clispc.spc_fd || fd == kq)
    961 				continue;
    962 			rv = host_close(fd);
    963 			if (rv == -1)
    964 				return -1;
    965 		}
    966 		*fdp = fd;
    967 		break;
    968 
    969 	case RUMPCLIENT_CLOSE_CLOSE:
    970 	case RUMPCLIENT_CLOSE_DUP2:
    971 		if (fd == clispc.spc_fd) {
    972 			struct kevent kev[2];
    973 
    974 			newfd = dupgood(clispc.spc_fd, 1);
    975 			if (newfd == -1)
    976 				return -1;
    977 			/*
    978 			 * now, we have a new socket number, so change
    979 			 * the file descriptor that kqueue is
    980 			 * monitoring.  remove old and add new.
    981 			 */
    982 			EV_SET(&kev[0], clispc.spc_fd,
    983 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
    984 			EV_SET(&kev[1], newfd,
    985 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    986 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
    987 				int sverrno = errno;
    988 				host_close(newfd);
    989 				errno = sverrno;
    990 				return -1;
    991 			}
    992 			clispc.spc_fd = newfd;
    993 		}
    994 		if (fd == kq) {
    995 			newfd = dupgood(kq, 1);
    996 			if (newfd == -1)
    997 				return -1;
    998 			kq = newfd;
    999 		}
   1000 		break;
   1001 	}
   1002 
   1003 	return 0;
   1004 }
   1005 
/*
 * Fork for rump clients.  Hands the host fork() to
 * rumpclient__dofork() and returns whatever that returns.
 *
 * NOTE(review): rumpclient__dofork() is not defined in this file
 * (presumably in <rump/rumpclient.h>); see there for the steps taken
 * around the fork.
 */
pid_t
rumpclient_fork()
{

	return rumpclient__dofork(fork);
}
   1012 
   1013 /*
   1014  * Process is about to exec.  Save info about our existing connection
   1015  * in the env.  rumpclient will check for this info in init().
   1016  * This is mostly for the benefit of rumphijack, but regular applications
   1017  * may use it as well.
   1018  */
   1019 int
   1020 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1021 {
   1022 	char buf[4096];
   1023 	char **newenv;
   1024 	char *envstr, *envstr2;
   1025 	size_t nelem;
   1026 	int rv, sverrno;
   1027 
   1028 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1029 	    clispc.spc_fd, kq);
   1030 	envstr = malloc(strlen(buf)+1);
   1031 	if (envstr == NULL) {
   1032 		return ENOMEM;
   1033 	}
   1034 	strcpy(envstr, buf);
   1035 
   1036 	/* do we have a fully parsed url we want to forward in the env? */
   1037 	if (*parsedurl != '\0') {
   1038 		snprintf(buf, sizeof(buf),
   1039 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1040 		envstr2 = malloc(strlen(buf)+1);
   1041 		if (envstr2 == NULL) {
   1042 			free(envstr);
   1043 			return ENOMEM;
   1044 		}
   1045 		strcpy(envstr2, buf);
   1046 	} else {
   1047 		envstr2 = NULL;
   1048 	}
   1049 
   1050 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1051 		continue;
   1052 
   1053 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1054 	if (newenv == NULL) {
   1055 		free(envstr2);
   1056 		free(envstr);
   1057 		return ENOMEM;
   1058 	}
   1059 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1060 
   1061 	newenv[nelem] = envstr;
   1062 	newenv[nelem+1] = envstr2;
   1063 	newenv[nelem+2] = NULL;
   1064 
   1065 	rv = host_execve(path, argv, newenv);
   1066 
   1067 	_DIAGASSERT(rv != 0);
   1068 	sverrno = errno;
   1069 	free(envstr2);
   1070 	free(envstr);
   1071 	free(newenv);
   1072 	errno = sverrno;
   1073 	return rv;
   1074 }
   1075 
/*
 * daemon(3) for rump clients: prepare for the fork with
 * rumpclient_prefork(), call the host daemon() and then set up the
 * connection again with rumpclient_fork_init().
 *
 * nochdir, noclose: passed to daemon() unchanged
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno, rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/* rf is handed over to the cancel routine */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * The fork data was allocated by rumpclient_prefork() and is not
	 * referenced by anything once the handshake has been attempted,
	 * so release it here, keeping errno intact for the caller.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;
}
   1097