Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.44
      1 /*      $NetBSD: rumpclient.c,v 1.44 2011/03/15 09:35:05 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __RCSID("$NetBSD: rumpclient.c,v 1.44 2011/03/15 09:35:05 pooka Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/event.h>
     37 #include <sys/mman.h>
     38 #include <sys/socket.h>
     39 
     40 #include <arpa/inet.h>
     41 #include <netinet/in.h>
     42 #include <netinet/tcp.h>
     43 
     44 #include <assert.h>
     45 #include <dlfcn.h>
     46 #include <err.h>
     47 #include <errno.h>
     48 #include <fcntl.h>
     49 #include <link.h>
     50 #include <poll.h>
     51 #include <pthread.h>
     52 #include <signal.h>
     53 #include <stdarg.h>
     54 #include <stdbool.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 
     60 #include <rump/rumpclient.h>
     61 
/*
 * Pointers to the host versions of the calls this library makes on its
 * own behalf.  They are all NULL until rumpclient_init() resolves them
 * with FINDSYM()/FINDSYM2().
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c (included
 * below) to route its I/O through these pointers -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue is used by the receive path to wait for socket data and signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
     78 
     79 #include "sp_common.c"
     80 
/* The one and only connection to the server; fd is -1 while unconnected. */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor used by the receive path; -1 when none is open */
static int kq = -1;
/* set containing every signal; filled in by rumpclient_init() */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
     99 
    100 static int
    101 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
    102 {
    103 	struct timeval starttime, curtime;
    104 	time_t prevreconmsg;
    105 	unsigned reconretries;
    106 	int rv;
    107 
    108 	for (prevreconmsg = 0, reconretries = 0;;) {
    109 		rv = dosend(spc, iov, iovlen);
    110 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    111 			/* no persistent connections */
    112 			if (retrytimo == 0) {
    113 				rv = ENOTCONN;
    114 				break;
    115 			}
    116 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    117 				_exit(1);
    118 
    119 			if (!prevreconmsg) {
    120 				prevreconmsg = time(NULL);
    121 				gettimeofday(&starttime, NULL);
    122 			}
    123 			if (reconretries == 1) {
    124 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    125 					rv = ENOTCONN;
    126 					break;
    127 				}
    128 				fprintf(stderr, "rump_sp: connection to "
    129 				    "kernel lost, trying to reconnect ...\n");
    130 			} else if (time(NULL) - prevreconmsg > 120) {
    131 				fprintf(stderr, "rump_sp: still trying to "
    132 				    "reconnect ...\n");
    133 				prevreconmsg = time(NULL);
    134 			}
    135 
    136 			/* check that we aren't over the limit */
    137 			if (retrytimo > 0) {
    138 				struct timeval tmp;
    139 
    140 				gettimeofday(&curtime, NULL);
    141 				timersub(&curtime, &starttime, &tmp);
    142 				if (tmp.tv_sec >= retrytimo) {
    143 					fprintf(stderr, "rump_sp: reconnect "
    144 					    "failed, %lld second timeout\n",
    145 					    (long long)retrytimo);
    146 					return ENOTCONN;
    147 				}
    148 			}
    149 
    150 			/* adhoc backoff timer */
    151 			if (reconretries < 10) {
    152 				usleep(100000 * reconretries);
    153 			} else {
    154 				sleep(MIN(10, reconretries-9));
    155 			}
    156 			reconretries++;
    157 
    158 			if ((rv = doconnect()) != 0)
    159 				continue;
    160 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    161 			    NULL, 0, true)) != 0)
    162 				continue;
    163 
    164 			/*
    165 			 * ok, reconnect succesful.  we need to return to
    166 			 * the upper layer to get the entire PDU resent.
    167 			 */
    168 			if (reconretries != 1)
    169 				fprintf(stderr, "rump_sp: reconnected!\n");
    170 			rv = EAGAIN;
    171 			break;
    172 		} else {
    173 			_DIAGASSERT(errno != EAGAIN);
    174 			break;
    175 		}
    176 	}
    177 
    178 	return rv;
    179 }
    180 
    181 static int
    182 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
    183 	bool keeplock)
    184 {
    185 	uint64_t mygen;
    186 	bool imalive = true;
    187 
    188 	pthread_mutex_lock(&spc->spc_mtx);
    189 	if (!keeplock)
    190 		sendunlockl(spc);
    191 	mygen = spc->spc_generation;
    192 
    193 	rw->rw_error = 0;
    194 	while (!rw->rw_done && rw->rw_error == 0) {
    195 		if (__predict_false(spc->spc_generation != mygen || !imalive))
    196 			break;
    197 
    198 		/* are we free to receive? */
    199 		if (spc->spc_istatus == SPCSTATUS_FREE) {
    200 			struct kevent kev[8];
    201 			int gotresp, dosig, rv, i;
    202 
    203 			spc->spc_istatus = SPCSTATUS_BUSY;
    204 			pthread_mutex_unlock(&spc->spc_mtx);
    205 
    206 			dosig = 0;
    207 			for (gotresp = 0; !gotresp; ) {
    208 				/*
    209 				 * typically we don't have a frame waiting
    210 				 * when we come in here, so call kevent now
    211 				 */
    212 				rv = host_kevent(kq, NULL, 0,
    213 				    kev, __arraycount(kev), NULL);
    214 
    215 				if (__predict_false(rv == -1)) {
    216 					goto activity;
    217 				}
    218 
    219 				/*
    220 				 * XXX: don't know how this can happen
    221 				 * (timeout cannot expire since there
    222 				 * isn't one), but it does happen.
    223 				 * treat it as an expectional condition
    224 				 * and go through tryread to determine
    225 				 * alive status.
    226 				 */
    227 				if (__predict_false(rv == 0))
    228 					goto activity;
    229 
    230 				for (i = 0; i < rv; i++) {
    231 					if (kev[i].filter == EVFILT_SIGNAL)
    232 						dosig++;
    233 				}
    234 				if (dosig)
    235 					goto cleanup;
    236 
    237 				/*
    238 				 * ok, activity.  try to read a frame to
    239 				 * determine what happens next.
    240 				 */
    241  activity:
    242 				switch (readframe(spc)) {
    243 				case 0:
    244 					continue;
    245 				case -1:
    246 					imalive = false;
    247 					goto cleanup;
    248 				default:
    249 					/* case 1 */
    250 					break;
    251 				}
    252 
    253 				switch (spc->spc_hdr.rsp_class) {
    254 				case RUMPSP_RESP:
    255 				case RUMPSP_ERROR:
    256 					kickwaiter(spc);
    257 					gotresp = spc->spc_hdr.rsp_reqno ==
    258 					    rw->rw_reqno;
    259 					break;
    260 				case RUMPSP_REQ:
    261 					handlereq(spc);
    262 					break;
    263 				default:
    264 					/* panic */
    265 					break;
    266 				}
    267 			}
    268 
    269  cleanup:
    270 			pthread_mutex_lock(&spc->spc_mtx);
    271 			if (spc->spc_istatus == SPCSTATUS_WANTED)
    272 				kickall(spc);
    273 			spc->spc_istatus = SPCSTATUS_FREE;
    274 
    275 			/* take one for the team */
    276 			if (dosig) {
    277 				pthread_mutex_unlock(&spc->spc_mtx);
    278 				pthread_sigmask(SIG_SETMASK, mask, NULL);
    279 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
    280 				pthread_mutex_lock(&spc->spc_mtx);
    281 			}
    282 		} else {
    283 			spc->spc_istatus = SPCSTATUS_WANTED;
    284 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
    285 		}
    286 	}
    287 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
    288 	pthread_mutex_unlock(&spc->spc_mtx);
    289 	pthread_cond_destroy(&rw->rw_cv);
    290 
    291 	if (spc->spc_generation != mygen || !imalive) {
    292 		return ENOTCONN;
    293 	}
    294 	return rw->rw_error;
    295 }
    296 
    297 static int
    298 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    299 	const void *data, size_t dlen, void **resp)
    300 {
    301 	struct rsp_hdr rhdr;
    302 	struct respwait rw;
    303 	struct iovec iov[2];
    304 	int rv;
    305 
    306 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    307 	rhdr.rsp_class = RUMPSP_REQ;
    308 	rhdr.rsp_type = RUMPSP_SYSCALL;
    309 	rhdr.rsp_sysnum = sysnum;
    310 
    311 	IOVPUT(iov[0], rhdr);
    312 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    313 
    314 	do {
    315 		putwait(spc, &rw, &rhdr);
    316 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    317 			unputwait(spc, &rw);
    318 			continue;
    319 		}
    320 
    321 		rv = cliwaitresp(spc, &rw, omask, false);
    322 		if (rv == ENOTCONN)
    323 			rv = EAGAIN;
    324 	} while (rv == EAGAIN);
    325 
    326 	*resp = rw.rw_data;
    327 	return rv;
    328 }
    329 
    330 static int
    331 handshake_req(struct spclient *spc, int type, void *data,
    332 	int cancel, bool haslock)
    333 {
    334 	struct handshake_fork rf;
    335 	const char *myprogname = NULL; /* XXXgcc */
    336 	struct rsp_hdr rhdr;
    337 	struct respwait rw;
    338 	sigset_t omask;
    339 	size_t bonus;
    340 	struct iovec iov[2];
    341 	int rv;
    342 
    343 	if (type == HANDSHAKE_FORK) {
    344 		bonus = sizeof(rf);
    345 	} else {
    346 		myprogname = getprogname();
    347 		bonus = strlen(myprogname)+1;
    348 	}
    349 
    350 	/* performs server handshake */
    351 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    352 	rhdr.rsp_class = RUMPSP_REQ;
    353 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    354 	rhdr.rsp_handshake = type;
    355 
    356 	IOVPUT(iov[0], rhdr);
    357 
    358 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    359 	if (haslock)
    360 		putwait_locked(spc, &rw, &rhdr);
    361 	else
    362 		putwait(spc, &rw, &rhdr);
    363 	if (type == HANDSHAKE_FORK) {
    364 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    365 		rf.rf_cancel = cancel;
    366 		IOVPUT(iov[1], rf);
    367 	} else {
    368 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    369 	}
    370 	rv = send_with_recon(spc, iov, __arraycount(iov));
    371 	if (rv || cancel) {
    372 		if (haslock)
    373 			unputwait_locked(spc, &rw);
    374 		else
    375 			unputwait(spc, &rw);
    376 		if (cancel) {
    377 			goto out;
    378 		}
    379 	} else {
    380 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    381 	}
    382 	if (rv)
    383 		goto out;
    384 
    385 	rv = *(int *)rw.rw_data;
    386 	free(rw.rw_data);
    387 
    388  out:
    389 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    390 	return rv;
    391 }
    392 
    393 static int
    394 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    395 {
    396 	struct rsp_hdr rhdr;
    397 	struct respwait rw;
    398 	struct iovec iov[1];
    399 	int rv;
    400 
    401 	rhdr.rsp_len = sizeof(rhdr);
    402 	rhdr.rsp_class = RUMPSP_REQ;
    403 	rhdr.rsp_type = RUMPSP_PREFORK;
    404 	rhdr.rsp_error = 0;
    405 
    406 	IOVPUT(iov[0], rhdr);
    407 
    408 	do {
    409 		putwait(spc, &rw, &rhdr);
    410 		rv = send_with_recon(spc, iov, __arraycount(iov));
    411 		if (rv != 0) {
    412 			unputwait(spc, &rw);
    413 			continue;
    414 		}
    415 
    416 		rv = cliwaitresp(spc, &rw, omask, false);
    417 		if (rv == ENOTCONN)
    418 			rv = EAGAIN;
    419 	} while (rv == EAGAIN);
    420 
    421 	*resp = rw.rw_data;
    422 	return rv;
    423 }
    424 
    425 /*
    426  * prevent response code from deadlocking with reconnect code
    427  */
    428 static int
    429 resp_sendlock(struct spclient *spc)
    430 {
    431 	int rv = 0;
    432 
    433 	pthread_mutex_lock(&spc->spc_mtx);
    434 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    435 		if (__predict_false(spc->spc_reconnecting)) {
    436 			rv = EBUSY;
    437 			goto out;
    438 		}
    439 		spc->spc_ostatus = SPCSTATUS_WANTED;
    440 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    441 	}
    442 	spc->spc_ostatus = SPCSTATUS_BUSY;
    443 
    444  out:
    445 	pthread_mutex_unlock(&spc->spc_mtx);
    446 	return rv;
    447 }
    448 
    449 static void
    450 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    451 	int wantstr)
    452 {
    453 	struct rsp_hdr rhdr;
    454 	struct iovec iov[2];
    455 
    456 	if (wantstr)
    457 		dlen = MIN(dlen, strlen(data)+1);
    458 
    459 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    460 	rhdr.rsp_reqno = reqno;
    461 	rhdr.rsp_class = RUMPSP_RESP;
    462 	rhdr.rsp_type = RUMPSP_COPYIN;
    463 	rhdr.rsp_sysnum = 0;
    464 
    465 	IOVPUT(iov[0], rhdr);
    466 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    467 
    468 	if (resp_sendlock(spc) != 0)
    469 		return;
    470 	(void)SENDIOV(spc, iov);
    471 	sendunlock(spc);
    472 }
    473 
    474 static void
    475 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    476 {
    477 	struct rsp_hdr rhdr;
    478 	struct iovec iov[2];
    479 
    480 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    481 	rhdr.rsp_reqno = reqno;
    482 	rhdr.rsp_class = RUMPSP_RESP;
    483 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    484 	rhdr.rsp_sysnum = 0;
    485 
    486 	IOVPUT(iov[0], rhdr);
    487 	IOVPUT(iov[1], addr);
    488 
    489 	if (resp_sendlock(spc) != 0)
    490 		return;
    491 	(void)SENDIOV(spc, iov);
    492 	sendunlock(spc);
    493 }
    494 
    495 int
    496 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    497 	register_t *retval)
    498 {
    499 	struct rsp_sysresp *resp;
    500 	sigset_t omask;
    501 	void *rdata;
    502 	int rv;
    503 
    504 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    505 
    506 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    507 	    sysnum, data, dlen));
    508 
    509 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    510 	if (rv)
    511 		goto out;
    512 
    513 	resp = rdata;
    514 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    515 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    516 
    517 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    518 	rv = resp->rsys_error;
    519 	free(rdata);
    520 
    521  out:
    522 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    523 	return rv;
    524 }
    525 
    526 static void
    527 handlereq(struct spclient *spc)
    528 {
    529 	struct rsp_copydata *copydata;
    530 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    531 	void *mapaddr;
    532 	size_t maplen;
    533 	int reqtype = spc->spc_hdr.rsp_type;
    534 
    535 	switch (reqtype) {
    536 	case RUMPSP_COPYIN:
    537 	case RUMPSP_COPYINSTR:
    538 		/*LINTED*/
    539 		copydata = (struct rsp_copydata *)spc->spc_buf;
    540 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    541 		    copydata->rcp_addr, copydata->rcp_len));
    542 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    543 		    copydata->rcp_addr, copydata->rcp_len,
    544 		    reqtype == RUMPSP_COPYINSTR);
    545 		break;
    546 	case RUMPSP_COPYOUT:
    547 	case RUMPSP_COPYOUTSTR:
    548 		/*LINTED*/
    549 		copydata = (struct rsp_copydata *)spc->spc_buf;
    550 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    551 		    copydata->rcp_addr, copydata->rcp_len));
    552 		/*LINTED*/
    553 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    554 		    copydata->rcp_len);
    555 		break;
    556 	case RUMPSP_ANONMMAP:
    557 		/*LINTED*/
    558 		maplen = *(size_t *)spc->spc_buf;
    559 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    560 		    MAP_ANON, -1, 0);
    561 		if (mapaddr == MAP_FAILED)
    562 			mapaddr = NULL;
    563 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    564 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    565 		break;
    566 	case RUMPSP_RAISE:
    567 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    568 		raise((int)rhdr->rsp_signo);
    569 		/*
    570 		 * We most likely have signals blocked, but the signal
    571 		 * will be handled soon enough when we return.
    572 		 */
    573 		break;
    574 	default:
    575 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    576 		abort();
    577 		break;
    578 	}
    579 
    580 	spcfreebuf(spc);
    581 }
    582 
/*
 * Result of parsing the server URL in rumpclient_init(): index of the
 * matching parsetab[] entry and the server's socket address.
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    585 
/*
 * dup until we get a "good" fd which does not collide with stdio
 * (i.e. is larger than 2).  With mustchange set, the fd is duplicated
 * at least once and the descriptor passed in is left open; all other
 * intermediate descriptors are closed.  Returns the new descriptor,
 * or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int nold = 0;

	while (myfd != -1 && (mustchange || myfd <= 2)) {
		assert(nold < __arraycount(ofds));
		ofds[nold] = myfd;
		myfd = host_dup(myfd);
		if (mustchange)
			mustchange = 0;	/* forget it: prevent closing old fd */
		else
			nold++;
	}

	/* close the rejected descriptors, most recent first */
	while (nold-- > 0)
		host_close(ofds[nold]);

	return myfd;
}
    609 
    610 static int
    611 doconnect(void)
    612 {
    613 	struct respwait rw;
    614 	struct rsp_hdr rhdr;
    615 	struct kevent kev[NSIG+1];
    616 	char banner[MAXBANNER];
    617 	struct pollfd pfd;
    618 	int s, error, flags, i;
    619 	ssize_t n;
    620 
    621 	if (kq != -1)
    622 		host_close(kq);
    623 	kq = -1;
    624 	s = -1;
    625 
    626 	if (clispc.spc_fd != -1)
    627 		host_close(clispc.spc_fd);
    628 	clispc.spc_fd = -1;
    629 
    630 	/*
    631 	 * for reconnect, gate everyone out of the receiver code
    632 	 */
    633 	putwait_locked(&clispc, &rw, &rhdr);
    634 
    635 	pthread_mutex_lock(&clispc.spc_mtx);
    636 	clispc.spc_reconnecting = 1;
    637 	pthread_cond_broadcast(&clispc.spc_cv);
    638 	clispc.spc_generation++;
    639 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    640 		clispc.spc_istatus = SPCSTATUS_WANTED;
    641 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    642 	}
    643 	kickall(&clispc);
    644 
    645 	/*
    646 	 * we can release it already since we hold the
    647 	 * send lock during reconnect
    648 	 * XXX: assert it
    649 	 */
    650 	clispc.spc_istatus = SPCSTATUS_FREE;
    651 	pthread_mutex_unlock(&clispc.spc_mtx);
    652 	unputwait_locked(&clispc, &rw);
    653 
    654 	free(clispc.spc_buf);
    655 	clispc.spc_off = 0;
    656 
    657 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    658 	if (s == -1)
    659 		return -1;
    660 
    661 	pfd.fd = s;
    662 	pfd.events = POLLIN;
    663 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
    664 		if (errno == EINTR)
    665 			continue;
    666 		ERRLOG(("rump_sp: client connect failed: %s\n",
    667 		    strerror(errno)));
    668 		return -1;
    669 	}
    670 
    671 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    672 		ERRLOG(("rump_sp: connect hook failed\n"));
    673 		return -1;
    674 	}
    675 
    676 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
    677 		ERRLOG(("rump_sp: failed to read banner\n"));
    678 		return -1;
    679 	}
    680 
    681 	if (banner[n-1] != '\n') {
    682 		ERRLOG(("rump_sp: invalid banner\n"));
    683 		return -1;
    684 	}
    685 	banner[n] = '\0';
    686 	/* parse the banner some day */
    687 
    688 	flags = host_fcntl(s, F_GETFL, 0);
    689 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    690 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    691 		return -1;
    692 	}
    693 	clispc.spc_fd = s;
    694 	clispc.spc_state = SPCSTATE_RUNNING;
    695 	clispc.spc_reconnecting = 0;
    696 
    697 	/* setup kqueue, we want all signals and the fd */
    698 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    699 		ERRLOG(("rump_sp: cannot setup kqueue"));
    700 		return -1;
    701 	}
    702 
    703 	for (i = 0; i < NSIG; i++) {
    704 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    705 	}
    706 	EV_SET(&kev[NSIG], clispc.spc_fd,
    707 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    708 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    709 		ERRLOG(("rump_sp: kevent() failed"));
    710 		return -1;
    711 	}
    712 
    713 	return 0;
    714 }
    715 
    716 static int
    717 doinit(void)
    718 {
    719 
    720 	TAILQ_INIT(&clispc.spc_respwait);
    721 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    722 	pthread_cond_init(&clispc.spc_cv, NULL);
    723 
    724 	return 0;
    725 }
    726 
void *rumpclient__dlsym(void *, const char *);
void *rumphijack_dlsym(void *, const char *);

/*
 * Plain dlsym() wrapper.  It is also the weak default for
 * rumphijack_dlsym, the lookup routine rumpclient_init() uses to find
 * the host calls.
 */
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
    736 
    737 static pid_t init_done = 0;
    738 
    739 int
    740 rumpclient_init()
    741 {
    742 	char *p;
    743 	int error;
    744 	int rv = -1;
    745 	int hstype;
    746 	pid_t mypid;
    747 
    748 	/*
    749 	 * Make sure we're not riding the context of a previous
    750 	 * host fork.  Note: it's *possible* that after n>1 forks
    751 	 * we have the same pid as one of our exited parents, but
    752 	 * I'm pretty sure there are 0 practical implications, since
    753 	 * it means generations would have to skip rumpclient init.
    754 	 */
    755 	if (init_done == (mypid = getpid()))
    756 		return 0;
    757 
    758 	/* kq does not traverse fork() */
    759 	if (init_done != 0)
    760 		kq = -1;
    761 	init_done = mypid;
    762 
    763 	sigfillset(&fullset);
    764 
    765 	/*
    766 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
    767 	 * wann wird man je verstehen?  wann wird man je verstehen?
    768 	 */
    769 #define FINDSYM2(_name_,_syscall_)					\
    770 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    771 	    #_syscall_)) == NULL) {					\
    772 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    773 			host_##_name_ = _name_; /* static fallback */	\
    774 		if (host_##_name_ == NULL)				\
    775 			errx(1, "cannot find %s: %s", #_syscall_,	\
    776 			    dlerror());					\
    777 	}
    778 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    779 	FINDSYM2(socket,__socket30)
    780 	FINDSYM(close)
    781 	FINDSYM(connect)
    782 	FINDSYM(fcntl)
    783 	FINDSYM(poll)
    784 	FINDSYM(read)
    785 	FINDSYM(sendmsg)
    786 	FINDSYM(setsockopt)
    787 	FINDSYM(dup)
    788 	FINDSYM(kqueue)
    789 	FINDSYM(execve)
    790 #if !__NetBSD_Prereq__(5,99,7)
    791 	FINDSYM(kevent)
    792 #else
    793 	FINDSYM2(kevent,_sys___kevent50)
    794 #endif
    795 #undef	FINDSYM
    796 #undef	FINDSY2
    797 
    798 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    799 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    800 			errno = ENOENT;
    801 			goto out;
    802 		}
    803 	}
    804 
    805 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    806 		errno = error;
    807 		goto out;
    808 	}
    809 
    810 	if (doinit() == -1)
    811 		goto out;
    812 
    813 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    814 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    815 		unsetenv("RUMPCLIENT__EXECFD");
    816 		hstype = HANDSHAKE_EXEC;
    817 	} else {
    818 		if (doconnect() == -1)
    819 			goto out;
    820 		hstype = HANDSHAKE_GUEST;
    821 	}
    822 
    823 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    824 	if (error) {
    825 		pthread_mutex_destroy(&clispc.spc_mtx);
    826 		pthread_cond_destroy(&clispc.spc_cv);
    827 		if (clispc.spc_fd != -1)
    828 			host_close(clispc.spc_fd);
    829 		errno = error;
    830 		goto out;
    831 	}
    832 	rv = 0;
    833 
    834  out:
    835 	if (rv == -1)
    836 		init_done = 0;
    837 	return rv;
    838 }
    839 
/*
 * State handed from rumpclient_prefork() to the calls made after the
 * host fork.
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
    845 
    846 struct rumpclient_fork *
    847 rumpclient_prefork(void)
    848 {
    849 	struct rumpclient_fork *rpf;
    850 	sigset_t omask;
    851 	void *resp;
    852 	int rv;
    853 
    854 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    855 	rpf = malloc(sizeof(*rpf));
    856 	if (rpf == NULL)
    857 		goto out;
    858 
    859 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    860 		free(rpf);
    861 		errno = rv;
    862 		rpf = NULL;
    863 		goto out;
    864 	}
    865 
    866 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    867 	free(resp);
    868 
    869 	rpf->fork_spc = clispc;
    870 	rpf->fork_kq = kq;
    871 
    872  out:
    873 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    874 	return rpf;
    875 }
    876 
    877 int
    878 rumpclient_fork_init(struct rumpclient_fork *rpf)
    879 {
    880 	int error;
    881 	int osock;
    882 
    883 	osock = clispc.spc_fd;
    884 	memset(&clispc, 0, sizeof(clispc));
    885 	clispc.spc_fd = osock;
    886 
    887 	kq = -1; /* kqueue descriptor is not copied over fork() */
    888 
    889 	if (doinit() == -1)
    890 		return -1;
    891 	if (doconnect() == -1)
    892 		return -1;
    893 
    894 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    895 	    0, false);
    896 	if (error) {
    897 		pthread_mutex_destroy(&clispc.spc_mtx);
    898 		pthread_cond_destroy(&clispc.spc_cv);
    899 		errno = error;
    900 		return -1;
    901 	}
    902 
    903 	return 0;
    904 }
    905 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the call does nothing.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
    913 
    914 void
    915 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
    916 {
    917 
    918 	clispc = rpf->fork_spc;
    919 	kq = rpf->fork_kq;
    920 }
    921 
    922 void
    923 rumpclient_setconnretry(time_t timeout)
    924 {
    925 
    926 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
    927 		return; /* gigo */
    928 
    929 	retrytimo = timeout;
    930 }
    931 
    932 int
    933 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
    934 {
    935 	int fd = *fdp;
    936 	int untilfd, rv;
    937 	int newfd;
    938 
    939 	switch (variant) {
    940 	case RUMPCLIENT_CLOSE_FCLOSEM:
    941 		untilfd = MAX(clispc.spc_fd, kq);
    942 		for (; fd <= untilfd; fd++) {
    943 			if (fd == clispc.spc_fd || fd == kq)
    944 				continue;
    945 			rv = host_close(fd);
    946 			if (rv == -1)
    947 				return -1;
    948 		}
    949 		*fdp = fd;
    950 		break;
    951 
    952 	case RUMPCLIENT_CLOSE_CLOSE:
    953 	case RUMPCLIENT_CLOSE_DUP2:
    954 		if (fd == clispc.spc_fd) {
    955 			struct kevent kev[2];
    956 
    957 			newfd = dupgood(clispc.spc_fd, 1);
    958 			if (newfd == -1)
    959 				return -1;
    960 			/*
    961 			 * now, we have a new socket number, so change
    962 			 * the file descriptor that kqueue is
    963 			 * monitoring.  remove old and add new.
    964 			 */
    965 			EV_SET(&kev[0], clispc.spc_fd,
    966 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
    967 			EV_SET(&kev[1], newfd,
    968 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    969 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
    970 				int sverrno = errno;
    971 				host_close(newfd);
    972 				errno = sverrno;
    973 				return -1;
    974 			}
    975 			clispc.spc_fd = newfd;
    976 		}
    977 		if (fd == kq) {
    978 			newfd = dupgood(kq, 1);
    979 			if (newfd == -1)
    980 				return -1;
    981 			kq = newfd;
    982 		}
    983 		break;
    984 	}
    985 
    986 	return 0;
    987 }
    988 
/*
 * fork() for rump clients: hands the host's fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork()
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
    995 
    996 /*
    997  * Process is about to exec.  Save info about our existing connection
    998  * in the env.  rumpclient will check for this info in init().
    999  * This is mostly for the benefit of rumphijack, but regular applications
   1000  * may use it as well.
   1001  */
   1002 int
   1003 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1004 {
   1005 	char buf[4096];
   1006 	char **newenv;
   1007 	char *envstr, *envstr2;
   1008 	size_t nelem;
   1009 	int rv, sverrno;
   1010 
   1011 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1012 	    clispc.spc_fd, kq);
   1013 	envstr = malloc(strlen(buf)+1);
   1014 	if (envstr == NULL) {
   1015 		return ENOMEM;
   1016 	}
   1017 	strcpy(envstr, buf);
   1018 
   1019 	/* do we have a fully parsed url we want to forward in the env? */
   1020 	if (*parsedurl != '\0') {
   1021 		snprintf(buf, sizeof(buf),
   1022 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1023 		envstr2 = malloc(strlen(buf)+1);
   1024 		if (envstr2 == NULL) {
   1025 			free(envstr);
   1026 			return ENOMEM;
   1027 		}
   1028 		strcpy(envstr2, buf);
   1029 	} else {
   1030 		envstr2 = NULL;
   1031 	}
   1032 
   1033 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1034 		continue;
   1035 
   1036 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1037 	if (newenv == NULL) {
   1038 		free(envstr2);
   1039 		free(envstr);
   1040 		return ENOMEM;
   1041 	}
   1042 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1043 
   1044 	newenv[nelem] = envstr;
   1045 	newenv[nelem+1] = envstr2;
   1046 	newenv[nelem+2] = NULL;
   1047 
   1048 	rv = host_execve(path, argv, newenv);
   1049 
   1050 	_DIAGASSERT(rv != 0);
   1051 	sverrno = errno;
   1052 	free(envstr2);
   1053 	free(envstr);
   1054 	free(newenv);
   1055 	errno = sverrno;
   1056 	return rv;
   1057 }
   1058 
/*
 * daemon(3) for rump clients: prepare a fork with the server, let the
 * host daemon() do its work and then set up the connection again in
 * the surviving process.
 *
 * nochdir and noclose are passed to daemon() unchanged.
 * Returns 0 on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		/*
		 * rumpclient_fork_cancel() does not release the context
		 * (it is unimplemented), so do it here.
		 */
		free(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/* the fork context is not needed after init; don't leak it */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;
}
   1080