Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.33
      1 /*      $NetBSD: rumpclient.c,v 1.33 2011/02/18 16:22:10 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __RCSID("$NetBSD");
     34 
     35 #include <sys/param.h>
     36 #include <sys/event.h>
     37 #include <sys/mman.h>
     38 #include <sys/socket.h>
     39 
     40 #include <arpa/inet.h>
     41 #include <netinet/in.h>
     42 #include <netinet/tcp.h>
     43 
     44 #include <assert.h>
     45 #include <dlfcn.h>
     46 #include <errno.h>
     47 #include <fcntl.h>
     48 #include <link.h>
     49 #include <poll.h>
     50 #include <pthread.h>
     51 #include <signal.h>
     52 #include <stdarg.h>
     53 #include <stdbool.h>
     54 #include <stdio.h>
     55 #include <stdlib.h>
     56 #include <string.h>
     57 #include <unistd.h>
     58 
     59 #include <rump/rumpclient.h>
     60 
/*
 * Pointers to the host's own implementations of the calls this library
 * uses for its transport.  rumpclient_init() fills them in through
 * rumpclient_dlsym(RTLD_NEXT, ...); a lookup that fails leaves the
 * corresponding pointer NULL.
 *
 * NOTE(review): HOSTOPS is defined right before sp_common.c is included;
 * presumably it tells that file to go through these pointers -- confirm
 * against sp_common.c.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendto)(int, const void *, size_t, int,
		       const struct sockaddr *, socklen_t);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* event notification: one kqueue watches the socket and all signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
     78 
     79 #include "sp_common.c"
     80 
/* The single client connection of this process; spc_fd is -1 while closed. */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor set up by doconnect(); -1 while there is none */
static int kq = -1;
/* signal set with every signal in it; filled by rumpclient_init() */
static sigset_t fullset;

static int doconnect(bool);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Reconnect policy consulted by send_with_recon() and changed with
 * rumpclient_setconnretry().
 *
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;
     96 
     97 static int
     98 send_with_recon(struct spclient *spc, const void *data, size_t dlen)
     99 {
    100 	struct timeval starttime, curtime;
    101 	time_t prevreconmsg;
    102 	unsigned reconretries;
    103 	int rv;
    104 
    105 	for (prevreconmsg = 0, reconretries = 0;;) {
    106 		rv = dosend(spc, data, dlen);
    107 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    108 			/* no persistent connections */
    109 			if (retrytimo == 0) {
    110 				rv = ENOTCONN;
    111 				break;
    112 			}
    113 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    114 				exit(1);
    115 
    116 			if (!prevreconmsg) {
    117 				prevreconmsg = time(NULL);
    118 				gettimeofday(&starttime, NULL);
    119 			}
    120 			if (reconretries == 1) {
    121 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    122 					rv = ENOTCONN;
    123 					break;
    124 				}
    125 				fprintf(stderr, "rump_sp: connection to "
    126 				    "kernel lost, trying to reconnect ...\n");
    127 			} else if (time(NULL) - prevreconmsg > 120) {
    128 				fprintf(stderr, "rump_sp: still trying to "
    129 				    "reconnect ...\n");
    130 				prevreconmsg = time(NULL);
    131 			}
    132 
    133 			/* check that we aren't over the limit */
    134 			if (retrytimo > 0) {
    135 				struct timeval tmp;
    136 
    137 				gettimeofday(&curtime, NULL);
    138 				timersub(&curtime, &starttime, &tmp);
    139 				if (tmp.tv_sec >= retrytimo) {
    140 					fprintf(stderr, "rump_sp: reconnect "
    141 					    "failed, %lld second timeout\n",
    142 					    (long long)retrytimo);
    143 					return ENOTCONN;
    144 				}
    145 			}
    146 
    147 			/* adhoc backoff timer */
    148 			if (reconretries < 10) {
    149 				usleep(100000 * reconretries);
    150 			} else {
    151 				sleep(MIN(10, reconretries-9));
    152 			}
    153 			reconretries++;
    154 
    155 			if ((rv = doconnect(false)) != 0)
    156 				continue;
    157 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    158 			    NULL, 0, true)) != 0)
    159 				continue;
    160 
    161 			/*
    162 			 * ok, reconnect succesful.  we need to return to
    163 			 * the upper layer to get the entire PDU resent.
    164 			 */
    165 			if (reconretries != 1)
    166 				fprintf(stderr, "rump_sp: reconnected!\n");
    167 			rv = EAGAIN;
    168 			break;
    169 		} else {
    170 			_DIAGASSERT(errno != EAGAIN);
    171 			break;
    172 		}
    173 	}
    174 
    175 	return rv;
    176 }
    177 
/*
 * Wait until the response registered in rw has arrived (rw_done set) or
 * failed (rw_error set).
 *
 * If nobody else is receiving (spc_istatus is SPCSTATUS_FREE) the caller
 * becomes the receiver: it reads frames, dispatches responses with
 * kickwaiter() and server requests with handlereq(), and blocks in
 * kevent() on kq while no complete frame is available.  Otherwise it
 * marks the receiver as wanted and sleeps on rw_cv.
 *
 * spc      - connection to wait on
 * rw       - wait record; removed from spc_respwait and its condition
 *            variable destroyed before returning
 * mask     - signal mask installed briefly when the kqueue reports a
 *            signal event
 * keeplock - when false, sendunlockl() is called on entry
 *
 * Returns ENOTCONN if the connection generation changed while waiting or
 * readframe() reported -1; otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; remember which one we started in */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			struct kevent kev[8];
			int gotresp, dosig, rv, i;

			/* claim the receiver role, then read without the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet: block for events */
					rv = host_kevent(kq, NULL, 0,
					    kev, __arraycount(kev), NULL);

					if (__predict_false(rv == -1)) {
						goto cleanup;
					}

					/*
					 * XXX: don't know how this can
					 * happen (timeout cannot expire
					 * since there isn't one), but
					 * it does happen
					 */
					if (__predict_false(rv == 0))
						continue;

					/* count signal events among the results */
					for (i = 0; i < rv; i++) {
						if (kev[i].filter
						    == EVFILT_SIGNAL)
							dosig++;
					}
					if (dosig)
						goto cleanup;

					continue;
				case -1:
					/* makes the function return ENOTCONN */
					imalive = false;
					goto cleanup;
				default:
					/* a frame is available; dispatch it below */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop receiving once it was our own reply */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role, waking anyone who wanted it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Install the caller-supplied mask and then
				 * block everything again; presumably this
				 * window lets the pending signal be delivered.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving: ask to be woken up */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    282 
    283 static int
    284 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    285 	const void *data, size_t dlen, void **resp)
    286 {
    287 	struct rsp_hdr rhdr;
    288 	struct respwait rw;
    289 	int rv;
    290 
    291 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    292 	rhdr.rsp_class = RUMPSP_REQ;
    293 	rhdr.rsp_type = RUMPSP_SYSCALL;
    294 	rhdr.rsp_sysnum = sysnum;
    295 
    296 	do {
    297 		putwait(spc, &rw, &rhdr);
    298 		if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) {
    299 			unputwait(spc, &rw);
    300 			continue;
    301 		}
    302 		if ((rv = send_with_recon(spc, data, dlen)) != 0) {
    303 			unputwait(spc, &rw);
    304 			continue;
    305 		}
    306 
    307 		rv = cliwaitresp(spc, &rw, omask, false);
    308 		if (rv == ENOTCONN)
    309 			rv = EAGAIN;
    310 	} while (rv == EAGAIN);
    311 
    312 	*resp = rw.rw_data;
    313 	return rv;
    314 }
    315 
    316 static int
    317 handshake_req(struct spclient *spc, int type, void *data,
    318 	int cancel, bool haslock)
    319 {
    320 	struct handshake_fork rf;
    321 	struct rsp_hdr rhdr;
    322 	struct respwait rw;
    323 	sigset_t omask;
    324 	size_t bonus;
    325 	int rv;
    326 
    327 	if (type == HANDSHAKE_FORK) {
    328 		bonus = sizeof(rf);
    329 	} else {
    330 		bonus = strlen(getprogname())+1;
    331 	}
    332 
    333 	/* performs server handshake */
    334 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    335 	rhdr.rsp_class = RUMPSP_REQ;
    336 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    337 	rhdr.rsp_handshake = type;
    338 
    339 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    340 	if (haslock)
    341 		putwait_locked(spc, &rw, &rhdr);
    342 	else
    343 		putwait(spc, &rw, &rhdr);
    344 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    345 	if (type == HANDSHAKE_FORK) {
    346 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    347 		rf.rf_cancel = cancel;
    348 		rv = send_with_recon(spc, &rf, sizeof(rf));
    349 	} else {
    350 		rv = dosend(spc, getprogname(), strlen(getprogname())+1);
    351 	}
    352 	if (rv || cancel) {
    353 		if (haslock)
    354 			unputwait_locked(spc, &rw);
    355 		else
    356 			unputwait(spc, &rw);
    357 		if (cancel) {
    358 			goto out;
    359 		}
    360 	} else {
    361 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    362 	}
    363 	if (rv)
    364 		goto out;
    365 
    366 	rv = *(int *)rw.rw_data;
    367 	free(rw.rw_data);
    368 
    369  out:
    370 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    371 	return rv;
    372 }
    373 
    374 static int
    375 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    376 {
    377 	struct rsp_hdr rhdr;
    378 	struct respwait rw;
    379 	int rv;
    380 
    381 	rhdr.rsp_len = sizeof(rhdr);
    382 	rhdr.rsp_class = RUMPSP_REQ;
    383 	rhdr.rsp_type = RUMPSP_PREFORK;
    384 	rhdr.rsp_error = 0;
    385 
    386 	do {
    387 		putwait(spc, &rw, &rhdr);
    388 		rv = send_with_recon(spc, &rhdr, sizeof(rhdr));
    389 		if (rv != 0) {
    390 			unputwait(spc, &rw);
    391 			continue;
    392 		}
    393 
    394 		rv = cliwaitresp(spc, &rw, omask, false);
    395 		if (rv == ENOTCONN)
    396 			rv = EAGAIN;
    397 	} while (rv == EAGAIN);
    398 
    399 	*resp = rw.rw_data;
    400 	return rv;
    401 }
    402 
    403 /*
    404  * prevent response code from deadlocking with reconnect code
    405  */
    406 static int
    407 resp_sendlock(struct spclient *spc)
    408 {
    409 	int rv = 0;
    410 
    411 	pthread_mutex_lock(&spc->spc_mtx);
    412 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    413 		if (__predict_false(spc->spc_reconnecting)) {
    414 			rv = EBUSY;
    415 			goto out;
    416 		}
    417 		spc->spc_ostatus = SPCSTATUS_WANTED;
    418 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    419 	}
    420 	spc->spc_ostatus = SPCSTATUS_BUSY;
    421 
    422  out:
    423 	pthread_mutex_unlock(&spc->spc_mtx);
    424 	return rv;
    425 }
    426 
    427 static void
    428 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    429 	int wantstr)
    430 {
    431 	struct rsp_hdr rhdr;
    432 
    433 	if (wantstr)
    434 		dlen = MIN(dlen, strlen(data)+1);
    435 
    436 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    437 	rhdr.rsp_reqno = reqno;
    438 	rhdr.rsp_class = RUMPSP_RESP;
    439 	rhdr.rsp_type = RUMPSP_COPYIN;
    440 	rhdr.rsp_sysnum = 0;
    441 
    442 	if (resp_sendlock(spc) != 0)
    443 		return;
    444 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    445 	(void)dosend(spc, data, dlen);
    446 	sendunlock(spc);
    447 }
    448 
    449 static void
    450 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    451 {
    452 	struct rsp_hdr rhdr;
    453 
    454 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    455 	rhdr.rsp_reqno = reqno;
    456 	rhdr.rsp_class = RUMPSP_RESP;
    457 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    458 	rhdr.rsp_sysnum = 0;
    459 
    460 	if (resp_sendlock(spc) != 0)
    461 		return;
    462 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    463 	(void)dosend(spc, &addr, sizeof(addr));
    464 	sendunlock(spc);
    465 }
    466 
    467 int
    468 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    469 	register_t *retval)
    470 {
    471 	struct rsp_sysresp *resp;
    472 	sigset_t omask;
    473 	void *rdata;
    474 	int rv;
    475 
    476 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    477 
    478 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    479 	    sysnum, data, dlen));
    480 
    481 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    482 	if (rv)
    483 		goto out;
    484 
    485 	resp = rdata;
    486 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    487 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    488 
    489 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    490 	rv = resp->rsys_error;
    491 	free(rdata);
    492 
    493  out:
    494 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    495 	return rv;
    496 }
    497 
    498 static void
    499 handlereq(struct spclient *spc)
    500 {
    501 	struct rsp_copydata *copydata;
    502 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    503 	void *mapaddr;
    504 	size_t maplen;
    505 	int reqtype = spc->spc_hdr.rsp_type;
    506 
    507 	switch (reqtype) {
    508 	case RUMPSP_COPYIN:
    509 	case RUMPSP_COPYINSTR:
    510 		/*LINTED*/
    511 		copydata = (struct rsp_copydata *)spc->spc_buf;
    512 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    513 		    copydata->rcp_addr, copydata->rcp_len));
    514 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    515 		    copydata->rcp_addr, copydata->rcp_len,
    516 		    reqtype == RUMPSP_COPYINSTR);
    517 		break;
    518 	case RUMPSP_COPYOUT:
    519 	case RUMPSP_COPYOUTSTR:
    520 		/*LINTED*/
    521 		copydata = (struct rsp_copydata *)spc->spc_buf;
    522 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    523 		    copydata->rcp_addr, copydata->rcp_len));
    524 		/*LINTED*/
    525 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    526 		    copydata->rcp_len);
    527 		break;
    528 	case RUMPSP_ANONMMAP:
    529 		/*LINTED*/
    530 		maplen = *(size_t *)spc->spc_buf;
    531 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    532 		    MAP_ANON, -1, 0);
    533 		if (mapaddr == MAP_FAILED)
    534 			mapaddr = NULL;
    535 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    536 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    537 		break;
    538 	case RUMPSP_RAISE:
    539 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    540 		raise((int)rhdr->rsp_signo);
    541 		/*
    542 		 * We most likely have signals blocked, but the signal
    543 		 * will be handled soon enough when we return.
    544 		 */
    545 		break;
    546 	default:
    547 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    548 		abort();
    549 		break;
    550 	}
    551 
    552 	spcfreebuf(spc);
    553 }
    554 
/* index into parsetab[] and server address, both set by parseurl() */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;

/*
 * Duplicate myfd until the result does not collide with stdio, i.e. is
 * greater than 2.  With mustchange set, at least one duplication is done
 * even if myfd is already greater than 2, and the descriptor passed in
 * is left open.  Every other intermediate descriptor is closed.
 *
 * Returns the resulting descriptor, or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	int nstale = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		int prevfd = myfd;

		assert(nstale < __arraycount(stale));
		myfd = host_dup(prevfd);
		if (mustchange) {
			/* the caller keeps its fd: do not queue it for closing */
			mustchange = 0;
		} else {
			stale[nstale++] = prevfd;
		}
	}

	/* close the low-numbered leftovers, most recent first */
	while (nstale > 0) {
		host_close(stale[--nstale]);
	}

	return myfd;
}
    581 
    582 static int
    583 doconnect(bool noisy)
    584 {
    585 	struct respwait rw;
    586 	struct rsp_hdr rhdr;
    587 	struct kevent kev[NSIG+1];
    588 	char banner[MAXBANNER];
    589 	struct pollfd pfd;
    590 	int s, error, flags, i;
    591 	ssize_t n;
    592 
    593 	if (kq != -1)
    594 		host_close(kq);
    595 	kq = -1;
    596 	s = -1;
    597 
    598 	if (clispc.spc_fd != -1)
    599 		host_close(clispc.spc_fd);
    600 	clispc.spc_fd = -1;
    601 
    602 	/*
    603 	 * for reconnect, gate everyone out of the receiver code
    604 	 */
    605 	putwait_locked(&clispc, &rw, &rhdr);
    606 
    607 	pthread_mutex_lock(&clispc.spc_mtx);
    608 	clispc.spc_reconnecting = 1;
    609 	pthread_cond_broadcast(&clispc.spc_cv);
    610 	clispc.spc_generation++;
    611 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    612 		clispc.spc_istatus = SPCSTATUS_WANTED;
    613 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    614 	}
    615 	kickall(&clispc);
    616 
    617 	/*
    618 	 * we can release it already since we hold the
    619 	 * send lock during reconnect
    620 	 * XXX: assert it
    621 	 */
    622 	clispc.spc_istatus = SPCSTATUS_FREE;
    623 	pthread_mutex_unlock(&clispc.spc_mtx);
    624 	unputwait_locked(&clispc, &rw);
    625 
    626 	free(clispc.spc_buf);
    627 	clispc.spc_off = 0;
    628 
    629 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    630 	if (s == -1)
    631 		return -1;
    632 
    633 	pfd.fd = s;
    634 	pfd.events = POLLIN;
    635 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
    636 		if (errno == EINTR)
    637 			continue;
    638 		error = errno;
    639 		if (noisy)
    640 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
    641 			    strerror(errno));
    642 		errno = error;
    643 		return -1;
    644 	}
    645 
    646 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    647 		error = errno;
    648 		if (noisy)
    649 			fprintf(stderr, "rump_sp: connect hook failed\n");
    650 		errno = error;
    651 		return -1;
    652 	}
    653 
    654 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
    655 		error = errno;
    656 		if (noisy)
    657 			fprintf(stderr, "rump_sp: failed to read banner\n");
    658 		errno = error;
    659 		return -1;
    660 	}
    661 
    662 	if (banner[n-1] != '\n') {
    663 		if (noisy)
    664 			fprintf(stderr, "rump_sp: invalid banner\n");
    665 		errno = EINVAL;
    666 		return -1;
    667 	}
    668 	banner[n] = '\0';
    669 	/* parse the banner some day */
    670 
    671 	flags = host_fcntl(s, F_GETFL, 0);
    672 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    673 		if (noisy)
    674 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
    675 			    strerror(errno));
    676 		errno = EINVAL;
    677 		return -1;
    678 	}
    679 	clispc.spc_fd = s;
    680 	clispc.spc_state = SPCSTATE_RUNNING;
    681 	clispc.spc_reconnecting = 0;
    682 
    683 	/* setup kqueue, we want all signals and the fd */
    684 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    685 		error = errno;
    686 		if (noisy)
    687 			fprintf(stderr, "rump_sp: cannot setup kqueue");
    688 		errno = error;
    689 		return -1;
    690 	}
    691 
    692 	for (i = 0; i < NSIG; i++) {
    693 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    694 	}
    695 	EV_SET(&kev[NSIG], clispc.spc_fd,
    696 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    697 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    698 		error = errno;
    699 		if (noisy)
    700 			fprintf(stderr, "rump_sp: kevent() failed");
    701 		errno = error;
    702 		return -1;
    703 	}
    704 
    705 	return 0;
    706 }
    707 
    708 static int
    709 doinit(void)
    710 {
    711 
    712 	TAILQ_INIT(&clispc.spc_respwait);
    713 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    714 	pthread_cond_init(&clispc.spc_cv, NULL);
    715 
    716 	return 0;
    717 }
    718 
    719 void *(*rumpclient_dlsym)(void *, const char *);
    720 static int init_done = 0;
    721 
    722 int
    723 rumpclient_init()
    724 {
    725 	char *p;
    726 	int error;
    727 	int rv = -1;
    728 	int hstype;
    729 
    730 	if (init_done)
    731 		return 0;
    732 	init_done = 1;
    733 
    734 	sigfillset(&fullset);
    735 
    736 	/* dlsym overrided by rumphijack? */
    737 	if (!rumpclient_dlsym)
    738 		rumpclient_dlsym = dlsym;
    739 
    740 	/*
    741 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
    742 	 * wann wird man je verstehen?  wann wird man je verstehen?
    743 	 */
    744 #define FINDSYM2(_name_,_syscall_)					\
    745 	if ((host_##_name_ = rumpclient_dlsym(RTLD_NEXT,		\
    746 	    #_syscall_)) == NULL)					\
    747 		/* host_##_name_ = _syscall_ */;
    748 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    749 	FINDSYM2(socket,__socket30);
    750 	FINDSYM(close);
    751 	FINDSYM(connect);
    752 	FINDSYM(fcntl);
    753 	FINDSYM(poll);
    754 	FINDSYM(read);
    755 	FINDSYM(sendto);
    756 	FINDSYM(setsockopt);
    757 	FINDSYM(dup);
    758 	FINDSYM(kqueue);
    759 	FINDSYM(execve);
    760 #if !__NetBSD_Prereq__(5,99,7)
    761 	FINDSYM(kevent);
    762 #else
    763 	FINDSYM2(kevent,_sys___kevent50);
    764 #endif
    765 #undef	FINDSYM
    766 #undef	FINDSY2
    767 
    768 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    769 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    770 			errno = ENOENT;
    771 			goto out;
    772 		}
    773 	}
    774 
    775 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    776 		errno = error;
    777 		goto out;
    778 	}
    779 
    780 	if (doinit() == -1)
    781 		goto out;
    782 
    783 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    784 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    785 		unsetenv("RUMPCLIENT__EXECFD");
    786 		hstype = HANDSHAKE_EXEC;
    787 	} else {
    788 		if (doconnect(true) == -1)
    789 			goto out;
    790 		hstype = HANDSHAKE_GUEST;
    791 	}
    792 
    793 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    794 	if (error) {
    795 		pthread_mutex_destroy(&clispc.spc_mtx);
    796 		pthread_cond_destroy(&clispc.spc_cv);
    797 		if (clispc.spc_fd != -1)
    798 			host_close(clispc.spc_fd);
    799 		errno = error;
    800 		goto out;
    801 	}
    802 	rv = 0;
    803 
    804  out:
    805 	if (rv == -1)
    806 		init_done = 0;
    807 	return rv;
    808 }
    809 
/*
 * State captured by rumpclient_prefork() for use around a fork.
 */
struct rumpclient_fork {
	/* copied from the prefork response; sent in the fork handshake */
	uint32_t fork_auth[AUTHLEN];
	/* snapshot of clispc, put back by rumpclient_fork_vparent() */
	struct spclient fork_spc;
	/* snapshot of kq, put back by rumpclient_fork_vparent() */
	int fork_kq;
};
    815 
    816 struct rumpclient_fork *
    817 rumpclient_prefork(void)
    818 {
    819 	struct rumpclient_fork *rpf;
    820 	sigset_t omask;
    821 	void *resp;
    822 	int rv;
    823 
    824 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    825 	rpf = malloc(sizeof(*rpf));
    826 	if (rpf == NULL)
    827 		goto out;
    828 
    829 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    830 		free(rpf);
    831 		errno = rv;
    832 		rpf = NULL;
    833 		goto out;
    834 	}
    835 
    836 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    837 	free(resp);
    838 
    839 	rpf->fork_spc = clispc;
    840 	rpf->fork_kq = kq;
    841 
    842  out:
    843 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    844 	return rpf;
    845 }
    846 
    847 int
    848 rumpclient_fork_init(struct rumpclient_fork *rpf)
    849 {
    850 	int error;
    851 	int osock;
    852 
    853 	osock = clispc.spc_fd;
    854 	memset(&clispc, 0, sizeof(clispc));
    855 	clispc.spc_fd = osock;
    856 
    857 	kq = -1; /* kqueue descriptor is not copied over fork() */
    858 
    859 	if (doinit() == -1)
    860 		return -1;
    861 	if (doconnect(false) == -1)
    862 		return -1;
    863 
    864 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    865 	    0, false);
    866 	if (error) {
    867 		pthread_mutex_destroy(&clispc.spc_mtx);
    868 		pthread_cond_destroy(&clispc.spc_cv);
    869 		errno = error;
    870 		return -1;
    871 	}
    872 
    873 	return 0;
    874 }
    875 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the record is left untouched.
 */
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
    882 
    883 void
    884 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
    885 {
    886 
    887 	clispc = rpf->fork_spc;
    888 	kq = rpf->fork_kq;
    889 }
    890 
    891 void
    892 rumpclient_setconnretry(time_t timeout)
    893 {
    894 
    895 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
    896 		return; /* gigo */
    897 
    898 	retrytimo = timeout;
    899 }
    900 
    901 int
    902 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
    903 {
    904 	int fd = *fdp;
    905 	int untilfd, rv;
    906 	int newfd;
    907 
    908 	switch (variant) {
    909 	case RUMPCLIENT_CLOSE_FCLOSEM:
    910 		untilfd = MAX(clispc.spc_fd, kq);
    911 		for (; fd <= untilfd; fd++) {
    912 			if (fd == clispc.spc_fd || fd == kq)
    913 				continue;
    914 			rv = host_close(fd);
    915 			if (rv == -1)
    916 				return -1;
    917 		}
    918 		*fdp = fd;
    919 		break;
    920 
    921 	case RUMPCLIENT_CLOSE_CLOSE:
    922 	case RUMPCLIENT_CLOSE_DUP2:
    923 		if (fd == clispc.spc_fd) {
    924 			struct kevent kev[2];
    925 
    926 			newfd = dupgood(clispc.spc_fd, 1);
    927 			if (newfd == -1)
    928 				return -1;
    929 			/*
    930 			 * now, we have a new socket number, so change
    931 			 * the file descriptor that kqueue is
    932 			 * monitoring.  remove old and add new.
    933 			 */
    934 			EV_SET(&kev[0], clispc.spc_fd,
    935 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
    936 			EV_SET(&kev[1], newfd,
    937 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    938 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
    939 				int sverrno = errno;
    940 				host_close(newfd);
    941 				errno = sverrno;
    942 				return -1;
    943 			}
    944 			clispc.spc_fd = newfd;
    945 		}
    946 		if (fd == kq) {
    947 			newfd = dupgood(kq, 1);
    948 			if (newfd == -1)
    949 				return -1;
    950 			kq = newfd;
    951 		}
    952 		break;
    953 	}
    954 
    955 	return 0;
    956 }
    957 
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork()
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
    964 
    965 /*
    966  * Process is about to exec.  Save info about our existing connection
    967  * in the env.  rumpclient will check for this info in init().
    968  * This is mostly for the benefit of rumphijack, but regular applications
    969  * may use it as well.
    970  */
    971 int
    972 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
    973 {
    974 	char buf[4096];
    975 	char **newenv;
    976 	char *envstr, *envstr2;
    977 	size_t nelem;
    978 	int rv, sverrno;
    979 
    980 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
    981 	    clispc.spc_fd, kq);
    982 	envstr = malloc(strlen(buf)+1);
    983 	if (envstr == NULL) {
    984 		return ENOMEM;
    985 	}
    986 	strcpy(envstr, buf);
    987 
    988 	/* do we have a fully parsed url we want to forward in the env? */
    989 	if (*parsedurl != '\0') {
    990 		snprintf(buf, sizeof(buf),
    991 		    "RUMP__PARSEDSERVER=%s", parsedurl);
    992 		envstr2 = malloc(strlen(buf)+1);
    993 		if (envstr2 == NULL) {
    994 			free(envstr);
    995 			return ENOMEM;
    996 		}
    997 		strcpy(envstr2, buf);
    998 	} else {
    999 		envstr2 = NULL;
   1000 	}
   1001 
   1002 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1003 		continue;
   1004 
   1005 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1006 	if (newenv == NULL) {
   1007 		free(envstr2);
   1008 		free(envstr);
   1009 		return ENOMEM;
   1010 	}
   1011 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1012 
   1013 	newenv[nelem] = envstr;
   1014 	newenv[nelem+1] = envstr2;
   1015 	newenv[nelem+2] = NULL;
   1016 
   1017 	rv = host_execve(path, argv, newenv);
   1018 
   1019 	_DIAGASSERT(rv != 0);
   1020 	sverrno = errno;
   1021 	free(envstr2);
   1022 	free(envstr);
   1023 	free(newenv);
   1024 	errno = sverrno;
   1025 	return rv;
   1026 }
   1027 
/*
 * daemon(3) for rump clients: prepare for the fork, daemonise, then set
 * up the connection again with rumpclient_fork_init().
 *
 * Returns 0 on success and -1 on failure; if daemon() itself fails its
 * errno is preserved.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf = rumpclient_prefork();

	if (rf == NULL)
		return -1;

	if (daemon(nochdir, noclose) != -1)
		return rumpclient_fork_init(rf) == -1 ? -1 : 0;

	{
		int sverrno = errno;

		rumpclient_fork_cancel(rf);
		errno = sverrno;
	}
	return -1;
}
   1049