Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.38
      1 /*      $NetBSD: rumpclient.c,v 1.38 2011/02/27 12:58:29 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __RCSID("$NetBSD: rumpclient.c,v 1.38 2011/02/27 12:58:29 pooka Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/event.h>
     37 #include <sys/mman.h>
     38 #include <sys/socket.h>
     39 
     40 #include <arpa/inet.h>
     41 #include <netinet/in.h>
     42 #include <netinet/tcp.h>
     43 
     44 #include <assert.h>
     45 #include <dlfcn.h>
     46 #include <err.h>
     47 #include <errno.h>
     48 #include <fcntl.h>
     49 #include <link.h>
     50 #include <poll.h>
     51 #include <pthread.h>
     52 #include <signal.h>
     53 #include <stdarg.h>
     54 #include <stdbool.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 
     60 #include <rump/rumpclient.h>
     61 
/*
 * Host operation hooks.
 *
 * These function pointers are filled in by rumpclient_init() (see the
 * FINDSYM/FINDSYM2 lookups there) and are used throughout this file
 * instead of calling the libc entry points by name.
 *
 * NOTE(review): HOSTOPS is defined right before sp_common.c is included;
 * presumably it tells that file to route its I/O through these pointers --
 * confirm against sp_common.c.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendto)(int, const void *, size_t, int,
		       const struct sockaddr *, socklen_t);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue is used to wait for socket readability and signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
     79 
     80 #include "sp_common.c"
     81 
/* The single client connection of this process; fd -1 means "not connected". */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor used to wait for socket data and signals; -1 if none */
static int kq = -1;
/* signal set with every signal in it, filled by rumpclient_init() */
static sigset_t fullset;

/* defined further down, needed by send_with_recon() */
static int doconnect(bool);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;
     97 
     98 static int
     99 send_with_recon(struct spclient *spc, const void *data, size_t dlen)
    100 {
    101 	struct timeval starttime, curtime;
    102 	time_t prevreconmsg;
    103 	unsigned reconretries;
    104 	int rv;
    105 
    106 	for (prevreconmsg = 0, reconretries = 0;;) {
    107 		rv = dosend(spc, data, dlen);
    108 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    109 			/* no persistent connections */
    110 			if (retrytimo == 0) {
    111 				rv = ENOTCONN;
    112 				break;
    113 			}
    114 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    115 				exit(1);
    116 
    117 			if (!prevreconmsg) {
    118 				prevreconmsg = time(NULL);
    119 				gettimeofday(&starttime, NULL);
    120 			}
    121 			if (reconretries == 1) {
    122 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    123 					rv = ENOTCONN;
    124 					break;
    125 				}
    126 				fprintf(stderr, "rump_sp: connection to "
    127 				    "kernel lost, trying to reconnect ...\n");
    128 			} else if (time(NULL) - prevreconmsg > 120) {
    129 				fprintf(stderr, "rump_sp: still trying to "
    130 				    "reconnect ...\n");
    131 				prevreconmsg = time(NULL);
    132 			}
    133 
    134 			/* check that we aren't over the limit */
    135 			if (retrytimo > 0) {
    136 				struct timeval tmp;
    137 
    138 				gettimeofday(&curtime, NULL);
    139 				timersub(&curtime, &starttime, &tmp);
    140 				if (tmp.tv_sec >= retrytimo) {
    141 					fprintf(stderr, "rump_sp: reconnect "
    142 					    "failed, %lld second timeout\n",
    143 					    (long long)retrytimo);
    144 					return ENOTCONN;
    145 				}
    146 			}
    147 
    148 			/* adhoc backoff timer */
    149 			if (reconretries < 10) {
    150 				usleep(100000 * reconretries);
    151 			} else {
    152 				sleep(MIN(10, reconretries-9));
    153 			}
    154 			reconretries++;
    155 
    156 			if ((rv = doconnect(false)) != 0)
    157 				continue;
    158 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    159 			    NULL, 0, true)) != 0)
    160 				continue;
    161 
    162 			/*
    163 			 * ok, reconnect succesful.  we need to return to
    164 			 * the upper layer to get the entire PDU resent.
    165 			 */
    166 			if (reconretries != 1)
    167 				fprintf(stderr, "rump_sp: reconnected!\n");
    168 			rv = EAGAIN;
    169 			break;
    170 		} else {
    171 			_DIAGASSERT(errno != EAGAIN);
    172 			break;
    173 		}
    174 	}
    175 
    176 	return rv;
    177 }
    178 
/*
 * Wait for the response belonging to the queued waiter rw.
 *
 * spc      connection to wait on
 * rw       waiter whose rw_done / rw_error fields signal completion
 * mask     signal mask to restore briefly when a signal event is seen
 * keeplock if false, the send lock is released (sendunlockl()) once
 *          spc_mtx is held
 *
 * One waiting thread at a time acts as the receiver (spc_istatus ==
 * SPCSTATUS_BUSY): it reads frames and dispatches them, others sleep on
 * their own condition variable.  Before returning, rw is removed from the
 * response wait queue and its condition variable is destroyed.
 *
 * Returns ENOTCONN if the connection generation changed or the connection
 * was found dead while waiting, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* remember the generation; doconnect() bumps it on reconnect */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			struct kevent kev[8];
			int gotresp, dosig, rv, i;

			/* become the receiver; I/O is done without the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
				switch (readframe(spc)) {
				case 0:
					/*
					 * NOTE(review): 0 presumably means
					 * "no complete frame yet"; block in
					 * kevent until something happens.
					 */
					rv = host_kevent(kq, NULL, 0,
					    kev, __arraycount(kev), NULL);

					if (__predict_false(rv == -1)) {
						goto cleanup;
					}

					/*
					 * XXX: don't know how this can
					 * happen (timeout cannot expire
					 * since there isn't one), but
					 * it does happen
					 */
					if (__predict_false(rv == 0))
						continue;

					/* count signal events among the results */
					for (i = 0; i < rv; i++) {
						if (kev[i].filter
						    == EVFILT_SIGNAL)
							dosig++;
					}
					if (dosig)
						goto cleanup;

					continue;
				case -1:
					/* read failed: treat connection as dead */
					imalive = false;
					goto cleanup;
				default:
					break;
				}

				/* a frame is available in spc: dispatch on class */
				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop receiving once our own reply arrived */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* hand the receiver role back */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Switch to the caller's signal mask for a
				 * moment, then go back to blocking everything.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving; ask to be woken up */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    283 
    284 static int
    285 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    286 	const void *data, size_t dlen, void **resp)
    287 {
    288 	struct rsp_hdr rhdr;
    289 	struct respwait rw;
    290 	int rv;
    291 
    292 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    293 	rhdr.rsp_class = RUMPSP_REQ;
    294 	rhdr.rsp_type = RUMPSP_SYSCALL;
    295 	rhdr.rsp_sysnum = sysnum;
    296 
    297 	do {
    298 		putwait(spc, &rw, &rhdr);
    299 		if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) {
    300 			unputwait(spc, &rw);
    301 			continue;
    302 		}
    303 		if ((rv = send_with_recon(spc, data, dlen)) != 0) {
    304 			unputwait(spc, &rw);
    305 			continue;
    306 		}
    307 
    308 		rv = cliwaitresp(spc, &rw, omask, false);
    309 		if (rv == ENOTCONN)
    310 			rv = EAGAIN;
    311 	} while (rv == EAGAIN);
    312 
    313 	*resp = rw.rw_data;
    314 	return rv;
    315 }
    316 
    317 static int
    318 handshake_req(struct spclient *spc, int type, void *data,
    319 	int cancel, bool haslock)
    320 {
    321 	struct handshake_fork rf;
    322 	struct rsp_hdr rhdr;
    323 	struct respwait rw;
    324 	sigset_t omask;
    325 	size_t bonus;
    326 	int rv;
    327 
    328 	if (type == HANDSHAKE_FORK) {
    329 		bonus = sizeof(rf);
    330 	} else {
    331 		bonus = strlen(getprogname())+1;
    332 	}
    333 
    334 	/* performs server handshake */
    335 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    336 	rhdr.rsp_class = RUMPSP_REQ;
    337 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    338 	rhdr.rsp_handshake = type;
    339 
    340 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    341 	if (haslock)
    342 		putwait_locked(spc, &rw, &rhdr);
    343 	else
    344 		putwait(spc, &rw, &rhdr);
    345 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    346 	if (type == HANDSHAKE_FORK) {
    347 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    348 		rf.rf_cancel = cancel;
    349 		rv = send_with_recon(spc, &rf, sizeof(rf));
    350 	} else {
    351 		rv = dosend(spc, getprogname(), strlen(getprogname())+1);
    352 	}
    353 	if (rv || cancel) {
    354 		if (haslock)
    355 			unputwait_locked(spc, &rw);
    356 		else
    357 			unputwait(spc, &rw);
    358 		if (cancel) {
    359 			goto out;
    360 		}
    361 	} else {
    362 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    363 	}
    364 	if (rv)
    365 		goto out;
    366 
    367 	rv = *(int *)rw.rw_data;
    368 	free(rw.rw_data);
    369 
    370  out:
    371 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    372 	return rv;
    373 }
    374 
    375 static int
    376 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    377 {
    378 	struct rsp_hdr rhdr;
    379 	struct respwait rw;
    380 	int rv;
    381 
    382 	rhdr.rsp_len = sizeof(rhdr);
    383 	rhdr.rsp_class = RUMPSP_REQ;
    384 	rhdr.rsp_type = RUMPSP_PREFORK;
    385 	rhdr.rsp_error = 0;
    386 
    387 	do {
    388 		putwait(spc, &rw, &rhdr);
    389 		rv = send_with_recon(spc, &rhdr, sizeof(rhdr));
    390 		if (rv != 0) {
    391 			unputwait(spc, &rw);
    392 			continue;
    393 		}
    394 
    395 		rv = cliwaitresp(spc, &rw, omask, false);
    396 		if (rv == ENOTCONN)
    397 			rv = EAGAIN;
    398 	} while (rv == EAGAIN);
    399 
    400 	*resp = rw.rw_data;
    401 	return rv;
    402 }
    403 
    404 /*
    405  * prevent response code from deadlocking with reconnect code
    406  */
    407 static int
    408 resp_sendlock(struct spclient *spc)
    409 {
    410 	int rv = 0;
    411 
    412 	pthread_mutex_lock(&spc->spc_mtx);
    413 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    414 		if (__predict_false(spc->spc_reconnecting)) {
    415 			rv = EBUSY;
    416 			goto out;
    417 		}
    418 		spc->spc_ostatus = SPCSTATUS_WANTED;
    419 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    420 	}
    421 	spc->spc_ostatus = SPCSTATUS_BUSY;
    422 
    423  out:
    424 	pthread_mutex_unlock(&spc->spc_mtx);
    425 	return rv;
    426 }
    427 
    428 static void
    429 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    430 	int wantstr)
    431 {
    432 	struct rsp_hdr rhdr;
    433 
    434 	if (wantstr)
    435 		dlen = MIN(dlen, strlen(data)+1);
    436 
    437 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    438 	rhdr.rsp_reqno = reqno;
    439 	rhdr.rsp_class = RUMPSP_RESP;
    440 	rhdr.rsp_type = RUMPSP_COPYIN;
    441 	rhdr.rsp_sysnum = 0;
    442 
    443 	if (resp_sendlock(spc) != 0)
    444 		return;
    445 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    446 	(void)dosend(spc, data, dlen);
    447 	sendunlock(spc);
    448 }
    449 
    450 static void
    451 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    452 {
    453 	struct rsp_hdr rhdr;
    454 
    455 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    456 	rhdr.rsp_reqno = reqno;
    457 	rhdr.rsp_class = RUMPSP_RESP;
    458 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    459 	rhdr.rsp_sysnum = 0;
    460 
    461 	if (resp_sendlock(spc) != 0)
    462 		return;
    463 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    464 	(void)dosend(spc, &addr, sizeof(addr));
    465 	sendunlock(spc);
    466 }
    467 
    468 int
    469 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    470 	register_t *retval)
    471 {
    472 	struct rsp_sysresp *resp;
    473 	sigset_t omask;
    474 	void *rdata;
    475 	int rv;
    476 
    477 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    478 
    479 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    480 	    sysnum, data, dlen));
    481 
    482 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    483 	if (rv)
    484 		goto out;
    485 
    486 	resp = rdata;
    487 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    488 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    489 
    490 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    491 	rv = resp->rsys_error;
    492 	free(rdata);
    493 
    494  out:
    495 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    496 	return rv;
    497 }
    498 
    499 static void
    500 handlereq(struct spclient *spc)
    501 {
    502 	struct rsp_copydata *copydata;
    503 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    504 	void *mapaddr;
    505 	size_t maplen;
    506 	int reqtype = spc->spc_hdr.rsp_type;
    507 
    508 	switch (reqtype) {
    509 	case RUMPSP_COPYIN:
    510 	case RUMPSP_COPYINSTR:
    511 		/*LINTED*/
    512 		copydata = (struct rsp_copydata *)spc->spc_buf;
    513 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    514 		    copydata->rcp_addr, copydata->rcp_len));
    515 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    516 		    copydata->rcp_addr, copydata->rcp_len,
    517 		    reqtype == RUMPSP_COPYINSTR);
    518 		break;
    519 	case RUMPSP_COPYOUT:
    520 	case RUMPSP_COPYOUTSTR:
    521 		/*LINTED*/
    522 		copydata = (struct rsp_copydata *)spc->spc_buf;
    523 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    524 		    copydata->rcp_addr, copydata->rcp_len));
    525 		/*LINTED*/
    526 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    527 		    copydata->rcp_len);
    528 		break;
    529 	case RUMPSP_ANONMMAP:
    530 		/*LINTED*/
    531 		maplen = *(size_t *)spc->spc_buf;
    532 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    533 		    MAP_ANON, -1, 0);
    534 		if (mapaddr == MAP_FAILED)
    535 			mapaddr = NULL;
    536 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    537 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    538 		break;
    539 	case RUMPSP_RAISE:
    540 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    541 		raise((int)rhdr->rsp_signo);
    542 		/*
    543 		 * We most likely have signals blocked, but the signal
    544 		 * will be handled soon enough when we return.
    545 		 */
    546 		break;
    547 	default:
    548 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    549 		abort();
    550 		break;
    551 	}
    552 
    553 	spcfreebuf(spc);
    554 }
    555 
/*
 * Server address and the index of its entry in parsetab[]; both are
 * filled in by parseurl() in rumpclient_init() and used by doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    558 
/*
 * Duplicate myfd until we get a "good" descriptor, i.e. one which does
 * not collide with stdio (greater than 2).  With mustchange set, at
 * least one dup is made even if myfd is already good, and the descriptor
 * passed in is left open.  Every other intermediate descriptor,
 * including the one passed in when mustchange is not set, is closed.
 *
 * Returns the new descriptor, or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int held[4];
	int nheld = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		int prevfd = myfd;

		assert(nheld < __arraycount(held));
		myfd = host_dup(prevfd);
		if (mustchange) {
			/* do not remember it: the old fd must stay open */
			mustchange = 0;
		} else {
			held[nheld++] = prevfd;
		}
	}

	/* get rid of the descriptors we went through, newest first */
	while (nheld > 0)
		host_close(held[--nheld]);

	return myfd;
}
    582 
    583 static int
    584 doconnect(bool noisy)
    585 {
    586 	struct respwait rw;
    587 	struct rsp_hdr rhdr;
    588 	struct kevent kev[NSIG+1];
    589 	char banner[MAXBANNER];
    590 	struct pollfd pfd;
    591 	int s, error, flags, i;
    592 	ssize_t n;
    593 
    594 	if (kq != -1)
    595 		host_close(kq);
    596 	kq = -1;
    597 	s = -1;
    598 
    599 	if (clispc.spc_fd != -1)
    600 		host_close(clispc.spc_fd);
    601 	clispc.spc_fd = -1;
    602 
    603 	/*
    604 	 * for reconnect, gate everyone out of the receiver code
    605 	 */
    606 	putwait_locked(&clispc, &rw, &rhdr);
    607 
    608 	pthread_mutex_lock(&clispc.spc_mtx);
    609 	clispc.spc_reconnecting = 1;
    610 	pthread_cond_broadcast(&clispc.spc_cv);
    611 	clispc.spc_generation++;
    612 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    613 		clispc.spc_istatus = SPCSTATUS_WANTED;
    614 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    615 	}
    616 	kickall(&clispc);
    617 
    618 	/*
    619 	 * we can release it already since we hold the
    620 	 * send lock during reconnect
    621 	 * XXX: assert it
    622 	 */
    623 	clispc.spc_istatus = SPCSTATUS_FREE;
    624 	pthread_mutex_unlock(&clispc.spc_mtx);
    625 	unputwait_locked(&clispc, &rw);
    626 
    627 	free(clispc.spc_buf);
    628 	clispc.spc_off = 0;
    629 
    630 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    631 	if (s == -1)
    632 		return -1;
    633 
    634 	pfd.fd = s;
    635 	pfd.events = POLLIN;
    636 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
    637 		if (errno == EINTR)
    638 			continue;
    639 		error = errno;
    640 		if (noisy)
    641 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
    642 			    strerror(errno));
    643 		errno = error;
    644 		return -1;
    645 	}
    646 
    647 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    648 		error = errno;
    649 		if (noisy)
    650 			fprintf(stderr, "rump_sp: connect hook failed\n");
    651 		errno = error;
    652 		return -1;
    653 	}
    654 
    655 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
    656 		error = errno;
    657 		if (noisy)
    658 			fprintf(stderr, "rump_sp: failed to read banner\n");
    659 		errno = error;
    660 		return -1;
    661 	}
    662 
    663 	if (banner[n-1] != '\n') {
    664 		if (noisy)
    665 			fprintf(stderr, "rump_sp: invalid banner\n");
    666 		errno = EINVAL;
    667 		return -1;
    668 	}
    669 	banner[n] = '\0';
    670 	/* parse the banner some day */
    671 
    672 	flags = host_fcntl(s, F_GETFL, 0);
    673 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    674 		if (noisy)
    675 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
    676 			    strerror(errno));
    677 		errno = EINVAL;
    678 		return -1;
    679 	}
    680 	clispc.spc_fd = s;
    681 	clispc.spc_state = SPCSTATE_RUNNING;
    682 	clispc.spc_reconnecting = 0;
    683 
    684 	/* setup kqueue, we want all signals and the fd */
    685 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    686 		error = errno;
    687 		if (noisy)
    688 			fprintf(stderr, "rump_sp: cannot setup kqueue");
    689 		errno = error;
    690 		return -1;
    691 	}
    692 
    693 	for (i = 0; i < NSIG; i++) {
    694 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    695 	}
    696 	EV_SET(&kev[NSIG], clispc.spc_fd,
    697 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    698 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    699 		error = errno;
    700 		if (noisy)
    701 			fprintf(stderr, "rump_sp: kevent() failed");
    702 		errno = error;
    703 		return -1;
    704 	}
    705 
    706 	return 0;
    707 }
    708 
    709 static int
    710 doinit(void)
    711 {
    712 
    713 	TAILQ_INIT(&clispc.spc_respwait);
    714 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    715 	pthread_cond_init(&clispc.spc_cv, NULL);
    716 
    717 	return 0;
    718 }
    719 
/*
 * Symbol lookup used by rumpclient_init() to find the host operations.
 * rumpclient__dlsym() is a plain wrapper around dlsym().
 * rumphijack_dlsym is a weak alias for it, so by default both names
 * refer to the same function; rumpclient_init() compares the two to
 * detect whether the alias has been replaced.
 * NOTE(review): presumably librumphijack provides the strong
 * rumphijack_dlsym definition -- confirm.
 */
void *rumpclient__dlsym(void *, const char *);
void *rumphijack_dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
    729 
/* pid of the process which completed rumpclient_init(); 0 if none has */
static pid_t init_done = 0;
    731 
    732 int
    733 rumpclient_init()
    734 {
    735 	char *p;
    736 	int error;
    737 	int rv = -1;
    738 	int hstype;
    739 	pid_t mypid;
    740 
    741 	/*
    742 	 * Make sure we're not riding the context of a previous
    743 	 * host fork.  Note: it's *possible* that after n>1 forks
    744 	 * we have the same pid as one of our exited parents, but
    745 	 * I'm pretty sure there are 0 practical implications, since
    746 	 * it means generations would have to skip rumpclient init.
    747 	 */
    748 	if (init_done == (mypid = getpid()))
    749 		return 0;
    750 
    751 	/* kq does not traverse fork() */
    752 	if (init_done != 0)
    753 		kq = -1;
    754 	init_done = mypid;
    755 
    756 	sigfillset(&fullset);
    757 
    758 	/*
    759 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
    760 	 * wann wird man je verstehen?  wann wird man je verstehen?
    761 	 */
    762 #define FINDSYM2(_name_,_syscall_)					\
    763 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    764 	    #_syscall_)) == NULL) {					\
    765 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    766 			host_##_name_ = _name_; /* static fallback */	\
    767 		if (host_##_name_ == NULL)				\
    768 			errx(1, "cannot find %s: %s", #_syscall_,	\
    769 			    dlerror());					\
    770 	}
    771 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    772 	FINDSYM2(socket,__socket30)
    773 	FINDSYM(close)
    774 	FINDSYM(connect)
    775 	FINDSYM(fcntl)
    776 	FINDSYM(poll)
    777 	FINDSYM(read)
    778 	FINDSYM(sendto)
    779 	FINDSYM(setsockopt)
    780 	FINDSYM(dup)
    781 	FINDSYM(kqueue)
    782 	FINDSYM(execve)
    783 #if !__NetBSD_Prereq__(5,99,7)
    784 	FINDSYM(kevent)
    785 #else
    786 	FINDSYM2(kevent,_sys___kevent50)
    787 #endif
    788 #undef	FINDSYM
    789 #undef	FINDSY2
    790 
    791 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    792 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    793 			errno = ENOENT;
    794 			goto out;
    795 		}
    796 	}
    797 
    798 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    799 		errno = error;
    800 		goto out;
    801 	}
    802 
    803 	if (doinit() == -1)
    804 		goto out;
    805 
    806 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    807 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    808 		unsetenv("RUMPCLIENT__EXECFD");
    809 		hstype = HANDSHAKE_EXEC;
    810 	} else {
    811 		if (doconnect(true) == -1)
    812 			goto out;
    813 		hstype = HANDSHAKE_GUEST;
    814 	}
    815 
    816 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    817 	if (error) {
    818 		pthread_mutex_destroy(&clispc.spc_mtx);
    819 		pthread_cond_destroy(&clispc.spc_cv);
    820 		if (clispc.spc_fd != -1)
    821 			host_close(clispc.spc_fd);
    822 		errno = error;
    823 		goto out;
    824 	}
    825 	rv = 0;
    826 
    827  out:
    828 	if (rv == -1)
    829 		init_done = 0;
    830 	return rv;
    831 }
    832 
/*
 * State carried from rumpclient_prefork() in the parent to
 * rumpclient_fork_init() in the child (or back into the parent with
 * rumpclient_fork_vparent()).
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork reply */
	struct spclient fork_spc;	/* snapshot of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
    838 
    839 struct rumpclient_fork *
    840 rumpclient_prefork(void)
    841 {
    842 	struct rumpclient_fork *rpf;
    843 	sigset_t omask;
    844 	void *resp;
    845 	int rv;
    846 
    847 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    848 	rpf = malloc(sizeof(*rpf));
    849 	if (rpf == NULL)
    850 		goto out;
    851 
    852 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    853 		free(rpf);
    854 		errno = rv;
    855 		rpf = NULL;
    856 		goto out;
    857 	}
    858 
    859 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    860 	free(resp);
    861 
    862 	rpf->fork_spc = clispc;
    863 	rpf->fork_kq = kq;
    864 
    865  out:
    866 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    867 	return rpf;
    868 }
    869 
    870 int
    871 rumpclient_fork_init(struct rumpclient_fork *rpf)
    872 {
    873 	int error;
    874 	int osock;
    875 
    876 	osock = clispc.spc_fd;
    877 	memset(&clispc, 0, sizeof(clispc));
    878 	clispc.spc_fd = osock;
    879 
    880 	kq = -1; /* kqueue descriptor is not copied over fork() */
    881 
    882 	if (doinit() == -1)
    883 		return -1;
    884 	if (doconnect(false) == -1)
    885 		return -1;
    886 
    887 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    888 	    0, false);
    889 	if (error) {
    890 		pthread_mutex_destroy(&clispc.spc_mtx);
    891 		pthread_cond_destroy(&clispc.spc_cv);
    892 		errno = error;
    893 		return -1;
    894 	}
    895 
    896 	return 0;
    897 }
    898 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: this currently does nothing, and in particular
 * rpf is neither used nor freed.
 */
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
    905 
    906 void
    907 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
    908 {
    909 
    910 	clispc = rpf->fork_spc;
    911 	kq = rpf->fork_kq;
    912 }
    913 
    914 void
    915 rumpclient_setconnretry(time_t timeout)
    916 {
    917 
    918 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
    919 		return; /* gigo */
    920 
    921 	retrytimo = timeout;
    922 }
    923 
    924 int
    925 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
    926 {
    927 	int fd = *fdp;
    928 	int untilfd, rv;
    929 	int newfd;
    930 
    931 	switch (variant) {
    932 	case RUMPCLIENT_CLOSE_FCLOSEM:
    933 		untilfd = MAX(clispc.spc_fd, kq);
    934 		for (; fd <= untilfd; fd++) {
    935 			if (fd == clispc.spc_fd || fd == kq)
    936 				continue;
    937 			rv = host_close(fd);
    938 			if (rv == -1)
    939 				return -1;
    940 		}
    941 		*fdp = fd;
    942 		break;
    943 
    944 	case RUMPCLIENT_CLOSE_CLOSE:
    945 	case RUMPCLIENT_CLOSE_DUP2:
    946 		if (fd == clispc.spc_fd) {
    947 			struct kevent kev[2];
    948 
    949 			newfd = dupgood(clispc.spc_fd, 1);
    950 			if (newfd == -1)
    951 				return -1;
    952 			/*
    953 			 * now, we have a new socket number, so change
    954 			 * the file descriptor that kqueue is
    955 			 * monitoring.  remove old and add new.
    956 			 */
    957 			EV_SET(&kev[0], clispc.spc_fd,
    958 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
    959 			EV_SET(&kev[1], newfd,
    960 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    961 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
    962 				int sverrno = errno;
    963 				host_close(newfd);
    964 				errno = sverrno;
    965 				return -1;
    966 			}
    967 			clispc.spc_fd = newfd;
    968 		}
    969 		if (fd == kq) {
    970 			newfd = dupgood(kq, 1);
    971 			if (newfd == -1)
    972 				return -1;
    973 			kq = newfd;
    974 		}
    975 		break;
    976 	}
    977 
    978 	return 0;
    979 }
    980 
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 * NOTE(review): rumpclient__dofork() is not defined in this file;
 * presumably it comes from <rump/rumpclient.h> and wraps the call in
 * rumpclient_prefork() / rumpclient_fork_init() -- confirm.
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
    987 
    988 /*
    989  * Process is about to exec.  Save info about our existing connection
    990  * in the env.  rumpclient will check for this info in init().
    991  * This is mostly for the benefit of rumphijack, but regular applications
    992  * may use it as well.
    993  */
    994 int
    995 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
    996 {
    997 	char buf[4096];
    998 	char **newenv;
    999 	char *envstr, *envstr2;
   1000 	size_t nelem;
   1001 	int rv, sverrno;
   1002 
   1003 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1004 	    clispc.spc_fd, kq);
   1005 	envstr = malloc(strlen(buf)+1);
   1006 	if (envstr == NULL) {
   1007 		return ENOMEM;
   1008 	}
   1009 	strcpy(envstr, buf);
   1010 
   1011 	/* do we have a fully parsed url we want to forward in the env? */
   1012 	if (*parsedurl != '\0') {
   1013 		snprintf(buf, sizeof(buf),
   1014 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1015 		envstr2 = malloc(strlen(buf)+1);
   1016 		if (envstr2 == NULL) {
   1017 			free(envstr);
   1018 			return ENOMEM;
   1019 		}
   1020 		strcpy(envstr2, buf);
   1021 	} else {
   1022 		envstr2 = NULL;
   1023 	}
   1024 
   1025 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1026 		continue;
   1027 
   1028 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1029 	if (newenv == NULL) {
   1030 		free(envstr2);
   1031 		free(envstr);
   1032 		return ENOMEM;
   1033 	}
   1034 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1035 
   1036 	newenv[nelem] = envstr;
   1037 	newenv[nelem+1] = envstr2;
   1038 	newenv[nelem+2] = NULL;
   1039 
   1040 	rv = host_execve(path, argv, newenv);
   1041 
   1042 	_DIAGASSERT(rv != 0);
   1043 	sverrno = errno;
   1044 	free(envstr2);
   1045 	free(envstr);
   1046 	free(newenv);
   1047 	errno = sverrno;
   1048 	return rv;
   1049 }
   1050 
/*
 * daemon(3) for rump clients: prepare for the fork, call the host's
 * daemon() with nochdir and noclose, and then set up the client again
 * in the resulting process.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * The prefork context is not needed once the fork handshake has
	 * been attempted and nobody else holds a reference to it, so
	 * release it here.  Keep errno intact for the caller.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;
}
   1072