Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.51
      1 /*      $NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include "rumpuser_port.h"
     33 
     34 /*
     35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
     36  * use kqueue on other BSD's too, but I haven't tested those.  We
     37  * want to use kqueue because it will give us the ability to get signal
     38  * notifications but defer their handling to a stage where we do not
     39  * hold the communication lock.  Taking a signal while holding on to
     40  * that lock may cause a deadlock.  Therefore, block signals throughout
     41  * the RPC when using poll.  This unfortunately means that the normal
     42  * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
     44  * the above scenario correctly, I'm all ears.
     45  */
     46 
     47 #ifdef __NetBSD__
     48 #define USE_KQUEUE
     49 #endif
     50 
     51 #include <sys/cdefs.h>
     52 __RCSID("$NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $");
     53 
     54 #include <sys/param.h>
     55 #include <sys/mman.h>
     56 #include <sys/socket.h>
     57 #include <sys/time.h>
     58 
     59 #ifdef USE_KQUEUE
     60 #include <sys/event.h>
     61 #endif
     62 
     63 #include <arpa/inet.h>
     64 #include <netinet/in.h>
     65 #include <netinet/tcp.h>
     66 
     67 #include <assert.h>
     68 #include <dlfcn.h>
     69 #include <err.h>
     70 #include <errno.h>
     71 #include <fcntl.h>
     72 #include <link.h>
     73 #include <poll.h>
     74 #include <pthread.h>
     75 #include <signal.h>
     76 #include <stdarg.h>
     77 #include <stdbool.h>
     78 #include <stdio.h>
     79 #include <stdlib.h>
     80 #include <string.h>
     81 #include <unistd.h>
     82 
     83 #include <rump/rumpclient.h>
     84 
/*
 * Pointers to the host implementations of the calls used for talking
 * to the server.  rumpclient_init() fills them in with its
 * FINDSYM()/FINDSYM2() lookups.
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c, which is
 * included right after these declarations -- confirm.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

#ifdef USE_KQUEUE
/* only needed (and only looked up) when events are waited for with kqueue */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
    103 
    104 #include "sp_common.c"
    105 
/* The one and only connection to the server; spc_fd is -1 when not connected. */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor used by the receiver; -1 when there is none */
static int kq = -1;
/* set containing every signal (see sigfillset() in rumpclient_init()) */
static sigset_t fullset;

/* forward declarations: both are used by send_with_recon() below */
static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * The value is changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
    124 
/*
 * Send the iovec to the server with dosend().  If the send fails with
 * ENOTCONN or EBADF, optionally try to re-establish the connection
 * according to the retry policy in retrytimo.
 *
 * Returns:
 *   the result of dosend() if it is anything other than ENOTCONN/EBADF
 *     (this includes 0 for success),
 *   ENOTCONN if retrying is disabled, limited to once, or timed out,
 *   EAGAIN   if the connection was re-established; nothing was sent in
 *            that case and the caller must resend the entire PDU.
 *
 * With RUMPCLIENT_RETRYCONN_DIE the process exits instead of returning.
 */
static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			/*
			 * first failure: note when we started so that the
			 * timeout and the periodic message can be computed
			 */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			/* i.e. one reconnect attempt has already failed */
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* remind the user every two minutes */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				struct timeval tmp;

				gettimeofday(&curtime, NULL);
				timersub(&curtime, &starttime, &tmp);
				if (tmp.tv_sec >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/*
			 * adhoc backoff timer: 0 to 0.9s in 0.1s steps for
			 * the first ten attempts, then 1..10 whole seconds
			 */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			/*
			 * On failure go around the loop again; the send at
			 * the top is what detects that we are still down.
			 */
			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			/*
			 * NOTE(review): this checks errno rather than rv;
			 * confirm which one was intended.
			 */
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
    205 
/*
 * Wait until the response for the request registered in rw arrives.
 *
 * Only one thread at a time reads from the connection
 * (spc_istatus == SPCSTATUS_BUSY).  If nobody is reading, the caller
 * becomes the reader and processes incoming frames -- waking up other
 * waiters and serving requests from the server -- until its own
 * response arrives.  Otherwise it sleeps on rw->rw_cv.
 *
 * spc      connection
 * rw       wait structure; it is removed from spc_respwait and its
 *          condition variable is destroyed before returning
 * mask     signal mask to install temporarily when a signal was
 *          reported by kevent (USE_KQUEUE only)
 * keeplock if false, the send lock is released with sendunlockl()
 *
 * Returns ENOTCONN if the connection died or its generation number
 * changed while waiting, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; remember the one we started with */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the reader; the mutex is not held while reading */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error let readframe() decide if we're alive */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/*
				 * signals are only counted here; they are
				 * dealt with after cleanup
				 */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				/* the result is not examined; readframe() below decides */
				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* frame not complete yet, wait for more */
					continue;
				case -1:
					/* connection is gone */
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					/* hand it to its waiter; was it ours? */
					kickwaiter(spc);
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					/* the server wants something from us */
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the reader role and wake up whoever wants it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/*
			 * take one for the team: install the caller's mask
			 * for a moment, without holding the mutex, and then
			 * block everything again
			 */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is reading; sleep until kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    333 
    334 static int
    335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    336 	const void *data, size_t dlen, void **resp)
    337 {
    338 	struct rsp_hdr rhdr;
    339 	struct respwait rw;
    340 	struct iovec iov[2];
    341 	int rv;
    342 
    343 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    344 	rhdr.rsp_class = RUMPSP_REQ;
    345 	rhdr.rsp_type = RUMPSP_SYSCALL;
    346 	rhdr.rsp_sysnum = sysnum;
    347 
    348 	IOVPUT(iov[0], rhdr);
    349 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    350 
    351 	do {
    352 		putwait(spc, &rw, &rhdr);
    353 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    354 			unputwait(spc, &rw);
    355 			continue;
    356 		}
    357 
    358 		rv = cliwaitresp(spc, &rw, omask, false);
    359 		if (rv == ENOTCONN)
    360 			rv = EAGAIN;
    361 	} while (rv == EAGAIN);
    362 
    363 	*resp = rw.rw_data;
    364 	return rv;
    365 }
    366 
    367 static int
    368 handshake_req(struct spclient *spc, int type, void *data,
    369 	int cancel, bool haslock)
    370 {
    371 	struct handshake_fork rf;
    372 	const char *myprogname = NULL; /* XXXgcc */
    373 	struct rsp_hdr rhdr;
    374 	struct respwait rw;
    375 	sigset_t omask;
    376 	size_t bonus;
    377 	struct iovec iov[2];
    378 	int rv;
    379 
    380 	if (type == HANDSHAKE_FORK) {
    381 		bonus = sizeof(rf);
    382 	} else {
    383 #ifdef __NetBSD__
    384 		/* would procfs work on NetBSD too? */
    385 		myprogname = getprogname();
    386 #else
    387 		int fd = open("/proc/self/comm", O_RDONLY);
    388 		if (fd == -1) {
    389 			myprogname = "???";
    390 		} else {
    391 			static char commname[128];
    392 
    393 			memset(commname, 0, sizeof(commname));
    394 			if (read(fd, commname, sizeof(commname)) > 0) {
    395 				char *n;
    396 
    397 				n = strrchr(commname, '\n');
    398 				if (n)
    399 					*n = '\0';
    400 				myprogname = commname;
    401 			} else {
    402 				myprogname = "???";
    403 			}
    404 			close(fd);
    405 		}
    406 #endif
    407 		bonus = strlen(myprogname)+1;
    408 	}
    409 
    410 	/* performs server handshake */
    411 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    412 	rhdr.rsp_class = RUMPSP_REQ;
    413 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    414 	rhdr.rsp_handshake = type;
    415 
    416 	IOVPUT(iov[0], rhdr);
    417 
    418 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    419 	if (haslock)
    420 		putwait_locked(spc, &rw, &rhdr);
    421 	else
    422 		putwait(spc, &rw, &rhdr);
    423 	if (type == HANDSHAKE_FORK) {
    424 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    425 		rf.rf_cancel = cancel;
    426 		IOVPUT(iov[1], rf);
    427 	} else {
    428 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    429 	}
    430 	rv = send_with_recon(spc, iov, __arraycount(iov));
    431 	if (rv || cancel) {
    432 		if (haslock)
    433 			unputwait_locked(spc, &rw);
    434 		else
    435 			unputwait(spc, &rw);
    436 		if (cancel) {
    437 			goto out;
    438 		}
    439 	} else {
    440 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    441 	}
    442 	if (rv)
    443 		goto out;
    444 
    445 	rv = *(int *)rw.rw_data;
    446 	free(rw.rw_data);
    447 
    448  out:
    449 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    450 	return rv;
    451 }
    452 
    453 static int
    454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    455 {
    456 	struct rsp_hdr rhdr;
    457 	struct respwait rw;
    458 	struct iovec iov[1];
    459 	int rv;
    460 
    461 	rhdr.rsp_len = sizeof(rhdr);
    462 	rhdr.rsp_class = RUMPSP_REQ;
    463 	rhdr.rsp_type = RUMPSP_PREFORK;
    464 	rhdr.rsp_error = 0;
    465 
    466 	IOVPUT(iov[0], rhdr);
    467 
    468 	do {
    469 		putwait(spc, &rw, &rhdr);
    470 		rv = send_with_recon(spc, iov, __arraycount(iov));
    471 		if (rv != 0) {
    472 			unputwait(spc, &rw);
    473 			continue;
    474 		}
    475 
    476 		rv = cliwaitresp(spc, &rw, omask, false);
    477 		if (rv == ENOTCONN)
    478 			rv = EAGAIN;
    479 	} while (rv == EAGAIN);
    480 
    481 	*resp = rw.rw_data;
    482 	return rv;
    483 }
    484 
    485 /*
    486  * prevent response code from deadlocking with reconnect code
    487  */
    488 static int
    489 resp_sendlock(struct spclient *spc)
    490 {
    491 	int rv = 0;
    492 
    493 	pthread_mutex_lock(&spc->spc_mtx);
    494 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    495 		if (__predict_false(spc->spc_reconnecting)) {
    496 			rv = EBUSY;
    497 			goto out;
    498 		}
    499 		spc->spc_ostatus = SPCSTATUS_WANTED;
    500 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    501 	}
    502 	spc->spc_ostatus = SPCSTATUS_BUSY;
    503 
    504  out:
    505 	pthread_mutex_unlock(&spc->spc_mtx);
    506 	return rv;
    507 }
    508 
    509 static void
    510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    511 	int wantstr)
    512 {
    513 	struct rsp_hdr rhdr;
    514 	struct iovec iov[2];
    515 
    516 	if (wantstr)
    517 		dlen = MIN(dlen, strlen(data)+1);
    518 
    519 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    520 	rhdr.rsp_reqno = reqno;
    521 	rhdr.rsp_class = RUMPSP_RESP;
    522 	rhdr.rsp_type = RUMPSP_COPYIN;
    523 	rhdr.rsp_sysnum = 0;
    524 
    525 	IOVPUT(iov[0], rhdr);
    526 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    527 
    528 	if (resp_sendlock(spc) != 0)
    529 		return;
    530 	(void)SENDIOV(spc, iov);
    531 	sendunlock(spc);
    532 }
    533 
    534 static void
    535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    536 {
    537 	struct rsp_hdr rhdr;
    538 	struct iovec iov[2];
    539 
    540 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    541 	rhdr.rsp_reqno = reqno;
    542 	rhdr.rsp_class = RUMPSP_RESP;
    543 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    544 	rhdr.rsp_sysnum = 0;
    545 
    546 	IOVPUT(iov[0], rhdr);
    547 	IOVPUT(iov[1], addr);
    548 
    549 	if (resp_sendlock(spc) != 0)
    550 		return;
    551 	(void)SENDIOV(spc, iov);
    552 	sendunlock(spc);
    553 }
    554 
    555 int
    556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    557 	register_t *retval)
    558 {
    559 	struct rsp_sysresp *resp;
    560 	sigset_t omask;
    561 	void *rdata;
    562 	int rv;
    563 
    564 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    565 
    566 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    567 	    sysnum, data, dlen));
    568 
    569 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    570 	if (rv)
    571 		goto out;
    572 
    573 	resp = rdata;
    574 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    575 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    576 
    577 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    578 	rv = resp->rsys_error;
    579 	free(rdata);
    580 
    581  out:
    582 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    583 	return rv;
    584 }
    585 
    586 static void
    587 handlereq(struct spclient *spc)
    588 {
    589 	struct rsp_copydata *copydata;
    590 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    591 	void *mapaddr;
    592 	size_t maplen;
    593 	int reqtype = spc->spc_hdr.rsp_type;
    594 
    595 	switch (reqtype) {
    596 	case RUMPSP_COPYIN:
    597 	case RUMPSP_COPYINSTR:
    598 		/*LINTED*/
    599 		copydata = (struct rsp_copydata *)spc->spc_buf;
    600 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    601 		    copydata->rcp_addr, copydata->rcp_len));
    602 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    603 		    copydata->rcp_addr, copydata->rcp_len,
    604 		    reqtype == RUMPSP_COPYINSTR);
    605 		break;
    606 	case RUMPSP_COPYOUT:
    607 	case RUMPSP_COPYOUTSTR:
    608 		/*LINTED*/
    609 		copydata = (struct rsp_copydata *)spc->spc_buf;
    610 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    611 		    copydata->rcp_addr, copydata->rcp_len));
    612 		/*LINTED*/
    613 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    614 		    copydata->rcp_len);
    615 		break;
    616 	case RUMPSP_ANONMMAP:
    617 		/*LINTED*/
    618 		maplen = *(size_t *)spc->spc_buf;
    619 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    620 		    MAP_ANON, -1, 0);
    621 		if (mapaddr == MAP_FAILED)
    622 			mapaddr = NULL;
    623 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    624 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    625 		break;
    626 	case RUMPSP_RAISE:
    627 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    628 		raise((int)rhdr->rsp_signo);
    629 		/*
    630 		 * We most likely have signals blocked, but the signal
    631 		 * will be handled soon enough when we return.
    632 		 */
    633 		break;
    634 	default:
    635 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    636 		abort();
    637 		break;
    638 	}
    639 
    640 	spcfreebuf(spc);
    641 }
    642 
/*
 * Server address and the index of the matching parsetab[] entry.
 * Both are filled in by parseurl() in rumpclient_init() and used by
 * doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    645 
/*
 * dup until we get a "good" fd which does not collide with stdio
 *
 * myfd       descriptor to start from; -1 is passed through as is
 * mustchange if non-zero, the descriptor is dup'd at least once even
 *            if it is already "good", and the original myfd is left
 *            open for the caller to deal with
 *
 * Every other intermediate descriptor is closed.  Returns the new
 * descriptor, or -1 if host_dup() failed; errno from the failed dup is
 * preserved across the closes.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno = 0;
	unsigned int nold = 0;

	while (myfd != -1 && (mustchange || myfd <= 2)) {
		assert(nold < __arraycount(ofds));
		ofds[nold] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: don't keep its slot */
			mustchange = 0;
		} else {
			nold++;
		}
	}

	if (myfd == -1 && nold > 0)
		sverrno = errno;

	/* get rid of the descriptors we went through on the way */
	while (nold > 0) {
		nold--;
		host_close(ofds[nold]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
    677 
    678 static int
    679 doconnect(void)
    680 {
    681 	struct respwait rw;
    682 	struct rsp_hdr rhdr;
    683 	char banner[MAXBANNER];
    684 	int s, error, flags;
    685 	ssize_t n;
    686 
    687 	if (kq != -1)
    688 		host_close(kq);
    689 	kq = -1;
    690 	s = -1;
    691 
    692 	if (clispc.spc_fd != -1)
    693 		host_close(clispc.spc_fd);
    694 	clispc.spc_fd = -1;
    695 
    696 	/*
    697 	 * for reconnect, gate everyone out of the receiver code
    698 	 */
    699 	putwait_locked(&clispc, &rw, &rhdr);
    700 
    701 	pthread_mutex_lock(&clispc.spc_mtx);
    702 	clispc.spc_reconnecting = 1;
    703 	pthread_cond_broadcast(&clispc.spc_cv);
    704 	clispc.spc_generation++;
    705 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    706 		clispc.spc_istatus = SPCSTATUS_WANTED;
    707 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    708 	}
    709 	kickall(&clispc);
    710 
    711 	/*
    712 	 * we can release it already since we hold the
    713 	 * send lock during reconnect
    714 	 * XXX: assert it
    715 	 */
    716 	clispc.spc_istatus = SPCSTATUS_FREE;
    717 	pthread_mutex_unlock(&clispc.spc_mtx);
    718 	unputwait_locked(&clispc, &rw);
    719 
    720 	free(clispc.spc_buf);
    721 	clispc.spc_off = 0;
    722 
    723 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    724 	if (s == -1)
    725 		return -1;
    726 
    727 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
    728 		if (errno == EINTR)
    729 			continue;
    730 		ERRLOG(("rump_sp: client connect failed: %s\n",
    731 		    strerror(errno)));
    732 		return -1;
    733 	}
    734 
    735 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    736 		ERRLOG(("rump_sp: connect hook failed\n"));
    737 		return -1;
    738 	}
    739 
    740 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
    741 		ERRLOG(("rump_sp: failed to read banner\n"));
    742 		return -1;
    743 	}
    744 
    745 	if (banner[n-1] != '\n') {
    746 		ERRLOG(("rump_sp: invalid banner\n"));
    747 		return -1;
    748 	}
    749 	banner[n] = '\0';
    750 	/* XXX parse the banner some day */
    751 
    752 	flags = host_fcntl(s, F_GETFL, 0);
    753 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    754 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    755 		return -1;
    756 	}
    757 	clispc.spc_fd = s;
    758 	clispc.spc_state = SPCSTATE_RUNNING;
    759 	clispc.spc_reconnecting = 0;
    760 
    761 #ifdef USE_KQUEUE
    762 {
    763 	struct kevent kev[NSIG+1];
    764 	int i;
    765 
    766 	/* setup kqueue, we want all signals and the fd */
    767 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    768 		ERRLOG(("rump_sp: cannot setup kqueue"));
    769 		return -1;
    770 	}
    771 
    772 	for (i = 0; i < NSIG; i++) {
    773 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    774 	}
    775 	EV_SET(&kev[NSIG], clispc.spc_fd,
    776 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    777 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    778 		ERRLOG(("rump_sp: kevent() failed"));
    779 		return -1;
    780 	}
    781 }
    782 #endif /* USE_KQUEUE */
    783 
    784 	return 0;
    785 }
    786 
    787 static int
    788 doinit(void)
    789 {
    790 
    791 	TAILQ_INIT(&clispc.spc_respwait);
    792 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    793 	pthread_cond_init(&clispc.spc_cv, NULL);
    794 
    795 	return 0;
    796 }
    797 
/*
 * Default symbol lookup routine, a plain wrapper around dlsym().
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
/*
 * rumphijack_dlsym is a weak alias for the above, so a definition of
 * rumphijack_dlsym elsewhere takes its place.  rumpclient_init()
 * compares the two addresses to find out whether that happened.
 */
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
    807 
    808 static pid_t init_done = 0;
    809 
    810 int
    811 rumpclient_init(void)
    812 {
    813 	char *p;
    814 	int error;
    815 	int rv = -1;
    816 	int hstype;
    817 	pid_t mypid;
    818 
    819 	/*
    820 	 * Make sure we're not riding the context of a previous
    821 	 * host fork.  Note: it's *possible* that after n>1 forks
    822 	 * we have the same pid as one of our exited parents, but
    823 	 * I'm pretty sure there are 0 practical implications, since
    824 	 * it means generations would have to skip rumpclient init.
    825 	 */
    826 	if (init_done == (mypid = getpid()))
    827 		return 0;
    828 
    829 	/* kq does not traverse fork() */
    830 	if (init_done != 0)
    831 		kq = -1;
    832 	init_done = mypid;
    833 
    834 	sigfillset(&fullset);
    835 
    836 	/*
    837 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
    838 	 * wann wird man je verstehen?  wann wird man je verstehen?
    839 	 */
    840 #define FINDSYM2(_name_,_syscall_)					\
    841 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    842 	    #_syscall_)) == NULL) {					\
    843 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    844 			host_##_name_ = _name_; /* static fallback */	\
    845 		if (host_##_name_ == NULL)				\
    846 			errx(1, "cannot find %s: %s", #_syscall_,	\
    847 			    dlerror());					\
    848 	}
    849 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    850 #ifdef __NetBSD__
    851 	FINDSYM2(socket,__socket30)
    852 #else
    853 	FINDSYM(socket)
    854 #endif
    855 
    856 	FINDSYM(close)
    857 	FINDSYM(connect)
    858 	FINDSYM(fcntl)
    859 	FINDSYM(poll)
    860 	FINDSYM(read)
    861 	FINDSYM(sendmsg)
    862 	FINDSYM(setsockopt)
    863 	FINDSYM(dup)
    864 	FINDSYM(execve)
    865 
    866 #ifdef USE_KQUEUE
    867 	FINDSYM(kqueue)
    868 #if !__NetBSD_Prereq__(5,99,7)
    869 	FINDSYM(kevent)
    870 #else
    871 	FINDSYM2(kevent,_sys___kevent50)
    872 #endif
    873 #endif /* USE_KQUEUE */
    874 
    875 #undef	FINDSYM
    876 #undef	FINDSY2
    877 
    878 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    879 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    880 			errno = ENOENT;
    881 			goto out;
    882 		}
    883 	}
    884 
    885 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    886 		errno = error;
    887 		goto out;
    888 	}
    889 
    890 	if (doinit() == -1)
    891 		goto out;
    892 
    893 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    894 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    895 		unsetenv("RUMPCLIENT__EXECFD");
    896 		hstype = HANDSHAKE_EXEC;
    897 	} else {
    898 		if (doconnect() == -1)
    899 			goto out;
    900 		hstype = HANDSHAKE_GUEST;
    901 	}
    902 
    903 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    904 	if (error) {
    905 		pthread_mutex_destroy(&clispc.spc_mtx);
    906 		pthread_cond_destroy(&clispc.spc_cv);
    907 		if (clispc.spc_fd != -1)
    908 			host_close(clispc.spc_fd);
    909 		errno = error;
    910 		goto out;
    911 	}
    912 	rv = 0;
    913 
    914  out:
    915 	if (rv == -1)
    916 		init_done = 0;
    917 	return rv;
    918 }
    919 
/*
 * State handed from rumpclient_prefork() to rumpclient_fork_init()
 * and rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the server's prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
    925 
    926 struct rumpclient_fork *
    927 rumpclient_prefork(void)
    928 {
    929 	struct rumpclient_fork *rpf;
    930 	sigset_t omask;
    931 	void *resp;
    932 	int rv;
    933 
    934 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    935 	rpf = malloc(sizeof(*rpf));
    936 	if (rpf == NULL)
    937 		goto out;
    938 
    939 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    940 		free(rpf);
    941 		errno = rv;
    942 		rpf = NULL;
    943 		goto out;
    944 	}
    945 
    946 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    947 	free(resp);
    948 
    949 	rpf->fork_spc = clispc;
    950 	rpf->fork_kq = kq;
    951 
    952  out:
    953 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    954 	return rpf;
    955 }
    956 
    957 int
    958 rumpclient_fork_init(struct rumpclient_fork *rpf)
    959 {
    960 	int error;
    961 	int osock;
    962 
    963 	osock = clispc.spc_fd;
    964 	memset(&clispc, 0, sizeof(clispc));
    965 	clispc.spc_fd = osock;
    966 
    967 	kq = -1; /* kqueue descriptor is not copied over fork() */
    968 
    969 	if (doinit() == -1)
    970 		return -1;
    971 	if (doconnect() == -1)
    972 		return -1;
    973 
    974 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    975 	    0, false);
    976 	if (error) {
    977 		pthread_mutex_destroy(&clispc.spc_mtx);
    978 		pthread_cond_destroy(&clispc.spc_cv);
    979 		errno = error;
    980 		return -1;
    981 	}
    982 
    983 	return 0;
    984 }
    985 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the function does nothing and rpf is not touched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
    993 
    994 void
    995 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
    996 {
    997 
    998 	clispc = rpf->fork_spc;
    999 	kq = rpf->fork_kq;
   1000 }
   1001 
   1002 void
   1003 rumpclient_setconnretry(time_t timeout)
   1004 {
   1005 
   1006 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
   1007 		return; /* gigo */
   1008 
   1009 	retrytimo = timeout;
   1010 }
   1011 
   1012 int
   1013 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
   1014 {
   1015 	int fd = *fdp;
   1016 	int untilfd, rv;
   1017 	int newfd;
   1018 
   1019 	switch (variant) {
   1020 	case RUMPCLIENT_CLOSE_FCLOSEM:
   1021 		untilfd = MAX(clispc.spc_fd, kq);
   1022 		for (; fd <= untilfd; fd++) {
   1023 			if (fd == clispc.spc_fd || fd == kq)
   1024 				continue;
   1025 			rv = host_close(fd);
   1026 			if (rv == -1)
   1027 				return -1;
   1028 		}
   1029 		*fdp = fd;
   1030 		break;
   1031 
   1032 	case RUMPCLIENT_CLOSE_CLOSE:
   1033 	case RUMPCLIENT_CLOSE_DUP2:
   1034 		if (fd == clispc.spc_fd) {
   1035 			newfd = dupgood(clispc.spc_fd, 1);
   1036 			if (newfd == -1)
   1037 				return -1;
   1038 
   1039 #ifdef USE_KQUEUE
   1040 			{
   1041 			struct kevent kev[2];
   1042 
   1043 			/*
   1044 			 * now, we have a new socket number, so change
   1045 			 * the file descriptor that kqueue is
   1046 			 * monitoring.  remove old and add new.
   1047 			 */
   1048 			EV_SET(&kev[0], clispc.spc_fd,
   1049 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
   1050 			EV_SET(&kev[1], newfd,
   1051 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
   1052 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
   1053 				int sverrno = errno;
   1054 				host_close(newfd);
   1055 				errno = sverrno;
   1056 				return -1;
   1057 			}
   1058 			clispc.spc_fd = newfd;
   1059 			}
   1060 		}
   1061 		if (fd == kq) {
   1062 			newfd = dupgood(kq, 1);
   1063 			if (newfd == -1)
   1064 				return -1;
   1065 			kq = newfd;
   1066 #else /* USE_KQUEUE */
   1067 			clispc.spc_fd = newfd;
   1068 #endif /* !USE_KQUEUE */
   1069 		}
   1070 		break;
   1071 	}
   1072 
   1073 	return 0;
   1074 }
   1075 
/*
 * fork() for rump kernel clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 * NOTE(review): rumpclient__dofork is not defined in this file;
 * presumably it wraps the call in the prefork/fork_init sequence --
 * confirm against its definition.
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
   1082 
   1083 /*
   1084  * Process is about to exec.  Save info about our existing connection
   1085  * in the env.  rumpclient will check for this info in init().
   1086  * This is mostly for the benefit of rumphijack, but regular applications
   1087  * may use it as well.
   1088  */
   1089 int
   1090 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1091 {
   1092 	char buf[4096];
   1093 	char **newenv;
   1094 	char *envstr, *envstr2;
   1095 	size_t nelem;
   1096 	int rv, sverrno;
   1097 
   1098 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1099 	    clispc.spc_fd, kq);
   1100 	envstr = malloc(strlen(buf)+1);
   1101 	if (envstr == NULL) {
   1102 		return ENOMEM;
   1103 	}
   1104 	strcpy(envstr, buf);
   1105 
   1106 	/* do we have a fully parsed url we want to forward in the env? */
   1107 	if (*parsedurl != '\0') {
   1108 		snprintf(buf, sizeof(buf),
   1109 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1110 		envstr2 = malloc(strlen(buf)+1);
   1111 		if (envstr2 == NULL) {
   1112 			free(envstr);
   1113 			return ENOMEM;
   1114 		}
   1115 		strcpy(envstr2, buf);
   1116 	} else {
   1117 		envstr2 = NULL;
   1118 	}
   1119 
   1120 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1121 		continue;
   1122 
   1123 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1124 	if (newenv == NULL) {
   1125 		free(envstr2);
   1126 		free(envstr);
   1127 		return ENOMEM;
   1128 	}
   1129 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1130 
   1131 	newenv[nelem] = envstr;
   1132 	newenv[nelem+1] = envstr2;
   1133 	newenv[nelem+2] = NULL;
   1134 
   1135 	rv = host_execve(path, argv, newenv);
   1136 
   1137 	_DIAGASSERT(rv != 0);
   1138 	sverrno = errno;
   1139 	free(envstr2);
   1140 	free(envstr);
   1141 	free(newenv);
   1142 	errno = sverrno;
   1143 	return rv;
   1144 }
   1145 
/*
 * daemon(3) for rump kernel clients.  daemon() forks, so the call is
 * bracketed like a fork: rumpclient_prefork() before it, and
 * rumpclient_fork_init() in the process that continues after it.
 *
 * nochdir and noclose are passed to daemon() as they are.
 *
 * Returns 0 on success and -1 on failure with errno from the failed
 * step.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		/* the fork data is ours and nobody needs it anymore */
		free(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf) == -1 ? -1 : 0;

	/*
	 * rumpclient_fork_init() only reads the authentication data out
	 * of rf, so release it here regardless of the outcome.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv;
}
   1167