Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.59
      1 /*      $NetBSD: rumpclient.c,v 1.59 2014/04/02 15:04:19 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <rump/rumpuser_port.h>
     33 
     34 /*
     35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
     36  * use kqueue on other BSD's too, but I haven't tested those.  We
     37  * want to use kqueue because it will give us the ability to get signal
     38  * notifications but defer their handling to a stage where we do not
     39  * hold the communication lock.  Taking a signal while holding on to
     40  * that lock may cause a deadlock.  Therefore, block signals throughout
     41  * the RPC when using poll.  On Linux, we use signalfd in the same role
     42  * as kqueue on NetBSD to be able to take signals while waiting for a
     43  * response from the server.
     44  */
     45 
     46 #ifdef __NetBSD__
     47 #define USE_KQUEUE
     48 #endif
     49 #ifdef __linux__
     50 #define USE_SIGNALFD
     51 #endif
     52 
     53 __RCSID("$NetBSD: rumpclient.c,v 1.59 2014/04/02 15:04:19 pooka Exp $");
     54 
     55 #include <sys/param.h>
     56 #include <sys/mman.h>
     57 #include <sys/socket.h>
     58 #include <sys/time.h>
     59 
     60 #ifdef USE_KQUEUE
     61 #include <sys/event.h>
     62 #endif
     63 
     64 #include <arpa/inet.h>
     65 #include <netinet/in.h>
     66 #include <netinet/tcp.h>
     67 
     68 #include <assert.h>
     69 #include <dlfcn.h>
     70 #include <errno.h>
     71 #include <fcntl.h>
     72 #include <poll.h>
     73 #include <pthread.h>
     74 #include <signal.h>
     75 #include <stdarg.h>
     76 #include <stdbool.h>
     77 #include <stdio.h>
     78 #include <stdlib.h>
     79 #include <string.h>
     80 #include <unistd.h>
     81 
     82 #include <rump/rumpclient.h>
     83 
/*
 * Host operation pointers.  Every host system call used by this file
 * goes through one of these pointers; they are filled in by
 * rumpclient_init() (see the FINDSYM/FINDSYM2 macros there).
 * NOTE(review): HOSTOPS is presumably consumed by the included
 * sp_common.c -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue-based signal/socket notification (selected on NetBSD above) */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

/* signalfd-based signal notification (selected on Linux above) */
#ifdef USE_SIGNALFD
#include <sys/signalfd.h>

int	(*host_signalfd)(int, const sigset_t *, int);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);
    108 
    109 #include "sp_common.c"
    110 #include "rumpuser_sigtrans.c"
    111 
/*
 * The one and only client connection of this process.  spc_fd is -1
 * for as long as no socket to the server is open.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/*
 * Descriptor produced by makeholyfd() (kqueue or signalfd descriptor,
 * or -1 when neither is in use).  cliwaitresp() waits on it to learn
 * about pending signals while waiting for a response.
 */
static int holyfd;
/* set of all signals, filled in by rumpclient_init() with sigfillset() */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed via rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
    130 
    131 static int
    132 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
    133 {
    134 	struct timeval starttime, curtime;
    135 	time_t prevreconmsg;
    136 	unsigned reconretries;
    137 	int rv;
    138 
    139 	for (prevreconmsg = 0, reconretries = 0;;) {
    140 		rv = dosend(spc, iov, iovlen);
    141 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    142 			/* no persistent connections */
    143 			if (retrytimo == 0) {
    144 				rv = ENOTCONN;
    145 				break;
    146 			}
    147 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    148 				_exit(1);
    149 
    150 			if (!prevreconmsg) {
    151 				prevreconmsg = time(NULL);
    152 				gettimeofday(&starttime, NULL);
    153 			}
    154 			if (reconretries == 1) {
    155 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    156 					rv = ENOTCONN;
    157 					break;
    158 				}
    159 				fprintf(stderr, "rump_sp: connection to "
    160 				    "kernel lost, trying to reconnect ...\n");
    161 			} else if (time(NULL) - prevreconmsg > 120) {
    162 				fprintf(stderr, "rump_sp: still trying to "
    163 				    "reconnect ...\n");
    164 				prevreconmsg = time(NULL);
    165 			}
    166 
    167 			/* check that we aren't over the limit */
    168 			if (retrytimo > 0) {
    169 				time_t tdiff;
    170 
    171 				gettimeofday(&curtime, NULL);
    172 				tdiff = curtime.tv_sec - starttime.tv_sec;
    173 				if (starttime.tv_usec > curtime.tv_usec)
    174 					tdiff--;
    175 				if (tdiff >= retrytimo) {
    176 					fprintf(stderr, "rump_sp: reconnect "
    177 					    "failed, %lld second timeout\n",
    178 					    (long long)retrytimo);
    179 					return ENOTCONN;
    180 				}
    181 			}
    182 
    183 			/* adhoc backoff timer */
    184 			if (reconretries < 10) {
    185 				usleep(100000 * reconretries);
    186 			} else {
    187 				sleep(MIN(10, reconretries-9));
    188 			}
    189 			reconretries++;
    190 
    191 			if ((rv = doconnect()) != 0)
    192 				continue;
    193 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    194 			    NULL, 0, true)) != 0)
    195 				continue;
    196 
    197 			/*
    198 			 * ok, reconnect succesful.  we need to return to
    199 			 * the upper layer to get the entire PDU resent.
    200 			 */
    201 			if (reconretries != 1)
    202 				fprintf(stderr, "rump_sp: reconnected!\n");
    203 			rv = EAGAIN;
    204 			break;
    205 		} else {
    206 			_DIAGASSERT(errno != EAGAIN);
    207 			break;
    208 		}
    209 	}
    210 
    211 	return rv;
    212 }
    213 
    214 static int
    215 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
    216 	bool keeplock)
    217 {
    218 	uint64_t mygen;
    219 	bool imalive = true;
    220 
    221 	pthread_mutex_lock(&spc->spc_mtx);
    222 	if (!keeplock)
    223 		sendunlockl(spc);
    224 	mygen = spc->spc_generation;
    225 
    226 	rw->rw_error = 0;
    227 	while (!rw->rw_done && rw->rw_error == 0) {
    228 		if (__predict_false(spc->spc_generation != mygen || !imalive))
    229 			break;
    230 
    231 		/* are we free to receive? */
    232 		if (spc->spc_istatus == SPCSTATUS_FREE) {
    233 			int gotresp, dosig, rv;
    234 
    235 			spc->spc_istatus = SPCSTATUS_BUSY;
    236 			pthread_mutex_unlock(&spc->spc_mtx);
    237 
    238 			dosig = 0;
    239 			for (gotresp = 0; !gotresp; ) {
    240 #ifdef USE_KQUEUE
    241 				struct kevent kev[8];
    242 				int i;
    243 
    244 				/*
    245 				 * typically we don't have a frame waiting
    246 				 * when we come in here, so call kevent now
    247 				 */
    248 				rv = host_kevent(holyfd, NULL, 0,
    249 				    kev, __arraycount(kev), NULL);
    250 
    251 				if (__predict_false(rv == -1)) {
    252 					goto activity;
    253 				}
    254 
    255 				/*
    256 				 * XXX: don't know how this can happen
    257 				 * (timeout cannot expire since there
    258 				 * isn't one), but it does happen.
    259 				 * treat it as an expectional condition
    260 				 * and go through tryread to determine
    261 				 * alive status.
    262 				 */
    263 				if (__predict_false(rv == 0))
    264 					goto activity;
    265 
    266 				for (i = 0; i < rv; i++) {
    267 					if (kev[i].filter == EVFILT_SIGNAL)
    268 						dosig++;
    269 				}
    270 				if (dosig)
    271 					goto cleanup;
    272 
    273 				/*
    274 				 * ok, activity.  try to read a frame to
    275 				 * determine what happens next.
    276 				 */
    277  activity:
    278 #else /* !USE_KQUEUE */
    279 				struct pollfd pfd[2];
    280 
    281 				pfd[0].fd = clispc.spc_fd;
    282 				pfd[0].events = POLLIN;
    283 				pfd[1].fd = holyfd;
    284 				pfd[1].events = POLLIN;
    285 
    286 				rv = host_poll(pfd, 2, -1);
    287 				if (pfd[1].revents & POLLIN) {
    288 					dosig = 1;
    289 					goto cleanup;
    290 				}
    291 #endif /* !USE_KQUEUE */
    292 
    293 				switch (readframe(spc)) {
    294 				case 0:
    295 					continue;
    296 				case -1:
    297 					imalive = false;
    298 					goto cleanup;
    299 				default:
    300 					/* case 1 */
    301 					break;
    302 				}
    303 
    304 				switch (spc->spc_hdr.rsp_class) {
    305 				case RUMPSP_RESP:
    306 				case RUMPSP_ERROR:
    307 					kickwaiter(spc);
    308 					gotresp = spc->spc_hdr.rsp_reqno ==
    309 					    rw->rw_reqno;
    310 					break;
    311 				case RUMPSP_REQ:
    312 					handlereq(spc);
    313 					break;
    314 				default:
    315 					/* panic */
    316 					break;
    317 				}
    318 			}
    319 
    320  cleanup:
    321 			pthread_mutex_lock(&spc->spc_mtx);
    322 			if (spc->spc_istatus == SPCSTATUS_WANTED)
    323 				kickall(spc);
    324 			spc->spc_istatus = SPCSTATUS_FREE;
    325 
    326 			/* take one for the team */
    327 			if (dosig) {
    328 				pthread_mutex_unlock(&spc->spc_mtx);
    329 				pthread_sigmask(SIG_SETMASK, mask, NULL);
    330 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
    331 				pthread_mutex_lock(&spc->spc_mtx);
    332 			}
    333 		} else {
    334 			spc->spc_istatus = SPCSTATUS_WANTED;
    335 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
    336 		}
    337 	}
    338 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
    339 	pthread_mutex_unlock(&spc->spc_mtx);
    340 	pthread_cond_destroy(&rw->rw_cv);
    341 
    342 	if (spc->spc_generation != mygen || !imalive) {
    343 		return ENOTCONN;
    344 	}
    345 	return rw->rw_error;
    346 }
    347 
    348 static int
    349 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    350 	const void *data, size_t dlen, void **resp)
    351 {
    352 	struct rsp_hdr rhdr;
    353 	struct respwait rw;
    354 	struct iovec iov[2];
    355 	int rv;
    356 
    357 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    358 	rhdr.rsp_class = RUMPSP_REQ;
    359 	rhdr.rsp_type = RUMPSP_SYSCALL;
    360 	rhdr.rsp_sysnum = sysnum;
    361 
    362 	IOVPUT(iov[0], rhdr);
    363 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    364 
    365 	do {
    366 		putwait(spc, &rw, &rhdr);
    367 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    368 			unputwait(spc, &rw);
    369 			continue;
    370 		}
    371 
    372 		rv = cliwaitresp(spc, &rw, omask, false);
    373 		if (rv == ENOTCONN)
    374 			rv = EAGAIN;
    375 	} while (rv == EAGAIN);
    376 
    377 	*resp = rw.rw_data;
    378 	return rv;
    379 }
    380 
    381 static int
    382 handshake_req(struct spclient *spc, int type, void *data,
    383 	int cancel, bool haslock)
    384 {
    385 	struct handshake_fork rf;
    386 	const char *myprogname = NULL; /* XXXgcc */
    387 	struct rsp_hdr rhdr;
    388 	struct respwait rw;
    389 	sigset_t omask;
    390 	size_t bonus;
    391 	struct iovec iov[2];
    392 	int rv;
    393 
    394 	if (type == HANDSHAKE_FORK) {
    395 		bonus = sizeof(rf);
    396 	} else {
    397 #ifdef __NetBSD__
    398 		/* would procfs work on NetBSD too? */
    399 		myprogname = getprogname();
    400 #else
    401 		int fd = open("/proc/self/comm", O_RDONLY);
    402 		if (fd == -1) {
    403 			myprogname = "???";
    404 		} else {
    405 			static char commname[128];
    406 
    407 			memset(commname, 0, sizeof(commname));
    408 			if (read(fd, commname, sizeof(commname)) > 0) {
    409 				char *n;
    410 
    411 				n = strrchr(commname, '\n');
    412 				if (n)
    413 					*n = '\0';
    414 				myprogname = commname;
    415 			} else {
    416 				myprogname = "???";
    417 			}
    418 			close(fd);
    419 		}
    420 #endif
    421 		bonus = strlen(myprogname)+1;
    422 	}
    423 
    424 	/* performs server handshake */
    425 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    426 	rhdr.rsp_class = RUMPSP_REQ;
    427 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    428 	rhdr.rsp_handshake = type;
    429 
    430 	IOVPUT(iov[0], rhdr);
    431 
    432 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    433 	if (haslock)
    434 		putwait_locked(spc, &rw, &rhdr);
    435 	else
    436 		putwait(spc, &rw, &rhdr);
    437 	if (type == HANDSHAKE_FORK) {
    438 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    439 		rf.rf_cancel = cancel;
    440 		IOVPUT(iov[1], rf);
    441 	} else {
    442 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    443 	}
    444 	rv = send_with_recon(spc, iov, __arraycount(iov));
    445 	if (rv || cancel) {
    446 		if (haslock)
    447 			unputwait_locked(spc, &rw);
    448 		else
    449 			unputwait(spc, &rw);
    450 		if (cancel) {
    451 			goto out;
    452 		}
    453 	} else {
    454 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    455 	}
    456 	if (rv)
    457 		goto out;
    458 
    459 	rv = *(int *)rw.rw_data;
    460 	free(rw.rw_data);
    461 
    462  out:
    463 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    464 	return rv;
    465 }
    466 
    467 static int
    468 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    469 {
    470 	struct rsp_hdr rhdr;
    471 	struct respwait rw;
    472 	struct iovec iov[1];
    473 	int rv;
    474 
    475 	rhdr.rsp_len = sizeof(rhdr);
    476 	rhdr.rsp_class = RUMPSP_REQ;
    477 	rhdr.rsp_type = RUMPSP_PREFORK;
    478 	rhdr.rsp_error = 0;
    479 
    480 	IOVPUT(iov[0], rhdr);
    481 
    482 	do {
    483 		putwait(spc, &rw, &rhdr);
    484 		rv = send_with_recon(spc, iov, __arraycount(iov));
    485 		if (rv != 0) {
    486 			unputwait(spc, &rw);
    487 			continue;
    488 		}
    489 
    490 		rv = cliwaitresp(spc, &rw, omask, false);
    491 		if (rv == ENOTCONN)
    492 			rv = EAGAIN;
    493 	} while (rv == EAGAIN);
    494 
    495 	*resp = rw.rw_data;
    496 	return rv;
    497 }
    498 
    499 /*
    500  * prevent response code from deadlocking with reconnect code
    501  */
    502 static int
    503 resp_sendlock(struct spclient *spc)
    504 {
    505 	int rv = 0;
    506 
    507 	pthread_mutex_lock(&spc->spc_mtx);
    508 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    509 		if (__predict_false(spc->spc_reconnecting)) {
    510 			rv = EBUSY;
    511 			goto out;
    512 		}
    513 		spc->spc_ostatus = SPCSTATUS_WANTED;
    514 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    515 	}
    516 	spc->spc_ostatus = SPCSTATUS_BUSY;
    517 
    518  out:
    519 	pthread_mutex_unlock(&spc->spc_mtx);
    520 	return rv;
    521 }
    522 
    523 static void
    524 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    525 	int wantstr)
    526 {
    527 	struct rsp_hdr rhdr;
    528 	struct iovec iov[2];
    529 
    530 	if (wantstr)
    531 		dlen = MIN(dlen, strlen(data)+1);
    532 
    533 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    534 	rhdr.rsp_reqno = reqno;
    535 	rhdr.rsp_class = RUMPSP_RESP;
    536 	rhdr.rsp_type = RUMPSP_COPYIN;
    537 	rhdr.rsp_sysnum = 0;
    538 
    539 	IOVPUT(iov[0], rhdr);
    540 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    541 
    542 	if (resp_sendlock(spc) != 0)
    543 		return;
    544 	(void)SENDIOV(spc, iov);
    545 	sendunlock(spc);
    546 }
    547 
    548 static void
    549 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    550 {
    551 	struct rsp_hdr rhdr;
    552 	struct iovec iov[2];
    553 
    554 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    555 	rhdr.rsp_reqno = reqno;
    556 	rhdr.rsp_class = RUMPSP_RESP;
    557 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    558 	rhdr.rsp_sysnum = 0;
    559 
    560 	IOVPUT(iov[0], rhdr);
    561 	IOVPUT(iov[1], addr);
    562 
    563 	if (resp_sendlock(spc) != 0)
    564 		return;
    565 	(void)SENDIOV(spc, iov);
    566 	sendunlock(spc);
    567 }
    568 
    569 int
    570 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    571 	register_t *retval)
    572 {
    573 	struct rsp_sysresp *resp;
    574 	sigset_t omask;
    575 	void *rdata;
    576 	int rv;
    577 
    578 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    579 
    580 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    581 	    sysnum, data, dlen));
    582 
    583 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    584 	if (rv)
    585 		goto out;
    586 
    587 	resp = rdata;
    588 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    589 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    590 
    591 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    592 	rv = resp->rsys_error;
    593 	free(rdata);
    594 
    595  out:
    596 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    597 	return rv;
    598 }
    599 
    600 static void
    601 handlereq(struct spclient *spc)
    602 {
    603 	struct rsp_copydata *copydata;
    604 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    605 	void *mapaddr;
    606 	size_t maplen;
    607 	int reqtype = spc->spc_hdr.rsp_type;
    608 	int sig;
    609 
    610 	switch (reqtype) {
    611 	case RUMPSP_COPYIN:
    612 	case RUMPSP_COPYINSTR:
    613 		/*LINTED*/
    614 		copydata = (struct rsp_copydata *)spc->spc_buf;
    615 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    616 		    copydata->rcp_addr, copydata->rcp_len));
    617 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    618 		    copydata->rcp_addr, copydata->rcp_len,
    619 		    reqtype == RUMPSP_COPYINSTR);
    620 		break;
    621 	case RUMPSP_COPYOUT:
    622 	case RUMPSP_COPYOUTSTR:
    623 		/*LINTED*/
    624 		copydata = (struct rsp_copydata *)spc->spc_buf;
    625 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    626 		    copydata->rcp_addr, copydata->rcp_len));
    627 		/*LINTED*/
    628 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    629 		    copydata->rcp_len);
    630 		break;
    631 	case RUMPSP_ANONMMAP:
    632 		/*LINTED*/
    633 		maplen = *(size_t *)spc->spc_buf;
    634 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    635 		    MAP_ANON|MAP_PRIVATE, -1, 0);
    636 		if (mapaddr == MAP_FAILED)
    637 			mapaddr = NULL;
    638 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    639 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    640 		break;
    641 	case RUMPSP_RAISE:
    642 		sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
    643 		DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
    644 		raise(sig);
    645 		/*
    646 		 * We most likely have signals blocked, but the signal
    647 		 * will be handled soon enough when we return.
    648 		 */
    649 		break;
    650 	default:
    651 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    652 		abort();
    653 		break;
    654 	}
    655 
    656 	spcfreebuf(spc);
    657 }
    658 
/*
 * Server address as parsed by parseurl() in rumpclient_init():
 * ptab_idx indexes parsetab[] and serv_sa is the socket address
 * that doconnect() connects to.
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    661 
/*
 * dup until we get a "good" fd which does not collide with stdio
 * (i.e. one greater than 2).
 *
 * myfd       - descriptor to start from; -1 is passed straight through
 * mustchange - if non-zero, dup at least once even if myfd is already
 *              good, and leave the original myfd open
 *
 * The intermediate descriptors which were dup'd along the way are
 * closed.  Returns the good descriptor, or -1 if myfd was -1 or a dup
 * failed; errno from the failing dup is preserved across the closes.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno = 0;
	unsigned int nold = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nold < __arraycount(ofds));
		ofds[nold] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* do not count the slot: prevent closing old fd */
			mustchange = 0;
		} else {
			nold++;
		}
	}

	if (myfd == -1 && nold > 0)
		sverrno = errno;

	while (nold > 0) {
		nold--;
		host_close(ofds[nold]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
    693 
#if defined(USE_KQUEUE)

/*
 * Create the notification descriptor: a kqueue which reports signals
 * 1..NSIG as well as readability of the server socket.
 * Returns the descriptor or -1 on failure.
 */
static int
makeholyfd(void)
{
	struct kevent kev[NSIG+1];
	int signo, kq;

	/* setup kqueue, we want all signals and the fd */
	kq = dupgood(host_kqueue(), 0);
	if (kq == -1) {
		ERRLOG(("rump_sp: cannot setup kqueue"));
		return -1;
	}

	for (signo = 1; signo <= NSIG; signo++) {
		EV_SET(&kev[signo-1], signo, EVFILT_SIGNAL,
		    EV_ADD|EV_ENABLE, 0, 0, 0);
	}
	EV_SET(&kev[NSIG], clispc.spc_fd,
	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);

	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) != -1)
		return kq;

	ERRLOG(("rump_sp: kevent() failed"));
	host_close(kq);
	return -1;
}

#elif defined(USE_SIGNALFD) /* !USE_KQUEUE */

/*
 * Create the notification descriptor: a signalfd covering all signals.
 * Returns whatever signalfd returns (-1 on failure).
 */
static int
makeholyfd(void)
{
	int sfd;

	sfd = host_signalfd(-1, &fullset, 0);
	return sfd;
}

#else /* !USE_KQUEUE && !USE_SIGNALFD */

/*
 * No signal notification mechanism available: there is no
 * notification descriptor, always returns -1.
 */
static int
makeholyfd(void)
{
	const int nofd = -1;

	return nofd;
}

#endif
    741 
    742 static int
    743 doconnect(void)
    744 {
    745 	struct respwait rw;
    746 	struct rsp_hdr rhdr;
    747 	char banner[MAXBANNER];
    748 	int s, error, flags;
    749 	ssize_t n;
    750 
    751 	if (holyfd != -1)
    752 		host_close(holyfd);
    753 	holyfd = -1;
    754 	s = -1;
    755 
    756 	if (clispc.spc_fd != -1)
    757 		host_close(clispc.spc_fd);
    758 	clispc.spc_fd = -1;
    759 
    760 	/*
    761 	 * for reconnect, gate everyone out of the receiver code
    762 	 */
    763 	putwait_locked(&clispc, &rw, &rhdr);
    764 
    765 	pthread_mutex_lock(&clispc.spc_mtx);
    766 	clispc.spc_reconnecting = 1;
    767 	pthread_cond_broadcast(&clispc.spc_cv);
    768 	clispc.spc_generation++;
    769 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    770 		clispc.spc_istatus = SPCSTATUS_WANTED;
    771 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    772 	}
    773 	kickall(&clispc);
    774 
    775 	/*
    776 	 * we can release it already since we hold the
    777 	 * send lock during reconnect
    778 	 * XXX: assert it
    779 	 */
    780 	clispc.spc_istatus = SPCSTATUS_FREE;
    781 	pthread_mutex_unlock(&clispc.spc_mtx);
    782 	unputwait_locked(&clispc, &rw);
    783 
    784 	free(clispc.spc_buf);
    785 	clispc.spc_off = 0;
    786 
    787 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    788 	if (s == -1)
    789 		return -1;
    790 
    791 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
    792 		if (errno == EINTR)
    793 			continue;
    794 		ERRLOG(("rump_sp: client connect failed: %s\n",
    795 		    strerror(errno)));
    796 		return -1;
    797 	}
    798 
    799 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    800 		ERRLOG(("rump_sp: connect hook failed\n"));
    801 		return -1;
    802 	}
    803 
    804 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
    805 		ERRLOG(("rump_sp: failed to read banner\n"));
    806 		return -1;
    807 	}
    808 
    809 	if (banner[n-1] != '\n') {
    810 		ERRLOG(("rump_sp: invalid banner\n"));
    811 		return -1;
    812 	}
    813 	banner[n] = '\0';
    814 	/* XXX parse the banner some day */
    815 
    816 	flags = host_fcntl(s, F_GETFL, 0);
    817 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    818 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    819 		return -1;
    820 	}
    821 	clispc.spc_fd = s;
    822 	clispc.spc_state = SPCSTATE_RUNNING;
    823 	clispc.spc_reconnecting = 0;
    824 	holyfd = makeholyfd();
    825 
    826 	return 0;
    827 }
    828 
    829 static int
    830 doinit(void)
    831 {
    832 
    833 	TAILQ_INIT(&clispc.spc_respwait);
    834 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    835 	pthread_cond_init(&clispc.spc_cv, NULL);
    836 
    837 	return 0;
    838 }
    839 
#ifdef RTLD_NEXT
/*
 * Plain wrapper around dlsym().  It is the default target of the weak
 * symbol rumphijack_dlsym declared below; rumpclient_init() compares
 * the two to tell whether rumphijack_dlsym still resolves to this
 * wrapper.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
/* weak alias: resolves to rumpclient__dlsym unless overridden */
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
    851 
    852 static pid_t init_done = 0;
    853 
    854 int
    855 rumpclient_init(void)
    856 {
    857 	char *p;
    858 	int error;
    859 	int rv = -1;
    860 	int hstype;
    861 	pid_t mypid;
    862 
    863 	/*
    864 	 * Make sure we're not riding the context of a previous
    865 	 * host fork.  Note: it's *possible* that after n>1 forks
    866 	 * we have the same pid as one of our exited parents, but
    867 	 * I'm pretty sure there are 0 practical implications, since
    868 	 * it means generations would have to skip rumpclient init.
    869 	 */
    870 	if (init_done == (mypid = getpid()))
    871 		return 0;
    872 
    873 	/* kq does not traverse fork() */
    874 #ifdef USE_KQUEUE
    875 	if (init_done != 0)
    876 		holyfd = -1;
    877 #endif
    878 	init_done = mypid;
    879 
    880 	sigfillset(&fullset);
    881 
    882 	/*
    883 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
    884 	 * wann wird man je verstehen?  wann wird man je verstehen?
    885 	 */
    886 #ifdef RTLD_NEXT
    887 #define FINDSYM2(_name_,_syscall_)					\
    888 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    889 	    #_syscall_)) == NULL) {					\
    890 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    891 			host_##_name_ = _name_; /* static fallback */	\
    892 		if (host_##_name_ == NULL) {				\
    893 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
    894 			    dlerror());					\
    895 			exit(1);					\
    896 		}							\
    897 	}
    898 #else
    899 #define FINDSYM2(_name_,_syscall)					\
    900 	host_##_name_ = _name_;
    901 #endif
    902 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    903 #ifdef __NetBSD__
    904 	FINDSYM2(socket,__socket30)
    905 #else
    906 	FINDSYM(socket)
    907 #endif
    908 
    909 	FINDSYM(close)
    910 	FINDSYM(connect)
    911 	FINDSYM(fcntl)
    912 	FINDSYM(poll)
    913 	FINDSYM(read)
    914 	FINDSYM(sendmsg)
    915 	FINDSYM(setsockopt)
    916 	FINDSYM(dup)
    917 	FINDSYM(execve)
    918 
    919 #ifdef USE_KQUEUE
    920 	FINDSYM(kqueue)
    921 #if !__NetBSD_Prereq__(5,99,7)
    922 	FINDSYM(kevent)
    923 #else
    924 	FINDSYM2(kevent,_sys___kevent50)
    925 #endif
    926 #endif /* USE_KQUEUE */
    927 
    928 #ifdef USE_SIGNALFD
    929 	FINDSYM(signalfd)
    930 #endif
    931 
    932 #undef	FINDSYM
    933 #undef	FINDSY2
    934 
    935 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    936 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    937 			fprintf(stderr, "error: RUMP_SERVER not set\n");
    938 			errno = ENOENT;
    939 			goto out;
    940 		}
    941 	}
    942 
    943 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    944 		errno = error;
    945 		goto out;
    946 	}
    947 
    948 	if (doinit() == -1)
    949 		goto out;
    950 
    951 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    952 		sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
    953 		unsetenv("RUMPCLIENT__EXECFD");
    954 		hstype = HANDSHAKE_EXEC;
    955 	} else {
    956 		if (doconnect() == -1)
    957 			goto out;
    958 		hstype = HANDSHAKE_GUEST;
    959 	}
    960 
    961 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    962 	if (error) {
    963 		pthread_mutex_destroy(&clispc.spc_mtx);
    964 		pthread_cond_destroy(&clispc.spc_cv);
    965 		if (clispc.spc_fd != -1)
    966 			host_close(clispc.spc_fd);
    967 		errno = error;
    968 		goto out;
    969 	}
    970 	rv = 0;
    971 
    972  out:
    973 	if (rv == -1)
    974 		init_done = 0;
    975 	return rv;
    976 }
    977 
/*
 * State captured by rumpclient_prefork() for use after the host
 * fork: by rumpclient_fork_init() for the handshake and by
 * rumpclient_fork_vparent() to restore the saved connection state.
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_holyfd;		/* value of holyfd at prefork time */
};
    983 
    984 struct rumpclient_fork *
    985 rumpclient_prefork(void)
    986 {
    987 	struct rumpclient_fork *rpf;
    988 	sigset_t omask;
    989 	void *resp;
    990 	int rv;
    991 
    992 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    993 	rpf = malloc(sizeof(*rpf));
    994 	if (rpf == NULL)
    995 		goto out;
    996 
    997 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    998 		free(rpf);
    999 		errno = rv;
   1000 		rpf = NULL;
   1001 		goto out;
   1002 	}
   1003 
   1004 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
   1005 	free(resp);
   1006 
   1007 	rpf->fork_spc = clispc;
   1008 	rpf->fork_holyfd = holyfd;
   1009 
   1010  out:
   1011 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
   1012 	return rpf;
   1013 }
   1014 
   1015 int
   1016 rumpclient_fork_init(struct rumpclient_fork *rpf)
   1017 {
   1018 	int error;
   1019 	int osock;
   1020 
   1021 	osock = clispc.spc_fd;
   1022 	memset(&clispc, 0, sizeof(clispc));
   1023 	clispc.spc_fd = osock;
   1024 
   1025 #ifdef USE_KQUEUE
   1026 	holyfd = -1; /* kqueue descriptor is not copied over fork() */
   1027 #else
   1028 	if (holyfd != -1) {
   1029 		host_close(holyfd);
   1030 		holyfd = -1;
   1031 	}
   1032 #endif
   1033 
   1034 	if (doinit() == -1)
   1035 		return -1;
   1036 	if (doconnect() == -1)
   1037 		return -1;
   1038 
   1039 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
   1040 	    0, false);
   1041 	if (error) {
   1042 		pthread_mutex_destroy(&clispc.spc_mtx);
   1043 		pthread_cond_destroy(&clispc.spc_cv);
   1044 		errno = error;
   1045 		return -1;
   1046 	}
   1047 
   1048 	return 0;
   1049 }
   1050 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the function does nothing and rpf is not touched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
   1058 
   1059 void
   1060 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
   1061 {
   1062 
   1063 	clispc = rpf->fork_spc;
   1064 	holyfd = rpf->fork_holyfd;
   1065 }
   1066 
   1067 void
   1068 rumpclient_setconnretry(time_t timeout)
   1069 {
   1070 
   1071 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
   1072 		return; /* gigo */
   1073 
   1074 	retrytimo = timeout;
   1075 }
   1076 
   1077 int
   1078 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
   1079 {
   1080 	int fd = *fdp;
   1081 	int untilfd, rv;
   1082 	int newfd;
   1083 
   1084 	switch (variant) {
   1085 	case RUMPCLIENT_CLOSE_FCLOSEM:
   1086 		untilfd = MAX(clispc.spc_fd, holyfd);
   1087 		for (; fd <= untilfd; fd++) {
   1088 			if (fd == clispc.spc_fd || fd == holyfd)
   1089 				continue;
   1090 			rv = host_close(fd);
   1091 			if (rv == -1)
   1092 				return -1;
   1093 		}
   1094 		*fdp = fd;
   1095 		break;
   1096 
   1097 	case RUMPCLIENT_CLOSE_CLOSE:
   1098 	case RUMPCLIENT_CLOSE_DUP2:
   1099 		if (fd == clispc.spc_fd) {
   1100 			newfd = dupgood(clispc.spc_fd, 1);
   1101 			if (newfd == -1)
   1102 				return -1;
   1103 
   1104 #ifdef USE_KQUEUE
   1105 			{
   1106 			struct kevent kev[2];
   1107 
   1108 			/*
   1109 			 * now, we have a new socket number, so change
   1110 			 * the file descriptor that kqueue is
   1111 			 * monitoring.  remove old and add new.
   1112 			 */
   1113 			EV_SET(&kev[0], clispc.spc_fd,
   1114 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
   1115 			EV_SET(&kev[1], newfd,
   1116 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
   1117 			if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
   1118 				int sverrno = errno;
   1119 				host_close(newfd);
   1120 				errno = sverrno;
   1121 				return -1;
   1122 			}}
   1123 #endif /* !USE_KQUEUE */
   1124 			clispc.spc_fd = newfd;
   1125 		}
   1126 		if (holyfd != -1 && fd == holyfd) {
   1127 			newfd = dupgood(holyfd, 1);
   1128 			if (newfd == -1)
   1129 				return -1;
   1130 			holyfd = newfd;
   1131 		}
   1132 		break;
   1133 	}
   1134 
   1135 	return 0;
   1136 }
   1137 
/*
 * fork() for rump clients: hands the host fork() routine to
 * rumpclient__dofork() and returns whatever that call returns.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
   1144 
   1145 /*
   1146  * Process is about to exec.  Save info about our existing connection
   1147  * in the env.  rumpclient will check for this info in init().
   1148  * This is mostly for the benefit of rumphijack, but regular applications
   1149  * may use it as well.
   1150  */
   1151 int
   1152 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1153 {
   1154 	char buf[4096];
   1155 	char **newenv;
   1156 	char *envstr, *envstr2;
   1157 	size_t nelem;
   1158 	int rv, sverrno;
   1159 
   1160 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1161 	    clispc.spc_fd, holyfd);
   1162 	envstr = malloc(strlen(buf)+1);
   1163 	if (envstr == NULL) {
   1164 		return ENOMEM;
   1165 	}
   1166 	strcpy(envstr, buf);
   1167 
   1168 	/* do we have a fully parsed url we want to forward in the env? */
   1169 	if (*parsedurl != '\0') {
   1170 		snprintf(buf, sizeof(buf),
   1171 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1172 		envstr2 = malloc(strlen(buf)+1);
   1173 		if (envstr2 == NULL) {
   1174 			free(envstr);
   1175 			return ENOMEM;
   1176 		}
   1177 		strcpy(envstr2, buf);
   1178 	} else {
   1179 		envstr2 = NULL;
   1180 	}
   1181 
   1182 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1183 		continue;
   1184 
   1185 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1186 	if (newenv == NULL) {
   1187 		free(envstr2);
   1188 		free(envstr);
   1189 		return ENOMEM;
   1190 	}
   1191 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1192 
   1193 	newenv[nelem] = envstr;
   1194 	newenv[nelem+1] = envstr2;
   1195 	newenv[nelem+2] = NULL;
   1196 
   1197 	rv = host_execve(path, argv, newenv);
   1198 
   1199 	_DIAGASSERT(rv != 0);
   1200 	sverrno = errno;
   1201 	free(envstr2);
   1202 	free(envstr);
   1203 	free(newenv);
   1204 	errno = sverrno;
   1205 	return rv;
   1206 }
   1207 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * Forks, lets the parent _exit(0), and in the child creates a new
 * session, changes directory to "/" unless nochdir is set, points
 * descriptors 0-2 at /dev/null unless noclose is set, and finally
 * re-establishes the rump client connection with
 * rumpclient_fork_init().
 *
 * Returns 0 on success and -1 with errno set on failure.  The fork
 * context obtained from rumpclient_prefork() is owned by this
 * function and is released on every return path.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/* best effort: leave stdio alone if /dev/null is unavailable */
		if (fd != -1) {
			dup2(fd, 0);
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	rv = rumpclient_fork_init(rf);

	/* the fork context is no longer needed; keep errno from the init */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	free(rf);
	errno = sverrno;
	return -1;
}
   1255