Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.63
      1 /*      $NetBSD: rumpclient.c,v 1.63 2014/12/08 01:10:07 justin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <rump/rumpuser_port.h>
     33 
     34 /*
     35  * We use kqueue on NetBSD and FreeBSD, poll elsewhere.  We could
 * use kqueue on other BSDs too, but I haven't tested those.  We
     37  * want to use kqueue because it will give us the ability to get signal
     38  * notifications but defer their handling to a stage where we do not
     39  * hold the communication lock.  Taking a signal while holding on to
     40  * that lock may cause a deadlock.  Therefore, block signals throughout
     41  * the RPC when using poll.  On Linux, we use signalfd in the same role
     42  * as kqueue on NetBSD to be able to take signals while waiting for a
     43  * response from the server.
     44  */
     45 
     46 #if defined(__NetBSD__) || defined(__FreeBSD__)
     47 #define USE_KQUEUE
     48 #endif
     49 #if defined(__linux__) && !defined(__ANDROID__)
     50 #define USE_SIGNALFD
     51 #endif
     52 
     53 __RCSID("$NetBSD: rumpclient.c,v 1.63 2014/12/08 01:10:07 justin Exp $");
     54 
     55 #include <sys/param.h>
     56 #include <sys/mman.h>
     57 #include <sys/socket.h>
     58 #include <sys/time.h>
     59 
     60 #ifdef USE_KQUEUE
     61 #include <sys/event.h>
     62 #endif
     63 
     64 #include <arpa/inet.h>
     65 #include <netinet/in.h>
     66 #include <netinet/tcp.h>
     67 
     68 #include <assert.h>
     69 #include <dlfcn.h>
     70 #include <errno.h>
     71 #include <fcntl.h>
     72 #include <poll.h>
     73 #include <pthread.h>
     74 #include <signal.h>
     75 #include <stdarg.h>
     76 #include <stdbool.h>
     77 #include <stdio.h>
     78 #include <stdlib.h>
     79 #include <string.h>
     80 #include <unistd.h>
     81 
     82 #include <rump/rumpclient.h>
     83 
/*
 * Pointers to the host's implementations of the calls this client uses
 * for talking to the server.  They are filled in by the FINDSYM()/
 * FINDSYM2() invocations in rumpclient_init().  Some prototypes differ
 * per platform (Android, NetBSD), hence the conditionals.
 *
 * NOTE(review): HOSTOPS is presumably consumed by the included
 * sp_common.c -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
#ifdef __ANDROID__
int	(*host_poll)(struct pollfd *, nfds_t, long);
#else
int	(*host_poll)(struct pollfd *, nfds_t, int);
#endif
ssize_t	(*host_read)(int, void *, size_t);
#ifdef __ANDROID__
int	(*host_sendmsg)(int, const struct msghdr *, unsigned int);
#else
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
#endif
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue entry points, only where kqueue is used for waiting */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
#ifdef __NetBSD__
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#else
int	(*host_kevent)(int, const struct kevent *, int,
		       struct kevent *, int, const struct timespec *);
#endif
#endif

/* signalfd entry point, only where signalfd is used for waiting */
#ifdef USE_SIGNALFD
#include <sys/signalfd.h>

int	(*host_signalfd)(int, const sigset_t *, int);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);
    121 
    122 #include "sp_common.c"
    123 #include "rumpuser_sigtrans.c"
    124 
/*
 * State of the one connection to the server.  The descriptor is -1
 * whenever no socket is open.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/*
 * Descriptor produced by makeholyfd(): a kqueue, a signalfd, or -1
 * depending on the platform.  cliwaitresp() waits on it to notice signals.
 */
static int holyfd = -1;
/* Set containing every signal; initialized with sigfillset() in rumpclient_init(). */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * The value is changed through rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
    143 
    144 static int
    145 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
    146 {
    147 	struct timeval starttime, curtime;
    148 	time_t prevreconmsg;
    149 	unsigned reconretries;
    150 	int rv;
    151 
    152 	for (prevreconmsg = 0, reconretries = 0;;) {
    153 		rv = dosend(spc, iov, iovlen);
    154 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    155 			/* no persistent connections */
    156 			if (retrytimo == 0) {
    157 				rv = ENOTCONN;
    158 				break;
    159 			}
    160 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    161 				_exit(1);
    162 
    163 			if (!prevreconmsg) {
    164 				prevreconmsg = time(NULL);
    165 				gettimeofday(&starttime, NULL);
    166 			}
    167 			if (reconretries == 1) {
    168 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    169 					rv = ENOTCONN;
    170 					break;
    171 				}
    172 				fprintf(stderr, "rump_sp: connection to "
    173 				    "kernel lost, trying to reconnect ...\n");
    174 			} else if (time(NULL) - prevreconmsg > 120) {
    175 				fprintf(stderr, "rump_sp: still trying to "
    176 				    "reconnect ...\n");
    177 				prevreconmsg = time(NULL);
    178 			}
    179 
    180 			/* check that we aren't over the limit */
    181 			if (retrytimo > 0) {
    182 				time_t tdiff;
    183 
    184 				gettimeofday(&curtime, NULL);
    185 				tdiff = curtime.tv_sec - starttime.tv_sec;
    186 				if (starttime.tv_usec > curtime.tv_usec)
    187 					tdiff--;
    188 				if (tdiff >= retrytimo) {
    189 					fprintf(stderr, "rump_sp: reconnect "
    190 					    "failed, %lld second timeout\n",
    191 					    (long long)retrytimo);
    192 					return ENOTCONN;
    193 				}
    194 			}
    195 
    196 			/* adhoc backoff timer */
    197 			if (reconretries < 10) {
    198 				usleep(100000 * reconretries);
    199 			} else {
    200 				sleep(MIN(10, reconretries-9));
    201 			}
    202 			reconretries++;
    203 
    204 			if ((rv = doconnect()) != 0)
    205 				continue;
    206 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    207 			    NULL, 0, true)) != 0)
    208 				continue;
    209 
    210 			/*
    211 			 * ok, reconnect succesful.  we need to return to
    212 			 * the upper layer to get the entire PDU resent.
    213 			 */
    214 			if (reconretries != 1)
    215 				fprintf(stderr, "rump_sp: reconnected!\n");
    216 			rv = EAGAIN;
    217 			break;
    218 		} else {
    219 			_DIAGASSERT(errno != EAGAIN);
    220 			break;
    221 		}
    222 	}
    223 
    224 	return rv;
    225 }
    226 
/*
 * Wait until the response registered in rw has arrived.
 *
 * One waiting thread at a time acts as receiver (spc_istatus ==
 * SPCSTATUS_BUSY): it reads frames off the connection, hands responses
 * over with kickwaiter() and services server requests with handlereq().
 * The other threads sleep on their rw_cv.
 *
 * spc:      connection state
 * rw:       wait record for the request; unlinked and its condvar
 *           destroyed before returning
 * mask:     signal mask installed briefly when a signal is noticed, so
 *           that the signal can be taken while the mutex is not held
 * keeplock: when false, sendunlockl() is called before waiting
 *
 * Returns ENOTCONN if the connection generation changed or the
 * connection was found dead, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* a reconnect bumps the generation, see doconnect() */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the receiver; I/O is done without the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(holyfd, NULL, 0,
				    kev, __arraycount(kev), NULL);

				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* any signal event takes priority over reading */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* !USE_KQUEUE */
				struct pollfd pfd[2];

				/* slot 0: server socket, slot 1: holyfd */
				pfd[0].fd = clispc.spc_fd;
				pfd[0].events = POLLIN;
				pfd[1].fd = holyfd;
				pfd[1].events = POLLIN;

				rv = host_poll(pfd, 2, -1);
				if (rv >= 1 && pfd[1].revents & POLLIN) {
					dosig = 1;
					goto cleanup;
				}
#endif /* !USE_KQUEUE */

				/*
				 * NOTE(review): 0 presumably means the frame
				 * is not complete yet -- confirm in readframe().
				 */
				switch (readframe(spc)) {
				case 0:
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* done only if the response was ours */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role, waking anyone who wants it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Install the caller's mask with the mutex
				 * released, then block everything again.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving; sleep until kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    360 
    361 static int
    362 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    363 	const void *data, size_t dlen, void **resp)
    364 {
    365 	struct rsp_hdr rhdr;
    366 	struct respwait rw;
    367 	struct iovec iov[2];
    368 	int rv;
    369 
    370 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    371 	rhdr.rsp_class = RUMPSP_REQ;
    372 	rhdr.rsp_type = RUMPSP_SYSCALL;
    373 	rhdr.rsp_sysnum = sysnum;
    374 
    375 	IOVPUT(iov[0], rhdr);
    376 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    377 
    378 	do {
    379 		putwait(spc, &rw, &rhdr);
    380 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    381 			unputwait(spc, &rw);
    382 			continue;
    383 		}
    384 
    385 		rv = cliwaitresp(spc, &rw, omask, false);
    386 		if (rv == ENOTCONN)
    387 			rv = EAGAIN;
    388 	} while (rv == EAGAIN);
    389 
    390 	*resp = rw.rw_data;
    391 	return rv;
    392 }
    393 
    394 static int
    395 handshake_req(struct spclient *spc, int type, void *data,
    396 	int cancel, bool haslock)
    397 {
    398 	struct handshake_fork rf;
    399 	const char *myprogname = NULL; /* XXXgcc */
    400 	struct rsp_hdr rhdr;
    401 	struct respwait rw;
    402 	sigset_t omask;
    403 	size_t bonus;
    404 	struct iovec iov[2];
    405 	int rv;
    406 
    407 	if (type == HANDSHAKE_FORK) {
    408 		bonus = sizeof(rf);
    409 	} else {
    410 #ifdef __NetBSD__
    411 		/* would procfs work on NetBSD too? */
    412 		myprogname = getprogname();
    413 #else
    414 		int fd = open("/proc/self/comm", O_RDONLY);
    415 		if (fd == -1) {
    416 			myprogname = "???";
    417 		} else {
    418 			static char commname[128];
    419 
    420 			memset(commname, 0, sizeof(commname));
    421 			if (read(fd, commname, sizeof(commname)) > 0) {
    422 				char *n;
    423 
    424 				n = strrchr(commname, '\n');
    425 				if (n)
    426 					*n = '\0';
    427 				myprogname = commname;
    428 			} else {
    429 				myprogname = "???";
    430 			}
    431 			close(fd);
    432 		}
    433 #endif
    434 		bonus = strlen(myprogname)+1;
    435 	}
    436 
    437 	/* performs server handshake */
    438 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    439 	rhdr.rsp_class = RUMPSP_REQ;
    440 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    441 	rhdr.rsp_handshake = type;
    442 
    443 	IOVPUT(iov[0], rhdr);
    444 
    445 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    446 	if (haslock)
    447 		putwait_locked(spc, &rw, &rhdr);
    448 	else
    449 		putwait(spc, &rw, &rhdr);
    450 	if (type == HANDSHAKE_FORK) {
    451 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    452 		rf.rf_cancel = cancel;
    453 		IOVPUT(iov[1], rf);
    454 	} else {
    455 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    456 	}
    457 	rv = send_with_recon(spc, iov, __arraycount(iov));
    458 	if (rv || cancel) {
    459 		if (haslock)
    460 			unputwait_locked(spc, &rw);
    461 		else
    462 			unputwait(spc, &rw);
    463 		if (cancel) {
    464 			goto out;
    465 		}
    466 	} else {
    467 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    468 	}
    469 	if (rv)
    470 		goto out;
    471 
    472 	rv = *(int *)rw.rw_data;
    473 	free(rw.rw_data);
    474 
    475  out:
    476 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    477 	return rv;
    478 }
    479 
    480 static int
    481 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    482 {
    483 	struct rsp_hdr rhdr;
    484 	struct respwait rw;
    485 	struct iovec iov[1];
    486 	int rv;
    487 
    488 	rhdr.rsp_len = sizeof(rhdr);
    489 	rhdr.rsp_class = RUMPSP_REQ;
    490 	rhdr.rsp_type = RUMPSP_PREFORK;
    491 	rhdr.rsp_error = 0;
    492 
    493 	IOVPUT(iov[0], rhdr);
    494 
    495 	do {
    496 		putwait(spc, &rw, &rhdr);
    497 		rv = send_with_recon(spc, iov, __arraycount(iov));
    498 		if (rv != 0) {
    499 			unputwait(spc, &rw);
    500 			continue;
    501 		}
    502 
    503 		rv = cliwaitresp(spc, &rw, omask, false);
    504 		if (rv == ENOTCONN)
    505 			rv = EAGAIN;
    506 	} while (rv == EAGAIN);
    507 
    508 	*resp = rw.rw_data;
    509 	return rv;
    510 }
    511 
    512 /*
    513  * prevent response code from deadlocking with reconnect code
    514  */
    515 static int
    516 resp_sendlock(struct spclient *spc)
    517 {
    518 	int rv = 0;
    519 
    520 	pthread_mutex_lock(&spc->spc_mtx);
    521 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    522 		if (__predict_false(spc->spc_reconnecting)) {
    523 			rv = EBUSY;
    524 			goto out;
    525 		}
    526 		spc->spc_ostatus = SPCSTATUS_WANTED;
    527 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    528 	}
    529 	spc->spc_ostatus = SPCSTATUS_BUSY;
    530 
    531  out:
    532 	pthread_mutex_unlock(&spc->spc_mtx);
    533 	return rv;
    534 }
    535 
    536 static void
    537 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    538 	int wantstr)
    539 {
    540 	struct rsp_hdr rhdr;
    541 	struct iovec iov[2];
    542 
    543 	if (wantstr)
    544 		dlen = MIN(dlen, strlen(data)+1);
    545 
    546 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    547 	rhdr.rsp_reqno = reqno;
    548 	rhdr.rsp_class = RUMPSP_RESP;
    549 	rhdr.rsp_type = RUMPSP_COPYIN;
    550 	rhdr.rsp_sysnum = 0;
    551 
    552 	IOVPUT(iov[0], rhdr);
    553 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    554 
    555 	if (resp_sendlock(spc) != 0)
    556 		return;
    557 	(void)SENDIOV(spc, iov);
    558 	sendunlock(spc);
    559 }
    560 
    561 static void
    562 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    563 {
    564 	struct rsp_hdr rhdr;
    565 	struct iovec iov[2];
    566 
    567 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    568 	rhdr.rsp_reqno = reqno;
    569 	rhdr.rsp_class = RUMPSP_RESP;
    570 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    571 	rhdr.rsp_sysnum = 0;
    572 
    573 	IOVPUT(iov[0], rhdr);
    574 	IOVPUT(iov[1], addr);
    575 
    576 	if (resp_sendlock(spc) != 0)
    577 		return;
    578 	(void)SENDIOV(spc, iov);
    579 	sendunlock(spc);
    580 }
    581 
    582 int
    583 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    584 	register_t *retval)
    585 {
    586 	struct rsp_sysresp *resp;
    587 	sigset_t omask;
    588 	void *rdata;
    589 	int rv;
    590 
    591 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    592 
    593 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    594 	    sysnum, data, dlen));
    595 
    596 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    597 	if (rv)
    598 		goto out;
    599 
    600 	resp = rdata;
    601 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    602 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    603 
    604 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    605 	rv = resp->rsys_error;
    606 	free(rdata);
    607 
    608  out:
    609 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    610 	return rv;
    611 }
    612 
    613 static void
    614 handlereq(struct spclient *spc)
    615 {
    616 	struct rsp_copydata *copydata;
    617 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    618 	void *mapaddr;
    619 	size_t maplen;
    620 	int reqtype = spc->spc_hdr.rsp_type;
    621 	int sig;
    622 
    623 	switch (reqtype) {
    624 	case RUMPSP_COPYIN:
    625 	case RUMPSP_COPYINSTR:
    626 		/*LINTED*/
    627 		copydata = (struct rsp_copydata *)spc->spc_buf;
    628 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    629 		    copydata->rcp_addr, copydata->rcp_len));
    630 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    631 		    copydata->rcp_addr, copydata->rcp_len,
    632 		    reqtype == RUMPSP_COPYINSTR);
    633 		break;
    634 	case RUMPSP_COPYOUT:
    635 	case RUMPSP_COPYOUTSTR:
    636 		/*LINTED*/
    637 		copydata = (struct rsp_copydata *)spc->spc_buf;
    638 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    639 		    copydata->rcp_addr, copydata->rcp_len));
    640 		/*LINTED*/
    641 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    642 		    copydata->rcp_len);
    643 		break;
    644 	case RUMPSP_ANONMMAP:
    645 		/*LINTED*/
    646 		maplen = *(size_t *)spc->spc_buf;
    647 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    648 		    MAP_ANON|MAP_PRIVATE, -1, 0);
    649 		if (mapaddr == MAP_FAILED)
    650 			mapaddr = NULL;
    651 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    652 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    653 		break;
    654 	case RUMPSP_RAISE:
    655 		sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
    656 		DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
    657 		raise(sig);
    658 		/*
    659 		 * We most likely have signals blocked, but the signal
    660 		 * will be handled soon enough when we return.
    661 		 */
    662 		break;
    663 	default:
    664 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    665 		abort();
    666 		break;
    667 	}
    668 
    669 	spcfreebuf(spc);
    670 }
    671 
/*
 * Server address and the index of its entry in parsetab[], both filled
 * in by parseurl() in rumpclient_init() and used by doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    674 
/*
 * dup until we get a "good" fd which does not collide with stdio
 * (i.e. one greater than 2).
 *
 * myfd:       descriptor to start from; -1 is passed through untouched
 * mustchange: when nonzero, duplicate at least once even if myfd is
 *             already good, and leave the caller's myfd open
 *
 * Intermediate descriptors created on the way are closed.  Returns the
 * good descriptor, or -1 with errno from the failed dup preserved.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int saved_errno = 0;

	while (myfd != -1 && (mustchange || myfd <= 2)) {
		int prev = myfd;

		assert(nstale < __arraycount(stale));
		myfd = host_dup(prev);
		if (mustchange) {
			/* prevent closing old fd */
			mustchange = 0;
		} else {
			stale[nstale++] = prev;
		}
	}

	/* closing below may clobber errno from a failed dup */
	if (myfd == -1 && nstale > 0)
		saved_errno = errno;

	while (nstale > 0) {
		nstale--;
		host_close(stale[nstale]);
	}

	if (saved_errno != 0)
		errno = saved_errno;

	return myfd;
}
    706 
#if defined(USE_KQUEUE)

/*
 * Create the descriptor used for waiting: a kqueue watching signals
 * 1..NSIG as well as readability of the server socket.
 * Returns the descriptor or -1.
 */
static int
makeholyfd(void)
{
	struct kevent kev[NSIG+1];
	int kq, signo;

	/* setup kqueue, we want all signals and the fd */
	kq = dupgood(host_kqueue(), 0);
	if (kq == -1) {
		ERRLOG(("rump_sp: cannot setup kqueue"));
		return -1;
	}

	for (signo = 1; signo <= NSIG; signo++) {
		EV_SET(&kev[signo-1], signo, EVFILT_SIGNAL,
		    EV_ADD|EV_ENABLE, 0, 0, 0);
	}
	EV_SET(&kev[NSIG], clispc.spc_fd,
	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);

	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
		ERRLOG(("rump_sp: kevent() failed"));
		host_close(kq);
		return -1;
	}

	return kq;
}

#elif defined(USE_SIGNALFD) /* !USE_KQUEUE */

/*
 * Create the descriptor used for waiting: a signalfd covering every
 * signal in fullset.  Returns the descriptor or -1.
 */
static int
makeholyfd(void)
{
	int sfd;

	sfd = host_signalfd(-1, &fullset, 0);
	return sfd;
}

#else /* !USE_KQUEUE && !USE_SIGNALFD */

/*
 * Neither kqueue nor signalfd is used: there is no descriptor to
 * create, so always report -1.
 */
static int
makeholyfd(void)
{

	return -1;
}

#endif
    754 
    755 static int
    756 doconnect(void)
    757 {
    758 	struct respwait rw;
    759 	struct rsp_hdr rhdr;
    760 	char banner[MAXBANNER];
    761 	int s, error, flags;
    762 	ssize_t n;
    763 
    764 	if (holyfd != -1)
    765 		host_close(holyfd);
    766 	holyfd = -1;
    767 	s = -1;
    768 
    769 	if (clispc.spc_fd != -1)
    770 		host_close(clispc.spc_fd);
    771 	clispc.spc_fd = -1;
    772 
    773 	/*
    774 	 * for reconnect, gate everyone out of the receiver code
    775 	 */
    776 	putwait_locked(&clispc, &rw, &rhdr);
    777 
    778 	pthread_mutex_lock(&clispc.spc_mtx);
    779 	clispc.spc_reconnecting = 1;
    780 	pthread_cond_broadcast(&clispc.spc_cv);
    781 	clispc.spc_generation++;
    782 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    783 		clispc.spc_istatus = SPCSTATUS_WANTED;
    784 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    785 	}
    786 	kickall(&clispc);
    787 
    788 	/*
    789 	 * we can release it already since we hold the
    790 	 * send lock during reconnect
    791 	 * XXX: assert it
    792 	 */
    793 	clispc.spc_istatus = SPCSTATUS_FREE;
    794 	pthread_mutex_unlock(&clispc.spc_mtx);
    795 	unputwait_locked(&clispc, &rw);
    796 
    797 	free(clispc.spc_buf);
    798 	clispc.spc_off = 0;
    799 
    800 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    801 	if (s == -1)
    802 		return -1;
    803 
    804 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
    805 		if (errno == EINTR)
    806 			continue;
    807 		ERRLOG(("rump_sp: client connect failed: %s\n",
    808 		    strerror(errno)));
    809 		return -1;
    810 	}
    811 
    812 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    813 		ERRLOG(("rump_sp: connect hook failed\n"));
    814 		return -1;
    815 	}
    816 
    817 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
    818 		ERRLOG(("rump_sp: failed to read banner\n"));
    819 		return -1;
    820 	}
    821 
    822 	if (banner[n-1] != '\n') {
    823 		ERRLOG(("rump_sp: invalid banner\n"));
    824 		return -1;
    825 	}
    826 	banner[n] = '\0';
    827 	/* XXX parse the banner some day */
    828 
    829 	flags = host_fcntl(s, F_GETFL, 0);
    830 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    831 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    832 		return -1;
    833 	}
    834 	clispc.spc_fd = s;
    835 	clispc.spc_state = SPCSTATE_RUNNING;
    836 	clispc.spc_reconnecting = 0;
    837 	holyfd = makeholyfd();
    838 
    839 	return 0;
    840 }
    841 
    842 static int
    843 doinit(void)
    844 {
    845 
    846 	TAILQ_INIT(&clispc.spc_respwait);
    847 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    848 	pthread_cond_init(&clispc.spc_cv, NULL);
    849 
    850 	return 0;
    851 }
    852 
#ifdef RTLD_NEXT
/*
 * Plain dlsym() wrapper.  It is also exported under the name
 * rumphijack_dlsym as a weak alias; rumpclient_init() compares the two
 * to find out whether the alias still resolves to this function.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
    864 
    865 static pid_t init_done = 0;
    866 
    867 int
    868 rumpclient_init(void)
    869 {
    870 	char *p;
    871 	int error;
    872 	int rv = -1;
    873 	int hstype;
    874 	pid_t mypid;
    875 
    876 	/*
    877 	 * Make sure we're not riding the context of a previous
    878 	 * host fork.  Note: it's *possible* that after n>1 forks
    879 	 * we have the same pid as one of our exited parents, but
    880 	 * I'm pretty sure there are 0 practical implications, since
    881 	 * it means generations would have to skip rumpclient init.
    882 	 */
    883 	if (init_done == (mypid = getpid()))
    884 		return 0;
    885 
    886 #ifdef USE_KQUEUE
    887 	/* kq does not traverse fork() */
    888 	holyfd = -1;
    889 #endif
    890 	init_done = mypid;
    891 
    892 	sigfillset(&fullset);
    893 
    894 	/*
    895 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
    896 	 * wann wird man je verstehen?  wann wird man je verstehen?
    897 	 */
    898 #ifdef RTLD_NEXT
    899 #define FINDSYM2(_name_,_syscall_)					\
    900 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    901 	    #_syscall_)) == NULL) {					\
    902 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    903 			host_##_name_ = _name_; /* static fallback */	\
    904 		if (host_##_name_ == NULL) {				\
    905 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
    906 			    dlerror());					\
    907 			exit(1);					\
    908 		}							\
    909 	}
    910 #else
    911 #define FINDSYM2(_name_,_syscall)					\
    912 	host_##_name_ = _name_;
    913 #endif
    914 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    915 #ifdef __NetBSD__
    916 	FINDSYM2(socket,__socket30)
    917 #else
    918 	FINDSYM(socket)
    919 #endif
    920 
    921 	FINDSYM(close)
    922 	FINDSYM(connect)
    923 	FINDSYM(fcntl)
    924 	FINDSYM(poll)
    925 	FINDSYM(read)
    926 	FINDSYM(sendmsg)
    927 	FINDSYM(setsockopt)
    928 	FINDSYM(dup)
    929 	FINDSYM(execve)
    930 
    931 #ifdef USE_KQUEUE
    932 	FINDSYM(kqueue)
    933 #ifdef __NetBSD__
    934 #if !__NetBSD_Prereq__(5,99,7)
    935 	FINDSYM(kevent)
    936 #else
    937 	FINDSYM2(kevent,_sys___kevent50)
    938 #endif
    939 #else
    940 	FINDSYM(kevent)
    941 #endif
    942 #endif /* USE_KQUEUE */
    943 
    944 #ifdef USE_SIGNALFD
    945 	FINDSYM(signalfd)
    946 #endif
    947 
    948 #undef	FINDSYM
    949 #undef	FINDSY2
    950 
    951 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    952 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    953 			fprintf(stderr, "error: RUMP_SERVER not set\n");
    954 			errno = ENOENT;
    955 			goto out;
    956 		}
    957 	}
    958 
    959 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    960 		errno = error;
    961 		goto out;
    962 	}
    963 
    964 	if (doinit() == -1)
    965 		goto out;
    966 
    967 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    968 		sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
    969 		unsetenv("RUMPCLIENT__EXECFD");
    970 		hstype = HANDSHAKE_EXEC;
    971 	} else {
    972 		if (doconnect() == -1)
    973 			goto out;
    974 		hstype = HANDSHAKE_GUEST;
    975 	}
    976 
    977 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    978 	if (error) {
    979 		pthread_mutex_destroy(&clispc.spc_mtx);
    980 		pthread_cond_destroy(&clispc.spc_cv);
    981 		if (clispc.spc_fd != -1)
    982 			host_close(clispc.spc_fd);
    983 		errno = error;
    984 		goto out;
    985 	}
    986 	rv = 0;
    987 
    988  out:
    989 	if (rv == -1)
    990 		init_done = 0;
    991 	return rv;
    992 }
    993 
/*
 * Context returned by rumpclient_prefork(): what is needed to set up
 * the connection after a fork.
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* auth data from the prefork response */
	struct spclient fork_spc;	/* copy of clispc taken at prefork time */
	int fork_holyfd;		/* holyfd value at prefork time */
};
    999 
   1000 struct rumpclient_fork *
   1001 rumpclient_prefork(void)
   1002 {
   1003 	struct rumpclient_fork *rpf;
   1004 	sigset_t omask;
   1005 	void *resp;
   1006 	int rv;
   1007 
   1008 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
   1009 	rpf = malloc(sizeof(*rpf));
   1010 	if (rpf == NULL)
   1011 		goto out;
   1012 
   1013 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
   1014 		free(rpf);
   1015 		errno = rv;
   1016 		rpf = NULL;
   1017 		goto out;
   1018 	}
   1019 
   1020 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
   1021 	free(resp);
   1022 
   1023 	rpf->fork_spc = clispc;
   1024 	rpf->fork_holyfd = holyfd;
   1025 
   1026  out:
   1027 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
   1028 	return rpf;
   1029 }
   1030 
   1031 int
   1032 rumpclient_fork_init(struct rumpclient_fork *rpf)
   1033 {
   1034 	int error;
   1035 	int osock;
   1036 
   1037 	osock = clispc.spc_fd;
   1038 	memset(&clispc, 0, sizeof(clispc));
   1039 	clispc.spc_fd = osock;
   1040 
   1041 #ifdef USE_KQUEUE
   1042 	holyfd = -1; /* kqueue descriptor is not copied over fork() */
   1043 #else
   1044 	if (holyfd != -1) {
   1045 		host_close(holyfd);
   1046 		holyfd = -1;
   1047 	}
   1048 #endif
   1049 
   1050 	if (doinit() == -1)
   1051 		return -1;
   1052 	if (doconnect() == -1)
   1053 		return -1;
   1054 
   1055 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
   1056 	    0, false);
   1057 	if (error) {
   1058 		pthread_mutex_destroy(&clispc.spc_mtx);
   1059 		pthread_cond_destroy(&clispc.spc_cv);
   1060 		errno = error;
   1061 		return -1;
   1062 	}
   1063 
   1064 	return 0;
   1065 }
   1066 
/*
 * Placeholder: the body is empty and rpf is not used.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
   1074 
   1075 void
   1076 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
   1077 {
   1078 
   1079 	clispc = rpf->fork_spc;
   1080 	holyfd = rpf->fork_holyfd;
   1081 }
   1082 
   1083 void
   1084 rumpclient_setconnretry(time_t timeout)
   1085 {
   1086 
   1087 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
   1088 		return; /* gigo */
   1089 
   1090 	retrytimo = timeout;
   1091 }
   1092 
   1093 int
   1094 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
   1095 {
   1096 	int fd = *fdp;
   1097 	int untilfd, rv;
   1098 	int newfd;
   1099 
   1100 	switch (variant) {
   1101 	case RUMPCLIENT_CLOSE_FCLOSEM:
   1102 		untilfd = MAX(clispc.spc_fd, holyfd);
   1103 		for (; fd <= untilfd; fd++) {
   1104 			if (fd == clispc.spc_fd || fd == holyfd)
   1105 				continue;
   1106 			rv = host_close(fd);
   1107 			if (rv == -1)
   1108 				return -1;
   1109 		}
   1110 		*fdp = fd;
   1111 		break;
   1112 
   1113 	case RUMPCLIENT_CLOSE_CLOSE:
   1114 	case RUMPCLIENT_CLOSE_DUP2:
   1115 		if (fd == clispc.spc_fd) {
   1116 			newfd = dupgood(clispc.spc_fd, 1);
   1117 			if (newfd == -1)
   1118 				return -1;
   1119 
   1120 #ifdef USE_KQUEUE
   1121 			{
   1122 			struct kevent kev[2];
   1123 
   1124 			/*
   1125 			 * now, we have a new socket number, so change
   1126 			 * the file descriptor that kqueue is
   1127 			 * monitoring.  remove old and add new.
   1128 			 */
   1129 			EV_SET(&kev[0], clispc.spc_fd,
   1130 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
   1131 			EV_SET(&kev[1], newfd,
   1132 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
   1133 			if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
   1134 				int sverrno = errno;
   1135 				host_close(newfd);
   1136 				errno = sverrno;
   1137 				return -1;
   1138 			}}
   1139 #endif /* !USE_KQUEUE */
   1140 			clispc.spc_fd = newfd;
   1141 		}
   1142 		if (holyfd != -1 && fd == holyfd) {
   1143 			newfd = dupgood(holyfd, 1);
   1144 			if (newfd == -1)
   1145 				return -1;
   1146 			holyfd = newfd;
   1147 		}
   1148 		break;
   1149 	}
   1150 
   1151 	return 0;
   1152 }
   1153 
/*
 * Fork via rumpclient__dofork(), using the host fork() as the fork
 * primitive.  Returns whatever rumpclient__dofork() returns.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
   1160 
   1161 /*
   1162  * Process is about to exec.  Save info about our existing connection
   1163  * in the env.  rumpclient will check for this info in init().
   1164  * This is mostly for the benefit of rumphijack, but regular applications
   1165  * may use it as well.
   1166  */
   1167 int
   1168 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1169 {
   1170 	char buf[4096];
   1171 	char **newenv;
   1172 	char *envstr, *envstr2;
   1173 	size_t nelem;
   1174 	int rv, sverrno;
   1175 
   1176 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1177 	    clispc.spc_fd, holyfd);
   1178 	envstr = malloc(strlen(buf)+1);
   1179 	if (envstr == NULL) {
   1180 		return ENOMEM;
   1181 	}
   1182 	strcpy(envstr, buf);
   1183 
   1184 	/* do we have a fully parsed url we want to forward in the env? */
   1185 	if (*parsedurl != '\0') {
   1186 		snprintf(buf, sizeof(buf),
   1187 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1188 		envstr2 = malloc(strlen(buf)+1);
   1189 		if (envstr2 == NULL) {
   1190 			free(envstr);
   1191 			return ENOMEM;
   1192 		}
   1193 		strcpy(envstr2, buf);
   1194 	} else {
   1195 		envstr2 = NULL;
   1196 	}
   1197 
   1198 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1199 		continue;
   1200 
   1201 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1202 	if (newenv == NULL) {
   1203 		free(envstr2);
   1204 		free(envstr);
   1205 		return ENOMEM;
   1206 	}
   1207 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1208 
   1209 	newenv[nelem] = envstr;
   1210 	newenv[nelem+1] = envstr2;
   1211 	newenv[nelem+2] = NULL;
   1212 
   1213 	rv = host_execve(path, argv, newenv);
   1214 
   1215 	_DIAGASSERT(rv != 0);
   1216 	sverrno = errno;
   1217 	free(envstr2);
   1218 	free(envstr);
   1219 	free(newenv);
   1220 	errno = sverrno;
   1221 	return rv;
   1222 }
   1223 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * The parent exits with status 0 after a successful fork().  The child
 * calls setsid(), changes directory to "/" unless nochdir is set,
 * redirects descriptors 0-2 to /dev/null unless noclose is set, and
 * then re-establishes the rump connection with rumpclient_fork_init().
 *
 * Returns 0 in the child on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int rv, sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/*
		 * If /dev/null cannot be opened, leave stdio untouched
		 * instead of handing an invalid descriptor to dup2().
		 */
		if (fd != -1) {
			dup2(fd, 0);
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	rv = rumpclient_fork_init(rf);

	/*
	 * rumpclient_fork_init() only reads the structure allocated by
	 * rumpclient_prefork(), so release it here; keep errno intact
	 * for the failure case.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
   1271