Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.45.4.3
      1 /*      $NetBSD: rumpclient.c,v 1.45.4.3 2013/01/23 00:05:26 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include "rumpuser_port.h"
     33 
     34 /*
     35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
     36  * use kqueue on other BSD's too, but I haven't tested those.  We
     37  * want to use kqueue because it will give us the ability to get signal
     38  * notifications but defer their handling to a stage where we do not
     39  * hold the communication lock.  Taking a signal while holding on to
     40  * that lock may cause a deadlock.  Therefore, block signals throughout
     41  * the RPC when using poll.  This unfortunately means that the normal
     42  * SIGINT way of stopping a process while it is undergoing rump kernel
      43  * RPC will not work.  If anyone knows which Linux system call handles
     44  * the above scenario correctly, I'm all ears.
     45  */
     46 
     47 #ifdef __NetBSD__
     48 #define USE_KQUEUE
     49 #endif
     50 
     51 __RCSID("$NetBSD: rumpclient.c,v 1.45.4.3 2013/01/23 00:05:26 yamt Exp $");
     52 
     53 #include <sys/param.h>
     54 #include <sys/mman.h>
     55 #include <sys/socket.h>
     56 #include <sys/time.h>
     57 
     58 #ifdef USE_KQUEUE
     59 #include <sys/event.h>
     60 #endif
     61 
     62 #include <arpa/inet.h>
     63 #include <netinet/in.h>
     64 #include <netinet/tcp.h>
     65 
     66 #include <assert.h>
     67 #include <dlfcn.h>
     68 #include <errno.h>
     69 #include <fcntl.h>
     70 #include <poll.h>
     71 #include <pthread.h>
     72 #include <signal.h>
     73 #include <stdarg.h>
     74 #include <stdbool.h>
     75 #include <stdio.h>
     76 #include <stdlib.h>
     77 #include <string.h>
     78 #include <unistd.h>
     79 
     80 #include <rump/rumpclient.h>
     81 
/*
 * Pointers to the host's implementations of the calls this file makes
 * on its own behalf.  They are filled in by rumpclient_init() through
 * the FINDSYM/FINDSYM2 macros.
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c, which is
 * included right after these declarations -- confirm.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue entry points exist only when the kqueue-based receiver is used */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
    100 
    101 #include "sp_common.c"
    102 
/* the client's connection state; the descriptor is -1 until one is set up */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor, -1 when none is open */
static int kq = -1;
/* signal set with all signals; initialized in rumpclient_init() */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
    121 
    122 static int
    123 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
    124 {
    125 	struct timeval starttime, curtime;
    126 	time_t prevreconmsg;
    127 	unsigned reconretries;
    128 	int rv;
    129 
    130 	for (prevreconmsg = 0, reconretries = 0;;) {
    131 		rv = dosend(spc, iov, iovlen);
    132 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
    133 			/* no persistent connections */
    134 			if (retrytimo == 0) {
    135 				rv = ENOTCONN;
    136 				break;
    137 			}
    138 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
    139 				_exit(1);
    140 
    141 			if (!prevreconmsg) {
    142 				prevreconmsg = time(NULL);
    143 				gettimeofday(&starttime, NULL);
    144 			}
    145 			if (reconretries == 1) {
    146 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
    147 					rv = ENOTCONN;
    148 					break;
    149 				}
    150 				fprintf(stderr, "rump_sp: connection to "
    151 				    "kernel lost, trying to reconnect ...\n");
    152 			} else if (time(NULL) - prevreconmsg > 120) {
    153 				fprintf(stderr, "rump_sp: still trying to "
    154 				    "reconnect ...\n");
    155 				prevreconmsg = time(NULL);
    156 			}
    157 
    158 			/* check that we aren't over the limit */
    159 			if (retrytimo > 0) {
    160 				time_t tdiff;
    161 
    162 				gettimeofday(&curtime, NULL);
    163 				tdiff = curtime.tv_sec - starttime.tv_sec;
    164 				if (starttime.tv_usec > curtime.tv_usec)
    165 					tdiff--;
    166 				if (tdiff >= retrytimo) {
    167 					fprintf(stderr, "rump_sp: reconnect "
    168 					    "failed, %lld second timeout\n",
    169 					    (long long)retrytimo);
    170 					return ENOTCONN;
    171 				}
    172 			}
    173 
    174 			/* adhoc backoff timer */
    175 			if (reconretries < 10) {
    176 				usleep(100000 * reconretries);
    177 			} else {
    178 				sleep(MIN(10, reconretries-9));
    179 			}
    180 			reconretries++;
    181 
    182 			if ((rv = doconnect()) != 0)
    183 				continue;
    184 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
    185 			    NULL, 0, true)) != 0)
    186 				continue;
    187 
    188 			/*
    189 			 * ok, reconnect succesful.  we need to return to
    190 			 * the upper layer to get the entire PDU resent.
    191 			 */
    192 			if (reconretries != 1)
    193 				fprintf(stderr, "rump_sp: reconnected!\n");
    194 			rv = EAGAIN;
    195 			break;
    196 		} else {
    197 			_DIAGASSERT(errno != EAGAIN);
    198 			break;
    199 		}
    200 	}
    201 
    202 	return rv;
    203 }
    204 
/*
 * Wait for the response to the request registered in rw.
 *
 * One waiting thread at a time acts as the receiver (spc_istatus set to
 * SPCSTATUS_BUSY): it reads frames from the connection, wakes up waiters
 * via kickwaiter() and services requests from the server via handlereq().
 * The other waiters sleep on their own condition variable.
 *
 * spc      connection
 * rw       wait structure; removed from spc_respwait before returning
 * mask     signal mask to restore briefly when a signal was noticed
 * keeplock if false, the send lock is released with sendunlockl()
 *
 * Returns ENOTCONN if the connection generation changed while waiting or
 * readframe() reported failure, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; a change means we were reconnected */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the receiver; I/O is done without spc_mtx held */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error let readframe() decide what happened */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* count signal events; they are handled after cleanup */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				/* block until readable; the result is not inspected */
				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet, wait for more */
					continue;
				case -1:
					/* failure; reported to the caller as ENOTCONN */
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop receiving once our own response arrived */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role; wake others if someone wants it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Restore the caller's mask for a moment with
				 * spc_mtx released, then block everything again.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* someone else is receiving; ask to be woken and sleep */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    332 
    333 static int
    334 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    335 	const void *data, size_t dlen, void **resp)
    336 {
    337 	struct rsp_hdr rhdr;
    338 	struct respwait rw;
    339 	struct iovec iov[2];
    340 	int rv;
    341 
    342 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    343 	rhdr.rsp_class = RUMPSP_REQ;
    344 	rhdr.rsp_type = RUMPSP_SYSCALL;
    345 	rhdr.rsp_sysnum = sysnum;
    346 
    347 	IOVPUT(iov[0], rhdr);
    348 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    349 
    350 	do {
    351 		putwait(spc, &rw, &rhdr);
    352 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    353 			unputwait(spc, &rw);
    354 			continue;
    355 		}
    356 
    357 		rv = cliwaitresp(spc, &rw, omask, false);
    358 		if (rv == ENOTCONN)
    359 			rv = EAGAIN;
    360 	} while (rv == EAGAIN);
    361 
    362 	*resp = rw.rw_data;
    363 	return rv;
    364 }
    365 
    366 static int
    367 handshake_req(struct spclient *spc, int type, void *data,
    368 	int cancel, bool haslock)
    369 {
    370 	struct handshake_fork rf;
    371 	const char *myprogname = NULL; /* XXXgcc */
    372 	struct rsp_hdr rhdr;
    373 	struct respwait rw;
    374 	sigset_t omask;
    375 	size_t bonus;
    376 	struct iovec iov[2];
    377 	int rv;
    378 
    379 	if (type == HANDSHAKE_FORK) {
    380 		bonus = sizeof(rf);
    381 	} else {
    382 #ifdef __NetBSD__
    383 		/* would procfs work on NetBSD too? */
    384 		myprogname = getprogname();
    385 #else
    386 		int fd = open("/proc/self/comm", O_RDONLY);
    387 		if (fd == -1) {
    388 			myprogname = "???";
    389 		} else {
    390 			static char commname[128];
    391 
    392 			memset(commname, 0, sizeof(commname));
    393 			if (read(fd, commname, sizeof(commname)) > 0) {
    394 				char *n;
    395 
    396 				n = strrchr(commname, '\n');
    397 				if (n)
    398 					*n = '\0';
    399 				myprogname = commname;
    400 			} else {
    401 				myprogname = "???";
    402 			}
    403 			close(fd);
    404 		}
    405 #endif
    406 		bonus = strlen(myprogname)+1;
    407 	}
    408 
    409 	/* performs server handshake */
    410 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    411 	rhdr.rsp_class = RUMPSP_REQ;
    412 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    413 	rhdr.rsp_handshake = type;
    414 
    415 	IOVPUT(iov[0], rhdr);
    416 
    417 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    418 	if (haslock)
    419 		putwait_locked(spc, &rw, &rhdr);
    420 	else
    421 		putwait(spc, &rw, &rhdr);
    422 	if (type == HANDSHAKE_FORK) {
    423 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    424 		rf.rf_cancel = cancel;
    425 		IOVPUT(iov[1], rf);
    426 	} else {
    427 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    428 	}
    429 	rv = send_with_recon(spc, iov, __arraycount(iov));
    430 	if (rv || cancel) {
    431 		if (haslock)
    432 			unputwait_locked(spc, &rw);
    433 		else
    434 			unputwait(spc, &rw);
    435 		if (cancel) {
    436 			goto out;
    437 		}
    438 	} else {
    439 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    440 	}
    441 	if (rv)
    442 		goto out;
    443 
    444 	rv = *(int *)rw.rw_data;
    445 	free(rw.rw_data);
    446 
    447  out:
    448 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    449 	return rv;
    450 }
    451 
    452 static int
    453 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    454 {
    455 	struct rsp_hdr rhdr;
    456 	struct respwait rw;
    457 	struct iovec iov[1];
    458 	int rv;
    459 
    460 	rhdr.rsp_len = sizeof(rhdr);
    461 	rhdr.rsp_class = RUMPSP_REQ;
    462 	rhdr.rsp_type = RUMPSP_PREFORK;
    463 	rhdr.rsp_error = 0;
    464 
    465 	IOVPUT(iov[0], rhdr);
    466 
    467 	do {
    468 		putwait(spc, &rw, &rhdr);
    469 		rv = send_with_recon(spc, iov, __arraycount(iov));
    470 		if (rv != 0) {
    471 			unputwait(spc, &rw);
    472 			continue;
    473 		}
    474 
    475 		rv = cliwaitresp(spc, &rw, omask, false);
    476 		if (rv == ENOTCONN)
    477 			rv = EAGAIN;
    478 	} while (rv == EAGAIN);
    479 
    480 	*resp = rw.rw_data;
    481 	return rv;
    482 }
    483 
    484 /*
    485  * prevent response code from deadlocking with reconnect code
    486  */
    487 static int
    488 resp_sendlock(struct spclient *spc)
    489 {
    490 	int rv = 0;
    491 
    492 	pthread_mutex_lock(&spc->spc_mtx);
    493 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    494 		if (__predict_false(spc->spc_reconnecting)) {
    495 			rv = EBUSY;
    496 			goto out;
    497 		}
    498 		spc->spc_ostatus = SPCSTATUS_WANTED;
    499 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    500 	}
    501 	spc->spc_ostatus = SPCSTATUS_BUSY;
    502 
    503  out:
    504 	pthread_mutex_unlock(&spc->spc_mtx);
    505 	return rv;
    506 }
    507 
    508 static void
    509 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    510 	int wantstr)
    511 {
    512 	struct rsp_hdr rhdr;
    513 	struct iovec iov[2];
    514 
    515 	if (wantstr)
    516 		dlen = MIN(dlen, strlen(data)+1);
    517 
    518 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    519 	rhdr.rsp_reqno = reqno;
    520 	rhdr.rsp_class = RUMPSP_RESP;
    521 	rhdr.rsp_type = RUMPSP_COPYIN;
    522 	rhdr.rsp_sysnum = 0;
    523 
    524 	IOVPUT(iov[0], rhdr);
    525 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    526 
    527 	if (resp_sendlock(spc) != 0)
    528 		return;
    529 	(void)SENDIOV(spc, iov);
    530 	sendunlock(spc);
    531 }
    532 
    533 static void
    534 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    535 {
    536 	struct rsp_hdr rhdr;
    537 	struct iovec iov[2];
    538 
    539 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    540 	rhdr.rsp_reqno = reqno;
    541 	rhdr.rsp_class = RUMPSP_RESP;
    542 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    543 	rhdr.rsp_sysnum = 0;
    544 
    545 	IOVPUT(iov[0], rhdr);
    546 	IOVPUT(iov[1], addr);
    547 
    548 	if (resp_sendlock(spc) != 0)
    549 		return;
    550 	(void)SENDIOV(spc, iov);
    551 	sendunlock(spc);
    552 }
    553 
    554 int
    555 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    556 	register_t *retval)
    557 {
    558 	struct rsp_sysresp *resp;
    559 	sigset_t omask;
    560 	void *rdata;
    561 	int rv;
    562 
    563 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    564 
    565 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    566 	    sysnum, data, dlen));
    567 
    568 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    569 	if (rv)
    570 		goto out;
    571 
    572 	resp = rdata;
    573 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    574 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    575 
    576 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    577 	rv = resp->rsys_error;
    578 	free(rdata);
    579 
    580  out:
    581 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    582 	return rv;
    583 }
    584 
    585 static void
    586 handlereq(struct spclient *spc)
    587 {
    588 	struct rsp_copydata *copydata;
    589 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    590 	void *mapaddr;
    591 	size_t maplen;
    592 	int reqtype = spc->spc_hdr.rsp_type;
    593 
    594 	switch (reqtype) {
    595 	case RUMPSP_COPYIN:
    596 	case RUMPSP_COPYINSTR:
    597 		/*LINTED*/
    598 		copydata = (struct rsp_copydata *)spc->spc_buf;
    599 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    600 		    copydata->rcp_addr, copydata->rcp_len));
    601 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    602 		    copydata->rcp_addr, copydata->rcp_len,
    603 		    reqtype == RUMPSP_COPYINSTR);
    604 		break;
    605 	case RUMPSP_COPYOUT:
    606 	case RUMPSP_COPYOUTSTR:
    607 		/*LINTED*/
    608 		copydata = (struct rsp_copydata *)spc->spc_buf;
    609 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    610 		    copydata->rcp_addr, copydata->rcp_len));
    611 		/*LINTED*/
    612 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    613 		    copydata->rcp_len);
    614 		break;
    615 	case RUMPSP_ANONMMAP:
    616 		/*LINTED*/
    617 		maplen = *(size_t *)spc->spc_buf;
    618 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    619 		    MAP_ANON, -1, 0);
    620 		if (mapaddr == MAP_FAILED)
    621 			mapaddr = NULL;
    622 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    623 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    624 		break;
    625 	case RUMPSP_RAISE:
    626 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    627 		raise((int)rhdr->rsp_signo);
    628 		/*
    629 		 * We most likely have signals blocked, but the signal
    630 		 * will be handled soon enough when we return.
    631 		 */
    632 		break;
    633 	default:
    634 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    635 		abort();
    636 		break;
    637 	}
    638 
    639 	spcfreebuf(spc);
    640 }
    641 
/* index into parsetab[] for the server URL; set by parseurl() in rumpclient_init() */
static unsigned ptab_idx;
/* server address handed to host_connect(); set by parseurl() in rumpclient_init() */
static struct sockaddr *serv_sa;
    644 
/*
 * Duplicate myfd until the result does not collide with stdio, i.e.
 * is larger than 2.  Intermediate descriptors (including the original
 * if it was <= 2) are closed once a good one is found.
 *
 * If mustchange is set, the descriptor is duplicated at least once and
 * the descriptor passed in is left open.
 *
 * Returns the new descriptor.  If myfd is -1 on entry, -1 is returned
 * right away.  If host_dup() fails, -1 is returned and errno from the
 * failed dup is preserved over the cleanup.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stash[4];
	unsigned int nstash = 0;
	int duperrno = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nstash < __arraycount(stash));
		stash[nstash] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* slot is reused: the caller's fd is never closed */
			mustchange = 0;
		} else {
			nstash++;
		}
	}

	if (myfd == -1 && nstash > 0)
		duperrno = errno;

	while (nstash > 0) {
		nstash--;
		host_close(stash[nstash]);
	}

	if (duperrno != 0)
		errno = duperrno;

	return myfd;
}
    676 
    677 static int
    678 doconnect(void)
    679 {
    680 	struct respwait rw;
    681 	struct rsp_hdr rhdr;
    682 	char banner[MAXBANNER];
    683 	int s, error, flags;
    684 	ssize_t n;
    685 
    686 	if (kq != -1)
    687 		host_close(kq);
    688 	kq = -1;
    689 	s = -1;
    690 
    691 	if (clispc.spc_fd != -1)
    692 		host_close(clispc.spc_fd);
    693 	clispc.spc_fd = -1;
    694 
    695 	/*
    696 	 * for reconnect, gate everyone out of the receiver code
    697 	 */
    698 	putwait_locked(&clispc, &rw, &rhdr);
    699 
    700 	pthread_mutex_lock(&clispc.spc_mtx);
    701 	clispc.spc_reconnecting = 1;
    702 	pthread_cond_broadcast(&clispc.spc_cv);
    703 	clispc.spc_generation++;
    704 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    705 		clispc.spc_istatus = SPCSTATUS_WANTED;
    706 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    707 	}
    708 	kickall(&clispc);
    709 
    710 	/*
    711 	 * we can release it already since we hold the
    712 	 * send lock during reconnect
    713 	 * XXX: assert it
    714 	 */
    715 	clispc.spc_istatus = SPCSTATUS_FREE;
    716 	pthread_mutex_unlock(&clispc.spc_mtx);
    717 	unputwait_locked(&clispc, &rw);
    718 
    719 	free(clispc.spc_buf);
    720 	clispc.spc_off = 0;
    721 
    722 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    723 	if (s == -1)
    724 		return -1;
    725 
    726 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
    727 		if (errno == EINTR)
    728 			continue;
    729 		ERRLOG(("rump_sp: client connect failed: %s\n",
    730 		    strerror(errno)));
    731 		return -1;
    732 	}
    733 
    734 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    735 		ERRLOG(("rump_sp: connect hook failed\n"));
    736 		return -1;
    737 	}
    738 
    739 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
    740 		ERRLOG(("rump_sp: failed to read banner\n"));
    741 		return -1;
    742 	}
    743 
    744 	if (banner[n-1] != '\n') {
    745 		ERRLOG(("rump_sp: invalid banner\n"));
    746 		return -1;
    747 	}
    748 	banner[n] = '\0';
    749 	/* XXX parse the banner some day */
    750 
    751 	flags = host_fcntl(s, F_GETFL, 0);
    752 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    753 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    754 		return -1;
    755 	}
    756 	clispc.spc_fd = s;
    757 	clispc.spc_state = SPCSTATE_RUNNING;
    758 	clispc.spc_reconnecting = 0;
    759 
    760 #ifdef USE_KQUEUE
    761 {
    762 	struct kevent kev[NSIG+1];
    763 	int i;
    764 
    765 	/* setup kqueue, we want all signals and the fd */
    766 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    767 		ERRLOG(("rump_sp: cannot setup kqueue"));
    768 		return -1;
    769 	}
    770 
    771 	for (i = 0; i < NSIG; i++) {
    772 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    773 	}
    774 	EV_SET(&kev[NSIG], clispc.spc_fd,
    775 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    776 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    777 		ERRLOG(("rump_sp: kevent() failed"));
    778 		return -1;
    779 	}
    780 }
    781 #endif /* USE_KQUEUE */
    782 
    783 	return 0;
    784 }
    785 
    786 static int
    787 doinit(void)
    788 {
    789 
    790 	TAILQ_INIT(&clispc.spc_respwait);
    791 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    792 	pthread_cond_init(&clispc.spc_cv, NULL);
    793 
    794 	return 0;
    795 }
    796 
#ifdef RTLD_NEXT
/*
 * Default symbol lookup: a plain wrapper around dlsym().
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
/*
 * rumphijack_dlsym is a weak alias for the wrapper above.
 * rumpclient_init() compares the two to detect whether the alias still
 * refers to the default.
 * NOTE(review): presumably the weak linkage exists so another library
 * can supply its own rumphijack_dlsym -- confirm.
 */
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
    808 
    809 static pid_t init_done = 0;
    810 
    811 int
    812 rumpclient_init(void)
    813 {
    814 	char *p;
    815 	int error;
    816 	int rv = -1;
    817 	int hstype;
    818 	pid_t mypid;
    819 
    820 	/*
    821 	 * Make sure we're not riding the context of a previous
    822 	 * host fork.  Note: it's *possible* that after n>1 forks
    823 	 * we have the same pid as one of our exited parents, but
    824 	 * I'm pretty sure there are 0 practical implications, since
    825 	 * it means generations would have to skip rumpclient init.
    826 	 */
    827 	if (init_done == (mypid = getpid()))
    828 		return 0;
    829 
    830 	/* kq does not traverse fork() */
    831 	if (init_done != 0)
    832 		kq = -1;
    833 	init_done = mypid;
    834 
    835 	sigfillset(&fullset);
    836 
    837 	/*
    838 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
    839 	 * wann wird man je verstehen?  wann wird man je verstehen?
    840 	 */
    841 #ifdef RTLD_NEXT
    842 #define FINDSYM2(_name_,_syscall_)					\
    843 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    844 	    #_syscall_)) == NULL) {					\
    845 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    846 			host_##_name_ = _name_; /* static fallback */	\
    847 		if (host_##_name_ == NULL) {				\
    848 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
    849 			    dlerror());					\
    850 			exit(1);					\
    851 		}							\
    852 	}
    853 #else
    854 #define FINDSYM2(_name_,_syscall)					\
    855 	host_##_name_ = _name_;
    856 #endif
    857 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    858 #ifdef __NetBSD__
    859 	FINDSYM2(socket,__socket30)
    860 #else
    861 	FINDSYM(socket)
    862 #endif
    863 
    864 	FINDSYM(close)
    865 	FINDSYM(connect)
    866 	FINDSYM(fcntl)
    867 	FINDSYM(poll)
    868 	FINDSYM(read)
    869 	FINDSYM(sendmsg)
    870 	FINDSYM(setsockopt)
    871 	FINDSYM(dup)
    872 	FINDSYM(execve)
    873 
    874 #ifdef USE_KQUEUE
    875 	FINDSYM(kqueue)
    876 #if !__NetBSD_Prereq__(5,99,7)
    877 	FINDSYM(kevent)
    878 #else
    879 	FINDSYM2(kevent,_sys___kevent50)
    880 #endif
    881 #endif /* USE_KQUEUE */
    882 
    883 #undef	FINDSYM
    884 #undef	FINDSY2
    885 
    886 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    887 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    888 			fprintf(stderr, "error: RUMP_SERVER not set\n");
    889 			errno = ENOENT;
    890 			goto out;
    891 		}
    892 	}
    893 
    894 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    895 		errno = error;
    896 		goto out;
    897 	}
    898 
    899 	if (doinit() == -1)
    900 		goto out;
    901 
    902 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    903 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    904 		unsetenv("RUMPCLIENT__EXECFD");
    905 		hstype = HANDSHAKE_EXEC;
    906 	} else {
    907 		if (doconnect() == -1)
    908 			goto out;
    909 		hstype = HANDSHAKE_GUEST;
    910 	}
    911 
    912 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    913 	if (error) {
    914 		pthread_mutex_destroy(&clispc.spc_mtx);
    915 		pthread_cond_destroy(&clispc.spc_cv);
    916 		if (clispc.spc_fd != -1)
    917 			host_close(clispc.spc_fd);
    918 		errno = error;
    919 		goto out;
    920 	}
    921 	rv = 0;
    922 
    923  out:
    924 	if (rv == -1)
    925 		init_done = 0;
    926 	return rv;
    927 }
    928 
/*
 * Fork context created by rumpclient_prefork() and consumed by
 * rumpclient_fork_init() / rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork reply */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
    934 
    935 struct rumpclient_fork *
    936 rumpclient_prefork(void)
    937 {
    938 	struct rumpclient_fork *rpf;
    939 	sigset_t omask;
    940 	void *resp;
    941 	int rv;
    942 
    943 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    944 	rpf = malloc(sizeof(*rpf));
    945 	if (rpf == NULL)
    946 		goto out;
    947 
    948 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    949 		free(rpf);
    950 		errno = rv;
    951 		rpf = NULL;
    952 		goto out;
    953 	}
    954 
    955 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    956 	free(resp);
    957 
    958 	rpf->fork_spc = clispc;
    959 	rpf->fork_kq = kq;
    960 
    961  out:
    962 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    963 	return rpf;
    964 }
    965 
    966 int
    967 rumpclient_fork_init(struct rumpclient_fork *rpf)
    968 {
    969 	int error;
    970 	int osock;
    971 
    972 	osock = clispc.spc_fd;
    973 	memset(&clispc, 0, sizeof(clispc));
    974 	clispc.spc_fd = osock;
    975 
    976 	kq = -1; /* kqueue descriptor is not copied over fork() */
    977 
    978 	if (doinit() == -1)
    979 		return -1;
    980 	if (doconnect() == -1)
    981 		return -1;
    982 
    983 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
    984 	    0, false);
    985 	if (error) {
    986 		pthread_mutex_destroy(&clispc.spc_mtx);
    987 		pthread_cond_destroy(&clispc.spc_cv);
    988 		errno = error;
    989 		return -1;
    990 	}
    991 
    992 	return 0;
    993 }
    994 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the function does nothing and rpf is left untouched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
   1002 
/*
 * Restore the connection state and kqueue descriptor which were saved
 * in rpf by rumpclient_prefork().
 * NOTE(review): judging by the name this is for the parent of a
 * vfork(), which shares memory with the child -- confirm with callers.
 */
void
rumpclient_fork_vparent(struct rumpclient_fork *rpf)
{

	clispc = rpf->fork_spc;
	kq = rpf->fork_kq;
}
   1010 
   1011 void
   1012 rumpclient_setconnretry(time_t timeout)
   1013 {
   1014 
   1015 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
   1016 		return; /* gigo */
   1017 
   1018 	retrytimo = timeout;
   1019 }
   1020 
   1021 int
   1022 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
   1023 {
   1024 	int fd = *fdp;
   1025 	int untilfd, rv;
   1026 	int newfd;
   1027 
   1028 	switch (variant) {
   1029 	case RUMPCLIENT_CLOSE_FCLOSEM:
   1030 		untilfd = MAX(clispc.spc_fd, kq);
   1031 		for (; fd <= untilfd; fd++) {
   1032 			if (fd == clispc.spc_fd || fd == kq)
   1033 				continue;
   1034 			rv = host_close(fd);
   1035 			if (rv == -1)
   1036 				return -1;
   1037 		}
   1038 		*fdp = fd;
   1039 		break;
   1040 
   1041 	case RUMPCLIENT_CLOSE_CLOSE:
   1042 	case RUMPCLIENT_CLOSE_DUP2:
   1043 		if (fd == clispc.spc_fd) {
   1044 			newfd = dupgood(clispc.spc_fd, 1);
   1045 			if (newfd == -1)
   1046 				return -1;
   1047 
   1048 #ifdef USE_KQUEUE
   1049 			{
   1050 			struct kevent kev[2];
   1051 
   1052 			/*
   1053 			 * now, we have a new socket number, so change
   1054 			 * the file descriptor that kqueue is
   1055 			 * monitoring.  remove old and add new.
   1056 			 */
   1057 			EV_SET(&kev[0], clispc.spc_fd,
   1058 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
   1059 			EV_SET(&kev[1], newfd,
   1060 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
   1061 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
   1062 				int sverrno = errno;
   1063 				host_close(newfd);
   1064 				errno = sverrno;
   1065 				return -1;
   1066 			}
   1067 			clispc.spc_fd = newfd;
   1068 			}
   1069 		}
   1070 		if (fd == kq) {
   1071 			newfd = dupgood(kq, 1);
   1072 			if (newfd == -1)
   1073 				return -1;
   1074 			kq = newfd;
   1075 #else /* USE_KQUEUE */
   1076 			clispc.spc_fd = newfd;
   1077 #endif /* !USE_KQUEUE */
   1078 		}
   1079 		break;
   1080 	}
   1081 
   1082 	return 0;
   1083 }
   1084 
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 * NOTE(review): rumpclient__dofork is defined outside this file;
 * presumably it wraps the fork with the prefork/fork_init sequence --
 * confirm.
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
   1091 
   1092 /*
   1093  * Process is about to exec.  Save info about our existing connection
   1094  * in the env.  rumpclient will check for this info in init().
   1095  * This is mostly for the benefit of rumphijack, but regular applications
   1096  * may use it as well.
   1097  */
   1098 int
   1099 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1100 {
   1101 	char buf[4096];
   1102 	char **newenv;
   1103 	char *envstr, *envstr2;
   1104 	size_t nelem;
   1105 	int rv, sverrno;
   1106 
   1107 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1108 	    clispc.spc_fd, kq);
   1109 	envstr = malloc(strlen(buf)+1);
   1110 	if (envstr == NULL) {
   1111 		return ENOMEM;
   1112 	}
   1113 	strcpy(envstr, buf);
   1114 
   1115 	/* do we have a fully parsed url we want to forward in the env? */
   1116 	if (*parsedurl != '\0') {
   1117 		snprintf(buf, sizeof(buf),
   1118 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1119 		envstr2 = malloc(strlen(buf)+1);
   1120 		if (envstr2 == NULL) {
   1121 			free(envstr);
   1122 			return ENOMEM;
   1123 		}
   1124 		strcpy(envstr2, buf);
   1125 	} else {
   1126 		envstr2 = NULL;
   1127 	}
   1128 
   1129 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1130 		continue;
   1131 
   1132 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1133 	if (newenv == NULL) {
   1134 		free(envstr2);
   1135 		free(envstr);
   1136 		return ENOMEM;
   1137 	}
   1138 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1139 
   1140 	newenv[nelem] = envstr;
   1141 	newenv[nelem+1] = envstr2;
   1142 	newenv[nelem+2] = NULL;
   1143 
   1144 	rv = host_execve(path, argv, newenv);
   1145 
   1146 	_DIAGASSERT(rv != 0);
   1147 	sverrno = errno;
   1148 	free(envstr2);
   1149 	free(envstr);
   1150 	free(newenv);
   1151 	errno = sverrno;
   1152 	return rv;
   1153 }
   1154 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * Forks (the parent exits with status 0), creates a new session,
 * changes directory to "/" unless nochdir is set and points stdio at
 * /dev/null unless noclose is set.  Finally the connection to the
 * server is set up for the child with rumpclient_fork_init().
 *
 * Returns 0 in the child on success and -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *forkctx;
	pid_t child;
	int sverrno;

	forkctx = rumpclient_prefork();
	if (forkctx == NULL)
		return -1;

	child = fork();
	if (child == -1)
		goto daemonerr;
	if (child != 0)
		_exit(0);

	/* child from here on */
	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int devnull = open("/dev/null", O_RDWR);
		int stdfd;

		for (stdfd = 0; stdfd <= 2; stdfd++)
			dup2(devnull, stdfd);
		if (devnull > 2)
			close(devnull);
	}

	/* note: fork is either completed or cancelled by the call */
	return rumpclient_fork_init(forkctx) == -1 ? -1 : 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(forkctx);
	errno = sverrno;
	return -1;
}
   1202