Home | History | Annotate | Line # | Download | only in librumpclient
      1 /*      $NetBSD: rumpclient.c,v 1.71 2023/07/31 04:37:04 rin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <rump/rumpuser_port.h>
     33 
     34 /*
     35  * We use kqueue on the BSDs, poll elsewhere.  We
     36  * want to use kqueue because it will give us the ability to get signal
     37  * notifications but defer their handling to a stage where we do not
     38  * hold the communication lock.  Taking a signal while holding on to
     39  * that lock may cause a deadlock.  Therefore, block signals throughout
     40  * the RPC when using poll.  On Linux, we use signalfd in the same role
     41  * as kqueue on NetBSD to be able to take signals while waiting for a
     42  * response from the server.
     43  */
     44 
     45 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
     46     defined(__DragonFly__) || defined(__OpenBSD__)
     47 #define USE_KQUEUE
     48 #endif
     49 #if defined(__linux__)
     50 #define USE_SIGNALFD
     51 #endif
     52 
     53 __RCSID("$NetBSD: rumpclient.c,v 1.71 2023/07/31 04:37:04 rin Exp $");
     54 
     55 #include <sys/param.h>
     56 #include <sys/mman.h>
     57 #include <sys/socket.h>
     58 #include <sys/time.h>
     59 
     60 #ifdef USE_KQUEUE
     61 #include <sys/event.h>
     62 #endif
     63 
     64 #include <arpa/inet.h>
     65 #include <netinet/in.h>
     66 #include <netinet/tcp.h>
     67 
     68 #include <assert.h>
     69 #include <dlfcn.h>
     70 #include <errno.h>
     71 #include <fcntl.h>
     72 #include <poll.h>
     73 #include <pthread.h>
     74 #include <signal.h>
     75 #include <stdarg.h>
     76 #include <stdbool.h>
     77 #include <stdio.h>
     78 #include <stdlib.h>
     79 #include <string.h>
     80 #include <unistd.h>
     81 
     82 #include <rump/rumpclient.h>
     83 
/*
 * Pointers to the host's implementations of the calls this library
 * needs for talking to the server.  They are filled in by
 * rumpclient_init() through the FINDSYM/FINDSYM2 macros; all host I/O
 * in this file on the server socket and the "holy" fd goes through them.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
/* the count arguments of kevent() are size_t on NetBSD, int elsewhere */
#ifdef __NetBSD__
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#else
int	(*host_kevent)(int, const struct kevent *, int,
		       struct kevent *, int, const struct timespec *);
#endif
#endif

#ifdef USE_SIGNALFD
#include <sys/signalfd.h>

int	(*host_signalfd)(int, const sigset_t *, int);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);
    113 
    114 #include "sp_common.c"
    115 #include "rumpuser_sigtrans.c"
    116 
/* the single client connection to the server; spc_fd is -1 while closed */
static struct spclient clispc = {
	.spc_fd = -1,
};

/*
 * Descriptor created by makeholyfd() (a kqueue or a signalfd, depending
 * on the platform) used to notice signals while waiting for a response.
 * -1 when there is none.
 */
static int holyfd = -1;
/* every signal; filled by sigfillset() in rumpclient_init() */
static sigset_t fullset;

/* forward declarations: both are used by send_with_recon() below */
static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
    135 
/*
 * Send an iovec to the server and, if the connection turns out to be
 * gone (dosend() reports ENOTCONN or EBADF), try to reconnect according
 * to the policy stored in retrytimo.
 *
 * Returns:
 *  - whatever dosend() returned when that is neither ENOTCONN nor EBADF
 *  - ENOTCONN when reconnecting is disabled (retrytimo == 0), when
 *    RUMPCLIENT_RETRYCONN_ONCE is set and one attempt already failed,
 *    or when a positive retrytimo (seconds) has elapsed
 *  - EAGAIN after a successful reconnect + guest handshake; the caller
 *    must then resend the whole PDU
 * With RUMPCLIENT_RETRYCONN_DIE the process exits via _exit(1).
 */
static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			/* first failure: remember when we started retrying */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			if (reconretries == 1) {
				/* one reconnect attempt has already failed */
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* remind the user at most every two minutes */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				time_t tdiff;

				gettimeofday(&curtime, NULL);
				tdiff = curtime.tv_sec - starttime.tv_sec;
				/* round down when a full second has not passed */
				if (starttime.tv_usec > curtime.tv_usec)
					tdiff--;
				if (tdiff >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/*
			 * adhoc backoff timer: 0 .. 0.9s in 100ms steps for
			 * the first ten attempts, then 1 .. 10s.
			 */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			/* on any failure go around and try the send again */
			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			/*
			 * NOTE(review): this checks errno, not rv; confirm
			 * which of the two was intended.
			 */
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
    218 
/*
 * Wait until the response registered in rw has arrived.
 *
 * Only one thread at a time reads from the socket (spc_istatus is
 * BUSY while it does); other threads sleep on their rw_cv.  The
 * reading thread dispatches every frame it receives: responses are
 * handed to their waiter via kickwaiter(), server requests are
 * served by handlereq().
 *
 * spc:      connection to wait on
 * rw:       wait record previously queued on spc->spc_respwait; it is
 *           removed from the queue and its condvar destroyed on return
 * mask:     signal mask to switch to briefly when a signal was noticed,
 *           so that it can be delivered without the mutex held
 * keeplock: if false, sendunlockl() is called before waiting
 *
 * Returns ENOTCONN if the connection generation changed or the frame
 * read failed, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; used to detect a reconnect */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the receiver; read without the mutex held */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(holyfd, NULL, 0,
				    kev, __arraycount(kev), NULL);

				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* any signal event means: stop reading */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* !USE_KQUEUE */
				struct pollfd pfd[2];

				/* wait for either socket data or holyfd */
				pfd[0].fd = clispc.spc_fd;
				pfd[0].events = POLLIN;
				pfd[1].fd = holyfd;
				pfd[1].events = POLLIN;

				rv = host_poll(pfd, 2, -1);
				if (rv >= 1 && pfd[1].revents & POLLIN) {
					dosig = 1;
					goto cleanup;
				}
#endif /* !USE_KQUEUE */

				/*
				 * 0: go around and wait again,
				 * -1: treat the connection as dead,
				 * anything else: handle the frame below.
				 */
				switch (readframe(spc)) {
				case 0:
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* done only if it was our own reply */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* hand the receiver role back */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * Switch to the caller's mask without the
				 * mutex held, then block everything again.
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* someone else is reading; ask to be woken up */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    352 
    353 static int
    354 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    355 	const void *data, size_t dlen, void **resp)
    356 {
    357 	struct rsp_hdr rhdr;
    358 	struct respwait rw;
    359 	struct iovec iov[2];
    360 	int rv;
    361 
    362 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    363 	rhdr.rsp_class = RUMPSP_REQ;
    364 	rhdr.rsp_type = RUMPSP_SYSCALL;
    365 	rhdr.rsp_sysnum = sysnum;
    366 
    367 	IOVPUT(iov[0], rhdr);
    368 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
    369 
    370 	do {
    371 		putwait(spc, &rw, &rhdr);
    372 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
    373 			unputwait(spc, &rw);
    374 			continue;
    375 		}
    376 
    377 		rv = cliwaitresp(spc, &rw, omask, false);
    378 		if (rv == ENOTCONN)
    379 			rv = EAGAIN;
    380 	} while (rv == EAGAIN);
    381 
    382 	*resp = rw.rw_data;
    383 	return rv;
    384 }
    385 
    386 static int
    387 handshake_req(struct spclient *spc, int type, void *data,
    388 	int cancel, bool haslock)
    389 {
    390 	struct handshake_fork rf;
    391 	const char *myprogname = NULL; /* XXXgcc */
    392 	struct rsp_hdr rhdr;
    393 	struct respwait rw;
    394 	sigset_t omask;
    395 	size_t bonus;
    396 	struct iovec iov[2];
    397 	int rv;
    398 
    399 	if (type == HANDSHAKE_FORK) {
    400 		bonus = sizeof(rf);
    401 	} else {
    402 #ifdef __NetBSD__
    403 		/* would procfs work on NetBSD too? */
    404 		myprogname = getprogname();
    405 #else
    406 		int fd = open("/proc/self/comm", O_RDONLY);
    407 		if (fd == -1) {
    408 			myprogname = "???";
    409 		} else {
    410 			static char commname[128];
    411 
    412 			memset(commname, 0, sizeof(commname));
    413 			if (read(fd, commname, sizeof(commname)) > 0) {
    414 				char *n;
    415 
    416 				n = strrchr(commname, '\n');
    417 				if (n)
    418 					*n = '\0';
    419 				myprogname = commname;
    420 			} else {
    421 				myprogname = "???";
    422 			}
    423 			close(fd);
    424 		}
    425 #endif
    426 		bonus = strlen(myprogname)+1;
    427 	}
    428 
    429 	/* performs server handshake */
    430 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    431 	rhdr.rsp_class = RUMPSP_REQ;
    432 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    433 	rhdr.rsp_handshake = type;
    434 
    435 	IOVPUT(iov[0], rhdr);
    436 
    437 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    438 	if (haslock)
    439 		putwait_locked(spc, &rw, &rhdr);
    440 	else
    441 		putwait(spc, &rw, &rhdr);
    442 	if (type == HANDSHAKE_FORK) {
    443 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
    444 		rf.rf_cancel = cancel;
    445 		IOVPUT(iov[1], rf);
    446 	} else {
    447 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
    448 	}
    449 	rv = send_with_recon(spc, iov, __arraycount(iov));
    450 	if (rv || cancel) {
    451 		if (haslock)
    452 			unputwait_locked(spc, &rw);
    453 		else
    454 			unputwait(spc, &rw);
    455 		if (cancel) {
    456 			goto out;
    457 		}
    458 	} else {
    459 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    460 	}
    461 	if (rv)
    462 		goto out;
    463 
    464 	rv = *(int *)rw.rw_data;
    465 	free(rw.rw_data);
    466 
    467  out:
    468 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    469 	return rv;
    470 }
    471 
    472 static int
    473 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    474 {
    475 	struct rsp_hdr rhdr;
    476 	struct respwait rw;
    477 	struct iovec iov[1];
    478 	int rv;
    479 
    480 	rhdr.rsp_len = sizeof(rhdr);
    481 	rhdr.rsp_class = RUMPSP_REQ;
    482 	rhdr.rsp_type = RUMPSP_PREFORK;
    483 	rhdr.rsp_error = 0;
    484 
    485 	IOVPUT(iov[0], rhdr);
    486 
    487 	do {
    488 		putwait(spc, &rw, &rhdr);
    489 		rv = send_with_recon(spc, iov, __arraycount(iov));
    490 		if (rv != 0) {
    491 			unputwait(spc, &rw);
    492 			continue;
    493 		}
    494 
    495 		rv = cliwaitresp(spc, &rw, omask, false);
    496 		if (rv == ENOTCONN)
    497 			rv = EAGAIN;
    498 	} while (rv == EAGAIN);
    499 
    500 	*resp = rw.rw_data;
    501 	return rv;
    502 }
    503 
    504 /*
    505  * prevent response code from deadlocking with reconnect code
    506  */
    507 static int
    508 resp_sendlock(struct spclient *spc)
    509 {
    510 	int rv = 0;
    511 
    512 	pthread_mutex_lock(&spc->spc_mtx);
    513 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    514 		if (__predict_false(spc->spc_reconnecting)) {
    515 			rv = EBUSY;
    516 			goto out;
    517 		}
    518 		spc->spc_ostatus = SPCSTATUS_WANTED;
    519 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    520 	}
    521 	spc->spc_ostatus = SPCSTATUS_BUSY;
    522 
    523  out:
    524 	pthread_mutex_unlock(&spc->spc_mtx);
    525 	return rv;
    526 }
    527 
    528 static void
    529 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    530 	int wantstr)
    531 {
    532 	struct rsp_hdr rhdr;
    533 	struct iovec iov[2];
    534 
    535 	if (wantstr)
    536 		dlen = MIN(dlen, strlen(data)+1);
    537 
    538 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    539 	rhdr.rsp_reqno = reqno;
    540 	rhdr.rsp_class = RUMPSP_RESP;
    541 	rhdr.rsp_type = RUMPSP_COPYIN;
    542 	rhdr.rsp_sysnum = 0;
    543 
    544 	IOVPUT(iov[0], rhdr);
    545 	IOVPUT_WITHSIZE(iov[1], data, dlen);
    546 
    547 	if (resp_sendlock(spc) != 0)
    548 		return;
    549 	(void)SENDIOV(spc, iov);
    550 	sendunlock(spc);
    551 }
    552 
    553 static void
    554 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    555 {
    556 	struct rsp_hdr rhdr;
    557 	struct iovec iov[2];
    558 
    559 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    560 	rhdr.rsp_reqno = reqno;
    561 	rhdr.rsp_class = RUMPSP_RESP;
    562 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    563 	rhdr.rsp_sysnum = 0;
    564 
    565 	IOVPUT(iov[0], rhdr);
    566 	IOVPUT(iov[1], addr);
    567 
    568 	if (resp_sendlock(spc) != 0)
    569 		return;
    570 	(void)SENDIOV(spc, iov);
    571 	sendunlock(spc);
    572 }
    573 
    574 int
    575 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    576 	register_t *retval)
    577 {
    578 	struct rsp_sysresp *resp;
    579 	sigset_t omask;
    580 	void *rdata;
    581 	int rv;
    582 
    583 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    584 
    585 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    586 	    sysnum, data, dlen));
    587 
    588 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    589 	if (rv)
    590 		goto out;
    591 
    592 	resp = rdata;
    593 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %"
    594 	    PRIxREGISTER"/%"PRIxREGISTER"\n",
    595 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    596 
    597 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    598 	rv = resp->rsys_error;
    599 	free(rdata);
    600 
    601  out:
    602 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    603 	return rv;
    604 }
    605 
    606 static void
    607 handlereq(struct spclient *spc)
    608 {
    609 	struct rsp_copydata *copydata;
    610 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    611 	void *mapaddr;
    612 	size_t maplen;
    613 	int reqtype = spc->spc_hdr.rsp_type;
    614 	int sig;
    615 
    616 	switch (reqtype) {
    617 	case RUMPSP_COPYIN:
    618 	case RUMPSP_COPYINSTR:
    619 		/*LINTED*/
    620 		copydata = (struct rsp_copydata *)spc->spc_buf;
    621 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    622 		    copydata->rcp_addr, copydata->rcp_len));
    623 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    624 		    copydata->rcp_addr, copydata->rcp_len,
    625 		    reqtype == RUMPSP_COPYINSTR);
    626 		break;
    627 	case RUMPSP_COPYOUT:
    628 	case RUMPSP_COPYOUTSTR:
    629 		/*LINTED*/
    630 		copydata = (struct rsp_copydata *)spc->spc_buf;
    631 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    632 		    copydata->rcp_addr, copydata->rcp_len));
    633 		/*LINTED*/
    634 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    635 		    copydata->rcp_len);
    636 		break;
    637 	case RUMPSP_ANONMMAP:
    638 		/*LINTED*/
    639 		maplen = *(size_t *)spc->spc_buf;
    640 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    641 		    MAP_ANON|MAP_PRIVATE, -1, 0);
    642 		if (mapaddr == MAP_FAILED)
    643 			mapaddr = NULL;
    644 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    645 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    646 		break;
    647 	case RUMPSP_RAISE:
    648 		sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
    649 		DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
    650 		raise(sig);
    651 		/*
    652 		 * We most likely have signals blocked, but the signal
    653 		 * will be handled soon enough when we return.
    654 		 */
    655 		break;
    656 	default:
    657 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    658 		abort();
    659 		break;
    660 	}
    661 
    662 	spcfreebuf(spc);
    663 }
    664 
/*
 * Result of parsing the server URL in rumpclient_init(): index into
 * parsetab for the chosen transport, and the server's socket address.
 * Both are used by doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    667 
/*
 * Duplicate myfd until the result is a "good" descriptor, i.e. one
 * that does not collide with stdio (fd > 2).  If mustchange is set, at
 * least one dup is performed even for a good descriptor, and the
 * descriptor passed in is left open for the caller.  Every other
 * intermediate descriptor is closed.
 *
 * Returns the new descriptor, or -1 (errno as left by the failing dup)
 * if myfd was -1 to begin with or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int saved_errno = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: do not keep the slot */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	/* the closes below may clobber errno from a failed dup */
	if (myfd == -1 && nstale > 0)
		saved_errno = errno;

	while (nstale > 0)
		host_close(stale[--nstale]);

	if (saved_errno)
		errno = saved_errno;

	return myfd;
}
    699 
    700 #if defined(USE_KQUEUE)
    701 
    702 static int
    703 makeholyfd(void)
    704 {
    705 	struct kevent kev[NSIG+1];
    706 	int i, fd;
    707 
    708 	/* setup kqueue, we want all signals and the fd */
    709 	if ((fd = dupgood(host_kqueue(), 0)) == -1) {
    710 		ERRLOG(("rump_sp: cannot setup kqueue"));
    711 		return -1;
    712 	}
    713 
    714 	for (i = 0; i < NSIG; i++) {
    715 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    716 	}
    717 	EV_SET(&kev[NSIG], clispc.spc_fd,
    718 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    719 	if (host_kevent(fd, kev, NSIG+1, NULL, 0, NULL) == -1) {
    720 		ERRLOG(("rump_sp: kevent() failed"));
    721 		host_close(fd);
    722 		return -1;
    723 	}
    724 
    725 	return fd;
    726 }
    727 
    728 #elif defined(USE_SIGNALFD) /* !USE_KQUEUE */
    729 
    730 static int
    731 makeholyfd(void)
    732 {
    733 
    734 	return host_signalfd(-1, &fullset, 0);
    735 }
    736 
    737 #else /* !USE_KQUEUE && !USE_SIGNALFD */
    738 
/*
 * Fallback flavour for platforms with neither kqueue nor signalfd:
 * there is no signal notification descriptor, so always report -1.
 */
static int
makeholyfd(void)
{

	return -1;
}
    745 
    746 #endif
    747 
    748 static int
    749 doconnect(void)
    750 {
    751 	struct respwait rw;
    752 	struct rsp_hdr rhdr;
    753 	char banner[MAXBANNER];
    754 	int s, error, flags;
    755 	ssize_t n;
    756 
    757 	if (holyfd != -1)
    758 		host_close(holyfd);
    759 	holyfd = -1;
    760 	s = -1;
    761 
    762 	if (clispc.spc_fd != -1)
    763 		host_close(clispc.spc_fd);
    764 	clispc.spc_fd = -1;
    765 
    766 	/*
    767 	 * for reconnect, gate everyone out of the receiver code
    768 	 */
    769 	putwait_locked(&clispc, &rw, &rhdr);
    770 
    771 	pthread_mutex_lock(&clispc.spc_mtx);
    772 	clispc.spc_reconnecting = 1;
    773 	pthread_cond_broadcast(&clispc.spc_cv);
    774 	clispc.spc_generation++;
    775 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    776 		clispc.spc_istatus = SPCSTATUS_WANTED;
    777 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    778 	}
    779 	kickall(&clispc);
    780 
    781 	/*
    782 	 * we can release it already since we hold the
    783 	 * send lock during reconnect
    784 	 * XXX: assert it
    785 	 */
    786 	clispc.spc_istatus = SPCSTATUS_FREE;
    787 	pthread_mutex_unlock(&clispc.spc_mtx);
    788 	unputwait_locked(&clispc, &rw);
    789 
    790 	free(clispc.spc_buf);
    791 	clispc.spc_off = 0;
    792 
    793 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    794 	if (s == -1)
    795 		return -1;
    796 
    797 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
    798 		if (errno == EINTR)
    799 			continue;
    800 		ERRLOG(("rump_sp: client connect failed: %s\n",
    801 		    strerror(errno)));
    802 		return -1;
    803 	}
    804 
    805 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    806 		ERRLOG(("rump_sp: connect hook failed\n"));
    807 		return -1;
    808 	}
    809 
    810 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
    811 		ERRLOG(("rump_sp: failed to read banner\n"));
    812 		return -1;
    813 	}
    814 
    815 	if (banner[n-1] != '\n') {
    816 		ERRLOG(("rump_sp: invalid banner\n"));
    817 		return -1;
    818 	}
    819 	banner[n] = '\0';
    820 	/* XXX parse the banner some day */
    821 
    822 	flags = host_fcntl(s, F_GETFL, 0);
    823 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    824 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
    825 		return -1;
    826 	}
    827 	clispc.spc_fd = s;
    828 	clispc.spc_state = SPCSTATE_RUNNING;
    829 	clispc.spc_reconnecting = 0;
    830 	holyfd = makeholyfd();
    831 
    832 	return 0;
    833 }
    834 
    835 static int
    836 doinit(void)
    837 {
    838 
    839 	TAILQ_INIT(&clispc.spc_respwait);
    840 	pthread_mutex_init(&clispc.spc_mtx, NULL);
    841 	pthread_cond_init(&clispc.spc_cv, NULL);
    842 
    843 	return 0;
    844 }
    845 
#ifdef RTLD_NEXT
/*
 * Default symbol lookup: a plain dlsym().  rumphijack_dlsym is declared
 * as a weak alias of this function; rumpclient_init() compares the two
 * addresses to tell whether another definition of rumphijack_dlsym is
 * in effect.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
    857 
    858 static pid_t init_done = 0;
    859 
    860 int
    861 rumpclient_init(void)
    862 {
    863 	char *p;
    864 	int error;
    865 	int rv = -1;
    866 	int hstype;
    867 	pid_t mypid;
    868 
    869 	/*
    870 	 * Make sure we're not riding the context of a previous
    871 	 * host fork.  Note: it's *possible* that after n>1 forks
    872 	 * we have the same pid as one of our exited parents, but
    873 	 * I'm pretty sure there are 0 practical implications, since
    874 	 * it means generations would have to skip rumpclient init.
    875 	 */
    876 	if (init_done == (mypid = getpid()))
    877 		return 0;
    878 
    879 #ifdef USE_KQUEUE
    880 	/* kq does not traverse fork() */
    881 	holyfd = -1;
    882 #endif
    883 	init_done = mypid;
    884 
    885 	sigfillset(&fullset);
    886 
    887 	/*
    888 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
    889 	 * wann wird man je verstehen?  wann wird man je verstehen?
    890 	 */
    891 #ifdef RTLD_NEXT
    892 #define FINDSYM2(_name_,_syscall_)					\
    893 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
    894 	    #_syscall_)) == NULL) {					\
    895 		if (rumphijack_dlsym == rumpclient__dlsym)		\
    896 			host_##_name_ = _name_; /* static fallback */	\
    897 		if (host_##_name_ == NULL) {				\
    898 			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
    899 			    dlerror());					\
    900 			exit(1);					\
    901 		}							\
    902 	}
    903 #else
    904 #define FINDSYM2(_name_,_syscall)					\
    905 	host_##_name_ = _name_;
    906 #endif
    907 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    908 #ifdef __NetBSD__
    909 	FINDSYM2(socket,__socket30)
    910 #else
    911 	FINDSYM(socket)
    912 #endif
    913 
    914 	FINDSYM(close)
    915 	FINDSYM(connect)
    916 	FINDSYM(fcntl)
    917 	FINDSYM(poll)
    918 	FINDSYM(read)
    919 	FINDSYM(sendmsg)
    920 	FINDSYM(setsockopt)
    921 	FINDSYM(dup)
    922 	FINDSYM(execve)
    923 
    924 #ifdef USE_KQUEUE
    925 	FINDSYM(kqueue)
    926 #ifdef __NetBSD__
    927 #if !__NetBSD_Prereq__(5,99,7)
    928 	FINDSYM(kevent)
    929 #elif !__NetBSD_Prereq__(10,99,7)
    930 	FINDSYM2(kevent,_sys___kevent50)
    931 #else
    932 	FINDSYM2(kevent,_sys___kevent100)
    933 #endif
    934 #else
    935 	FINDSYM(kevent)
    936 #endif
    937 #endif /* USE_KQUEUE */
    938 
    939 #ifdef USE_SIGNALFD
    940 	FINDSYM(signalfd)
    941 #endif
    942 
    943 #undef	FINDSYM
    944 #undef	FINDSY2
    945 
    946 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    947 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    948 			fprintf(stderr, "error: RUMP_SERVER not set\n");
    949 			errno = ENOENT;
    950 			goto out;
    951 		}
    952 	}
    953 
    954 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    955 		errno = error;
    956 		goto out;
    957 	}
    958 
    959 	if (doinit() == -1)
    960 		goto out;
    961 
    962 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    963 		sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
    964 		unsetenv("RUMPCLIENT__EXECFD");
    965 		hstype = HANDSHAKE_EXEC;
    966 	} else {
    967 		if (doconnect() == -1)
    968 			goto out;
    969 		hstype = HANDSHAKE_GUEST;
    970 	}
    971 
    972 	error = handshake_req(&clispc, hstype, NULL, 0, false);
    973 	if (error) {
    974 		pthread_mutex_destroy(&clispc.spc_mtx);
    975 		pthread_cond_destroy(&clispc.spc_cv);
    976 		if (clispc.spc_fd != -1)
    977 			host_close(clispc.spc_fd);
    978 		errno = error;
    979 		goto out;
    980 	}
    981 	rv = 0;
    982 
    983  out:
    984 	if (rv == -1)
    985 		init_done = 0;
    986 	return rv;
    987 }
    988 
/*
 * State captured by rumpclient_prefork() and consumed by
 * rumpclient_fork_init() / rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* copied from the prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_holyfd;		/* value of holyfd at prefork time */
};
    994 
    995 struct rumpclient_fork *
    996 rumpclient_prefork(void)
    997 {
    998 	struct rumpclient_fork *rpf;
    999 	sigset_t omask;
   1000 	void *resp;
   1001 	int rv;
   1002 
   1003 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
   1004 	rpf = malloc(sizeof(*rpf));
   1005 	if (rpf == NULL)
   1006 		goto out;
   1007 
   1008 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
   1009 		free(rpf);
   1010 		errno = rv;
   1011 		rpf = NULL;
   1012 		goto out;
   1013 	}
   1014 
   1015 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
   1016 	free(resp);
   1017 
   1018 	rpf->fork_spc = clispc;
   1019 	rpf->fork_holyfd = holyfd;
   1020 
   1021  out:
   1022 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
   1023 	return rpf;
   1024 }
   1025 
   1026 int
   1027 rumpclient_fork_init(struct rumpclient_fork *rpf)
   1028 {
   1029 	int error;
   1030 	int osock;
   1031 
   1032 	osock = clispc.spc_fd;
   1033 	memset(&clispc, 0, sizeof(clispc));
   1034 	clispc.spc_fd = osock;
   1035 
   1036 #ifdef USE_KQUEUE
   1037 	holyfd = -1; /* kqueue descriptor is not copied over fork() */
   1038 #else
   1039 	if (holyfd != -1) {
   1040 		host_close(holyfd);
   1041 		holyfd = -1;
   1042 	}
   1043 #endif
   1044 
   1045 	if (doinit() == -1)
   1046 		return -1;
   1047 	if (doconnect() == -1)
   1048 		return -1;
   1049 
   1050 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
   1051 	    0, false);
   1052 	if (error) {
   1053 		pthread_mutex_destroy(&clispc.spc_mtx);
   1054 		pthread_cond_destroy(&clispc.spc_cv);
   1055 		errno = error;
   1056 		return -1;
   1057 	}
   1058 
   1059 	return 0;
   1060 }
   1061 
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * the function does nothing and rpf is not touched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
   1069 
   1070 void
   1071 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
   1072 {
   1073 
   1074 	clispc = rpf->fork_spc;
   1075 	holyfd = rpf->fork_holyfd;
   1076 }
   1077 
   1078 void
   1079 rumpclient_setconnretry(time_t timeout)
   1080 {
   1081 
   1082 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
   1083 		return; /* gigo */
   1084 
   1085 	retrytimo = timeout;
   1086 }
   1087 
   1088 int
   1089 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
   1090 {
   1091 	int fd = *fdp;
   1092 	int untilfd;
   1093 	int newfd;
   1094 
   1095 	switch (variant) {
   1096 	case RUMPCLIENT_CLOSE_FCLOSEM:
   1097 		untilfd = MAX(clispc.spc_fd, holyfd);
   1098 		for (; fd <= untilfd; fd++) {
   1099 			if (fd == clispc.spc_fd || fd == holyfd)
   1100 				continue;
   1101 			(void)host_close(fd);
   1102 		}
   1103 		*fdp = fd;
   1104 		break;
   1105 
   1106 	case RUMPCLIENT_CLOSE_CLOSE:
   1107 	case RUMPCLIENT_CLOSE_DUP2:
   1108 		if (fd == clispc.spc_fd) {
   1109 			newfd = dupgood(clispc.spc_fd, 1);
   1110 			if (newfd == -1)
   1111 				return -1;
   1112 
   1113 #ifdef USE_KQUEUE
   1114 			{
   1115 			struct kevent kev[2];
   1116 
   1117 			/*
   1118 			 * now, we have a new socket number, so change
   1119 			 * the file descriptor that kqueue is
   1120 			 * monitoring.  remove old and add new.
   1121 			 */
   1122 			EV_SET(&kev[0], clispc.spc_fd,
   1123 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
   1124 			EV_SET(&kev[1], newfd,
   1125 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
   1126 			if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
   1127 				int sverrno = errno;
   1128 				host_close(newfd);
   1129 				errno = sverrno;
   1130 				return -1;
   1131 			}}
   1132 #endif /* !USE_KQUEUE */
   1133 			clispc.spc_fd = newfd;
   1134 		}
   1135 		if (holyfd != -1 && fd == holyfd) {
   1136 			newfd = dupgood(holyfd, 1);
   1137 			if (newfd == -1)
   1138 				return -1;
   1139 			holyfd = newfd;
   1140 		}
   1141 		break;
   1142 	}
   1143 
   1144 	return 0;
   1145 }
   1146 
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
   1153 
   1154 /*
   1155  * Process is about to exec.  Save info about our existing connection
   1156  * in the env.  rumpclient will check for this info in init().
   1157  * This is mostly for the benefit of rumphijack, but regular applications
   1158  * may use it as well.
   1159  */
   1160 int
   1161 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
   1162 {
   1163 	char buf[4096];
   1164 	char **newenv;
   1165 	char *envstr, *envstr2;
   1166 	size_t nelem;
   1167 	int rv, sverrno;
   1168 
   1169 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
   1170 	    clispc.spc_fd, holyfd);
   1171 	envstr = malloc(strlen(buf)+1);
   1172 	if (envstr == NULL) {
   1173 		return ENOMEM;
   1174 	}
   1175 	strcpy(envstr, buf);
   1176 
   1177 	/* do we have a fully parsed url we want to forward in the env? */
   1178 	if (*parsedurl != '\0') {
   1179 		snprintf(buf, sizeof(buf),
   1180 		    "RUMP__PARSEDSERVER=%s", parsedurl);
   1181 		envstr2 = malloc(strlen(buf)+1);
   1182 		if (envstr2 == NULL) {
   1183 			free(envstr);
   1184 			return ENOMEM;
   1185 		}
   1186 		strcpy(envstr2, buf);
   1187 	} else {
   1188 		envstr2 = NULL;
   1189 	}
   1190 
   1191 	for (nelem = 0; envp && envp[nelem]; nelem++)
   1192 		continue;
   1193 
   1194 	newenv = malloc(sizeof(*newenv) * (nelem+3));
   1195 	if (newenv == NULL) {
   1196 		free(envstr2);
   1197 		free(envstr);
   1198 		return ENOMEM;
   1199 	}
   1200 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
   1201 
   1202 	newenv[nelem] = envstr;
   1203 	newenv[nelem+1] = envstr2;
   1204 	newenv[nelem+2] = NULL;
   1205 
   1206 	rv = host_execve(path, argv, newenv);
   1207 
   1208 	_DIAGASSERT(rv != 0);
   1209 	sverrno = errno;
   1210 	free(envstr2);
   1211 	free(envstr);
   1212 	free(newenv);
   1213 	errno = sverrno;
   1214 	return rv;
   1215 }
   1216 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * The parent process exits with status 0 once the fork has succeeded.
 * The child calls setsid(), changes directory to "/" unless nochdir is
 * set, points descriptors 0-2 at /dev/null unless noclose is set, and
 * finally sets up its client connection with rumpclient_fork_init().
 *
 * Returns 0 in the child on success and -1 on failure.  For failures
 * of fork(), setsid() and chdir(), errno is preserved across the call
 * to rumpclient_fork_cancel().
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/*
		 * Redirection is best effort: if /dev/null cannot be
		 * opened, leave stdio alone instead of handing an
		 * invalid descriptor to dup2().
		 */
		if (fd != -1) {
			(void)dup2(fd, 0);
			(void)dup2(fd, 1);
			(void)dup2(fd, 2);
			if (fd > 2)
				(void)close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
   1264