Home | History | Annotate | Line # | Download | only in librumpclient
rumpclient.c revision 1.28
      1 /*      $NetBSD: rumpclient.c,v 1.28 2011/02/14 14:56:23 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 /*
     29  * Client side routines for rump syscall proxy.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __RCSID("$NetBSD");
     34 
     35 #include <sys/param.h>
     36 #include <sys/event.h>
     37 #include <sys/mman.h>
     38 #include <sys/socket.h>
     39 
     40 #include <arpa/inet.h>
     41 #include <netinet/in.h>
     42 #include <netinet/tcp.h>
     43 
     44 #include <assert.h>
     45 #include <dlfcn.h>
     46 #include <errno.h>
     47 #include <fcntl.h>
     48 #include <link.h>
     49 #include <poll.h>
     50 #include <pthread.h>
     51 #include <signal.h>
     52 #include <stdarg.h>
     53 #include <stdbool.h>
     54 #include <stdio.h>
     55 #include <stdlib.h>
     56 #include <string.h>
     57 #include <unistd.h>
     58 
     59 #include <rump/rumpclient.h>
     60 
/*
 * Pointers to the host implementations of the calls this library needs.
 * They are filled in by rumpclient_init() via rumpclient_dlsym(RTLD_NEXT,
 * ...).  HOSTOPS is defined before sp_common.c is included below;
 * NOTE(review): presumably it makes the shared code call through these
 * pointers -- confirm in sp_common.c.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendto)(int, const void *, size_t, int,
		       const struct sockaddr *, socklen_t);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue is used to wait for both socket data and signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
     76 
     77 #include "sp_common.c"
     78 
/* State of the connection to the server; starts out with no socket. */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor created in doconnect(); -1 while there is none */
static int kq = -1;
/* set containing every signal, filled by sigfillset() in rumpclient_init() */
static sigset_t fullset;

static int doconnect(bool);
static int handshake_req(struct spclient *, uint32_t *, int, bool);

/*
 * Reconnect policy read by send_with_recon() and changed through
 * rumpclient_setconnretry().
 */
time_t retrytimo = RUMPCLIENT_RETRYCONN_ONCE;
     90 
/*
 * Send dlen bytes from data with dosend().  If that fails with ENOTCONN
 * or EBADF, apply the retrytimo policy: give up, exit the process, or
 * try to reconnect with doconnect() followed by a guest handshake.
 *
 * Returns the dosend() result when no reconnect is attempted, EAGAIN
 * after a successful reconnect (the caller has to resend the whole PDU),
 * or ENOTCONN when reconnecting is abandoned.
 */
static int
send_with_recon(struct spclient *spc, const void *data, size_t dlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	/*
	 * Every pass starts with a send attempt; after a failed reconnect
	 * the "continue" statements below come back here.
	 */
	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, data, dlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0)
				break;
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				exit(1);

			/* first failure: remember when the outage started */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			/* reconretries == 1 means one attempt already failed */
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* progress note at most once every 120 seconds */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				struct timeval tmp;

				gettimeofday(&curtime, NULL);
				timersub(&curtime, &starttime, &tmp);
				if (tmp.tv_sec >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/*
			 * adhoc backoff timer: 0 to 0.9s in 0.1s steps for
			 * the first ten attempts, then 1s up to 10s
			 */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			if ((rv = doconnect(false)) != 0)
				continue;
			/* "true": handshake_req() uses the *_locked helpers */
			if ((rv = handshake_req(&clispc, NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
    168 
/*
 * Wait until the response registered in rw has arrived.
 *
 * Only one thread at a time reads from the connection: the thread that
 * finds spc_istatus == SPCSTATUS_FREE marks it BUSY and reads frames,
 * the others mark it WANTED and sleep on their own rw_cv.
 *
 * spc:      connection to wait on
 * rw:       response wait record; removed from spc_respwait and its
 *           condition variable destroyed before returning
 * mask:     signal mask installed briefly when kevent reports a signal
 * keeplock: when false, sendunlockl() is called once spc_mtx is held
 *
 * Returns rw->rw_error, or ENOTCONN if the connection generation changed
 * while waiting or readframe() reported failure.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps spc_generation on every (re)connect */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			struct kevent kev[8];
			int gotresp, dosig, rv, i;

			/* become the receiver; read without holding spc_mtx */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
				switch (readframe(spc)) {
				case 0:
					/* no complete frame: block in kevent */
					rv = host_kevent(kq, NULL, 0,
					    kev, __arraycount(kev), NULL);

					if (__predict_false(rv == -1)) {
						goto cleanup;
					}

					/*
					 * XXX: don't know how this can
					 * happen (timeout cannot expire
					 * since there isn't one), but
					 * it does happen
					 */
					if (__predict_false(rv == 0))
						continue;

					/* count signal events among the results */
					for (i = 0; i < rv; i++) {
						if (kev[i].filter
						    == EVFILT_SIGNAL)
							dosig++;
					}
					if (dosig)
						goto cleanup;

					continue;
				case -1:
					/* read failed: report ENOTCONN below */
					imalive = false;
					goto cleanup;
				default:
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop reading once it was our own reply */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					/* request from the server side */
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role, waking any WANTED waiters */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/*
			 * take one for the team: switch to the caller's
			 * signal mask for a moment, then block everything
			 * again before retaking the mutex
			 */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving; ask to be woken up */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
    273 
    274 static int
    275 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
    276 	const void *data, size_t dlen, void **resp)
    277 {
    278 	struct rsp_hdr rhdr;
    279 	struct respwait rw;
    280 	int rv;
    281 
    282 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    283 	rhdr.rsp_class = RUMPSP_REQ;
    284 	rhdr.rsp_type = RUMPSP_SYSCALL;
    285 	rhdr.rsp_sysnum = sysnum;
    286 
    287 	do {
    288 		putwait(spc, &rw, &rhdr);
    289 		if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) {
    290 			unputwait(spc, &rw);
    291 			continue;
    292 		}
    293 		if ((rv = send_with_recon(spc, data, dlen)) != 0) {
    294 			unputwait(spc, &rw);
    295 			continue;
    296 		}
    297 
    298 		rv = cliwaitresp(spc, &rw, omask, false);
    299 		if (rv == ENOTCONN)
    300 			rv = EAGAIN;
    301 	} while (rv == EAGAIN);
    302 
    303 	*resp = rw.rw_data;
    304 	return rv;
    305 }
    306 
    307 static int
    308 handshake_req(struct spclient *spc, uint32_t *auth, int cancel, bool haslock)
    309 {
    310 	struct handshake_fork rf;
    311 	struct rsp_hdr rhdr;
    312 	struct respwait rw;
    313 	sigset_t omask;
    314 	size_t bonus;
    315 	int rv;
    316 
    317 	if (auth) {
    318 		bonus = sizeof(rf);
    319 	} else {
    320 		bonus = strlen(getprogname())+1;
    321 	}
    322 
    323 	/* performs server handshake */
    324 	rhdr.rsp_len = sizeof(rhdr) + bonus;
    325 	rhdr.rsp_class = RUMPSP_REQ;
    326 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
    327 	if (auth)
    328 		rhdr.rsp_handshake = HANDSHAKE_FORK;
    329 	else
    330 		rhdr.rsp_handshake = HANDSHAKE_GUEST;
    331 
    332 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    333 	if (haslock)
    334 		putwait_locked(spc, &rw, &rhdr);
    335 	else
    336 		putwait(spc, &rw, &rhdr);
    337 	rv = dosend(spc, &rhdr, sizeof(rhdr));
    338 	if (auth) {
    339 		memcpy(rf.rf_auth, auth, AUTHLEN*sizeof(*auth));
    340 		rf.rf_cancel = cancel;
    341 		rv = send_with_recon(spc, &rf, sizeof(rf));
    342 	} else {
    343 		rv = dosend(spc, getprogname(), strlen(getprogname())+1);
    344 	}
    345 	if (rv || cancel) {
    346 		if (haslock)
    347 			unputwait_locked(spc, &rw);
    348 		else
    349 			unputwait(spc, &rw);
    350 		if (cancel) {
    351 			goto out;
    352 		}
    353 	} else {
    354 		rv = cliwaitresp(spc, &rw, &omask, haslock);
    355 	}
    356 	if (rv)
    357 		goto out;
    358 
    359 	rv = *(int *)rw.rw_data;
    360 	free(rw.rw_data);
    361 
    362  out:
    363 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    364 	return rv;
    365 }
    366 
    367 static int
    368 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
    369 {
    370 	struct rsp_hdr rhdr;
    371 	struct respwait rw;
    372 	int rv;
    373 
    374 	rhdr.rsp_len = sizeof(rhdr);
    375 	rhdr.rsp_class = RUMPSP_REQ;
    376 	rhdr.rsp_type = RUMPSP_PREFORK;
    377 	rhdr.rsp_error = 0;
    378 
    379 	do {
    380 		putwait(spc, &rw, &rhdr);
    381 		rv = send_with_recon(spc, &rhdr, sizeof(rhdr));
    382 		if (rv != 0) {
    383 			unputwait(spc, &rw);
    384 			continue;
    385 		}
    386 
    387 		rv = cliwaitresp(spc, &rw, omask, false);
    388 		if (rv == ENOTCONN)
    389 			rv = EAGAIN;
    390 	} while (rv == EAGAIN);
    391 
    392 	*resp = rw.rw_data;
    393 	return rv;
    394 }
    395 
    396 /*
    397  * prevent response code from deadlocking with reconnect code
    398  */
    399 static int
    400 resp_sendlock(struct spclient *spc)
    401 {
    402 	int rv = 0;
    403 
    404 	pthread_mutex_lock(&spc->spc_mtx);
    405 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
    406 		if (__predict_false(spc->spc_reconnecting)) {
    407 			rv = EBUSY;
    408 			goto out;
    409 		}
    410 		spc->spc_ostatus = SPCSTATUS_WANTED;
    411 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
    412 	}
    413 	spc->spc_ostatus = SPCSTATUS_BUSY;
    414 
    415  out:
    416 	pthread_mutex_unlock(&spc->spc_mtx);
    417 	return rv;
    418 }
    419 
    420 static void
    421 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
    422 	int wantstr)
    423 {
    424 	struct rsp_hdr rhdr;
    425 
    426 	if (wantstr)
    427 		dlen = MIN(dlen, strlen(data)+1);
    428 
    429 	rhdr.rsp_len = sizeof(rhdr) + dlen;
    430 	rhdr.rsp_reqno = reqno;
    431 	rhdr.rsp_class = RUMPSP_RESP;
    432 	rhdr.rsp_type = RUMPSP_COPYIN;
    433 	rhdr.rsp_sysnum = 0;
    434 
    435 	if (resp_sendlock(spc) != 0)
    436 		return;
    437 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    438 	(void)dosend(spc, data, dlen);
    439 	sendunlock(spc);
    440 }
    441 
    442 static void
    443 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
    444 {
    445 	struct rsp_hdr rhdr;
    446 
    447 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
    448 	rhdr.rsp_reqno = reqno;
    449 	rhdr.rsp_class = RUMPSP_RESP;
    450 	rhdr.rsp_type = RUMPSP_ANONMMAP;
    451 	rhdr.rsp_sysnum = 0;
    452 
    453 	if (resp_sendlock(spc) != 0)
    454 		return;
    455 	(void)dosend(spc, &rhdr, sizeof(rhdr));
    456 	(void)dosend(spc, &addr, sizeof(addr));
    457 	sendunlock(spc);
    458 }
    459 
    460 int
    461 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
    462 	register_t *retval)
    463 {
    464 	struct rsp_sysresp *resp;
    465 	sigset_t omask;
    466 	void *rdata;
    467 	int rv;
    468 
    469 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    470 
    471 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
    472 	    sysnum, data, dlen));
    473 
    474 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
    475 	if (rv)
    476 		goto out;
    477 
    478 	resp = rdata;
    479 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
    480 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
    481 
    482 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
    483 	rv = resp->rsys_error;
    484 	free(rdata);
    485 
    486  out:
    487 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    488 	return rv;
    489 }
    490 
    491 static void
    492 handlereq(struct spclient *spc)
    493 {
    494 	struct rsp_copydata *copydata;
    495 	struct rsp_hdr *rhdr = &spc->spc_hdr;
    496 	void *mapaddr;
    497 	size_t maplen;
    498 	int reqtype = spc->spc_hdr.rsp_type;
    499 
    500 	switch (reqtype) {
    501 	case RUMPSP_COPYIN:
    502 	case RUMPSP_COPYINSTR:
    503 		/*LINTED*/
    504 		copydata = (struct rsp_copydata *)spc->spc_buf;
    505 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
    506 		    copydata->rcp_addr, copydata->rcp_len));
    507 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
    508 		    copydata->rcp_addr, copydata->rcp_len,
    509 		    reqtype == RUMPSP_COPYINSTR);
    510 		break;
    511 	case RUMPSP_COPYOUT:
    512 	case RUMPSP_COPYOUTSTR:
    513 		/*LINTED*/
    514 		copydata = (struct rsp_copydata *)spc->spc_buf;
    515 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
    516 		    copydata->rcp_addr, copydata->rcp_len));
    517 		/*LINTED*/
    518 		memcpy(copydata->rcp_addr, copydata->rcp_data,
    519 		    copydata->rcp_len);
    520 		break;
    521 	case RUMPSP_ANONMMAP:
    522 		/*LINTED*/
    523 		maplen = *(size_t *)spc->spc_buf;
    524 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
    525 		    MAP_ANON, -1, 0);
    526 		if (mapaddr == MAP_FAILED)
    527 			mapaddr = NULL;
    528 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
    529 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
    530 		break;
    531 	case RUMPSP_RAISE:
    532 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
    533 		raise((int)rhdr->rsp_signo);
    534 		/*
    535 		 * We most likely have signals blocked, but the signal
    536 		 * will be handled soon enough when we return.
    537 		 */
    538 		break;
    539 	default:
    540 		printf("PANIC: INVALID TYPE %d\n", reqtype);
    541 		abort();
    542 		break;
    543 	}
    544 
    545 	spcfreebuf(spc);
    546 }
    547 
/*
 * Index into parsetab[] and address of the server; both are filled in by
 * parseurl() in rumpclient_init() and used by doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
    550 
/*
 * Duplicate myfd until the result does not collide with stdio (is > 2).
 * If mustchange is set, at least one dup is performed and the original
 * descriptor is left open for the caller.  All other intermediate
 * descriptors are closed.
 *
 * Returns the final descriptor, or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	int nstale = 0;

	while (myfd != -1 && (mustchange || myfd <= 2)) {
		assert(nstale < __arraycount(stale));
		if (mustchange) {
			/* the original stays open: do not queue it for closing */
			mustchange = 0;
		} else {
			stale[nstale++] = myfd;
		}
		myfd = host_dup(myfd);
	}

	/* close what we went through, newest first */
	while (nstale > 0)
		host_close(stale[--nstale]);

	return myfd;
}
    574 
    575 static int
    576 doconnect(bool noisy)
    577 {
    578 	struct respwait rw;
    579 	struct rsp_hdr rhdr;
    580 	struct kevent kev[NSIG+1];
    581 	char banner[MAXBANNER];
    582 	struct pollfd pfd;
    583 	int s, error, flags, i;
    584 	ssize_t n;
    585 
    586 	if (kq != -1)
    587 		host_close(kq);
    588 	kq = -1;
    589 	s = -1;
    590 
    591 	if (clispc.spc_fd != -1)
    592 		host_close(clispc.spc_fd);
    593 	clispc.spc_fd = -1;
    594 
    595 	/*
    596 	 * for reconnect, gate everyone out of the receiver code
    597 	 */
    598 	putwait_locked(&clispc, &rw, &rhdr);
    599 
    600 	pthread_mutex_lock(&clispc.spc_mtx);
    601 	clispc.spc_reconnecting = 1;
    602 	pthread_cond_broadcast(&clispc.spc_cv);
    603 	clispc.spc_generation++;
    604 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
    605 		clispc.spc_istatus = SPCSTATUS_WANTED;
    606 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
    607 	}
    608 	kickall(&clispc);
    609 
    610 	/*
    611 	 * we can release it already since we hold the
    612 	 * send lock during reconnect
    613 	 * XXX: assert it
    614 	 */
    615 	clispc.spc_istatus = SPCSTATUS_FREE;
    616 	pthread_mutex_unlock(&clispc.spc_mtx);
    617 	unputwait_locked(&clispc, &rw);
    618 
    619 	free(clispc.spc_buf);
    620 	clispc.spc_off = 0;
    621 
    622 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
    623 	if (s == -1)
    624 		return -1;
    625 
    626 	pfd.fd = s;
    627 	pfd.events = POLLIN;
    628 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
    629 		if (errno == EINTR)
    630 			continue;
    631 		error = errno;
    632 		if (noisy)
    633 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
    634 			    strerror(errno));
    635 		errno = error;
    636 		return -1;
    637 	}
    638 
    639 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
    640 		error = errno;
    641 		if (noisy)
    642 			fprintf(stderr, "rump_sp: connect hook failed\n");
    643 		errno = error;
    644 		return -1;
    645 	}
    646 
    647 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
    648 		error = errno;
    649 		if (noisy)
    650 			fprintf(stderr, "rump_sp: failed to read banner\n");
    651 		errno = error;
    652 		return -1;
    653 	}
    654 
    655 	if (banner[n-1] != '\n') {
    656 		if (noisy)
    657 			fprintf(stderr, "rump_sp: invalid banner\n");
    658 		errno = EINVAL;
    659 		return -1;
    660 	}
    661 	banner[n] = '\0';
    662 	/* parse the banner some day */
    663 
    664 	flags = host_fcntl(s, F_GETFL, 0);
    665 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
    666 		if (noisy)
    667 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
    668 			    strerror(errno));
    669 		errno = EINVAL;
    670 		return -1;
    671 	}
    672 	clispc.spc_fd = s;
    673 	clispc.spc_state = SPCSTATE_RUNNING;
    674 	clispc.spc_reconnecting = 0;
    675 
    676 	/* setup kqueue, we want all signals and the fd */
    677 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
    678 		error = errno;
    679 		if (noisy)
    680 			fprintf(stderr, "rump_sp: cannot setup kqueue");
    681 		errno = error;
    682 		return -1;
    683 	}
    684 
    685 	for (i = 0; i < NSIG; i++) {
    686 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
    687 	}
    688 	EV_SET(&kev[NSIG], clispc.spc_fd,
    689 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    690 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
    691 		error = errno;
    692 		if (noisy)
    693 			fprintf(stderr, "rump_sp: kevent() failed");
    694 		errno = error;
    695 		return -1;
    696 	}
    697 
    698 	return 0;
    699 }
    700 
/*
 * Initialize the synchronization state of clispc: the response wait
 * queue, the mutex and the condition variable.  Always returns 0.
 */
static int
doinit(void)
{

	TAILQ_INIT(&clispc.spc_respwait);
	pthread_mutex_init(&clispc.spc_mtx, NULL);
	pthread_cond_init(&clispc.spc_cv, NULL);

	return 0;
}
    711 
/*
 * Symbol lookup routine used by rumpclient_init().  If still NULL when
 * rumpclient_init() runs, it is set to dlsym.
 */
void *(*rumpclient_dlsym)(void *, const char *);
    713 
    714 int
    715 rumpclient_init()
    716 {
    717 	char *p;
    718 	int error;
    719 
    720 	sigfillset(&fullset);
    721 
    722 	/* dlsym overrided by rumphijack? */
    723 	if (!rumpclient_dlsym)
    724 		rumpclient_dlsym = dlsym;
    725 
    726 	/*
    727 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
    728 	 * wann wird man je verstehen?  wann wird man je verstehen?
    729 	 */
    730 #define FINDSYM2(_name_,_syscall_)					\
    731 	if ((host_##_name_ = rumpclient_dlsym(RTLD_NEXT,		\
    732 	    #_syscall_)) == NULL)					\
    733 		/* host_##_name_ = _syscall_ */;
    734 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
    735 	FINDSYM2(socket,__socket30);
    736 	FINDSYM(close);
    737 	FINDSYM(connect);
    738 	FINDSYM(fcntl);
    739 	FINDSYM(poll);
    740 	FINDSYM(read);
    741 	FINDSYM(sendto);
    742 	FINDSYM(setsockopt);
    743 	FINDSYM(dup);
    744 	FINDSYM(kqueue);
    745 #if !__NetBSD_Prereq__(5,99,7)
    746 	FINDSYM(kevent);
    747 #else
    748 	FINDSYM2(kevent,_sys___kevent50);
    749 #endif
    750 #undef	FINDSYM
    751 #undef	FINDSY2
    752 
    753 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
    754 		if ((p = getenv("RUMP_SERVER")) == NULL) {
    755 			errno = ENOENT;
    756 			return -1;
    757 		}
    758 	}
    759 
    760 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
    761 		errno = error;
    762 		return -1;
    763 	}
    764 
    765 	if (doinit() == -1)
    766 		return -1;
    767 
    768 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
    769 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
    770 		unsetenv("RUMPCLIENT__EXECFD");
    771 		return 0;
    772 	}
    773 
    774 	if (doconnect(true) == -1)
    775 		return -1;
    776 
    777 	error = handshake_req(&clispc, NULL, 0, false);
    778 	if (error) {
    779 		pthread_mutex_destroy(&clispc.spc_mtx);
    780 		pthread_cond_destroy(&clispc.spc_cv);
    781 		if (clispc.spc_fd != -1)
    782 			host_close(clispc.spc_fd);
    783 		errno = error;
    784 		return -1;
    785 	}
    786 
    787 	return 0;
    788 }
    789 
/*
 * Fork token: filled from the prefork response by rumpclient_prefork()
 * and presented to the server by rumpclient_fork_init().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];
};
    793 
    794 struct rumpclient_fork *
    795 rumpclient_prefork(void)
    796 {
    797 	struct rumpclient_fork *rpf;
    798 	sigset_t omask;
    799 	void *resp;
    800 	int rv;
    801 
    802 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
    803 	rpf = malloc(sizeof(*rpf));
    804 	if (rpf == NULL)
    805 		return NULL;
    806 
    807 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
    808 		free(rpf);
    809 		errno = rv;
    810 		rpf = NULL;
    811 		goto out;
    812 	}
    813 
    814 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
    815 	free(resp);
    816 
    817  out:
    818 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
    819 	return rpf;
    820 }
    821 
    822 int
    823 rumpclient_fork_init(struct rumpclient_fork *rpf)
    824 {
    825 	int error;
    826 	int osock;
    827 
    828 	osock = clispc.spc_fd;
    829 	memset(&clispc, 0, sizeof(clispc));
    830 	clispc.spc_fd = osock;
    831 
    832 	kq = -1; /* kqueue descriptor is not copied over fork() */
    833 
    834 	if (doinit() == -1)
    835 		return -1;
    836 	if (doconnect(false) == -1)
    837 		return -1;
    838 
    839 	error = handshake_req(&clispc, rpf->fork_auth, 0, false);
    840 	if (error) {
    841 		pthread_mutex_destroy(&clispc.spc_mtx);
    842 		pthread_cond_destroy(&clispc.spc_cv);
    843 		errno = error;
    844 		return -1;
    845 	}
    846 
    847 	return 0;
    848 }
    849 
    850 void
    851 rumpclient_setconnretry(time_t timeout)
    852 {
    853 
    854 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
    855 		return; /* gigo */
    856 
    857 	retrytimo = timeout;
    858 }
    859 
    860 int
    861 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
    862 {
    863 	int fd = *fdp;
    864 	int untilfd, rv;
    865 	int newfd;
    866 
    867 	switch (variant) {
    868 	case RUMPCLIENT_CLOSE_FCLOSEM:
    869 		untilfd = MAX(clispc.spc_fd, kq);
    870 		for (; fd <= untilfd; fd++) {
    871 			if (fd == clispc.spc_fd || fd == kq)
    872 				continue;
    873 			rv = host_close(fd);
    874 			if (rv == -1)
    875 				return -1;
    876 		}
    877 		*fdp = fd;
    878 		break;
    879 
    880 	case RUMPCLIENT_CLOSE_CLOSE:
    881 	case RUMPCLIENT_CLOSE_DUP2:
    882 		if (fd == clispc.spc_fd) {
    883 			struct kevent kev[2];
    884 
    885 			newfd = dupgood(clispc.spc_fd, 1);
    886 			if (newfd == -1)
    887 				return -1;
    888 			/*
    889 			 * now, we have a new socket number, so change
    890 			 * the file descriptor that kqueue is
    891 			 * monitoring.  remove old and add new.
    892 			 */
    893 			EV_SET(&kev[0], clispc.spc_fd,
    894 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
    895 			EV_SET(&kev[1], newfd,
    896 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
    897 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
    898 				int sverrno = errno;
    899 				host_close(newfd);
    900 				errno = sverrno;
    901 				return -1;
    902 			}
    903 			clispc.spc_fd = newfd;
    904 		}
    905 		if (fd == kq) {
    906 			newfd = dupgood(kq, 1);
    907 			if (newfd == -1)
    908 				return -1;
    909 			kq = newfd;
    910 		}
    911 		break;
    912 	}
    913 
    914 	return 0;
    915 }
    916 
    917 /*
    918  * Process is about to exec.  Save info about our existing connection
    919  * in the env.  rumpclient will check for this info in init().
    920  * This is mostly for the benefit of rumphijack, but regular applications
    921  * may use it as well.
    922  */
    923 int
    924 rumpclient__exec_augmentenv(char *const oenv1[], char *const oenv2[],
    925 	char ***newenvp)
    926 {
    927 	char buf[4096];
    928 	char **newenv;
    929 	char *envstr, *envstr2;
    930 	size_t nelem1, nelem2;
    931 
    932 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
    933 	    clispc.spc_fd, kq);
    934 	envstr = malloc(strlen(buf)+1);
    935 	if (envstr == NULL) {
    936 		return ENOMEM;
    937 	}
    938 	strcpy(envstr, buf);
    939 
    940 	/* do we have a fully parsed url we want to forward in the env? */
    941 	if (*parsedurl != '\0') {
    942 		snprintf(buf, sizeof(buf),
    943 		    "RUMP__PARSEDSERVER=%s", parsedurl);
    944 		envstr2 = malloc(strlen(buf)+1);
    945 		if (envstr2 == NULL) {
    946 			free(envstr);
    947 			return ENOMEM;
    948 		}
    949 		strcpy(envstr2, buf);
    950 	} else {
    951 		envstr2 = NULL;
    952 	}
    953 
    954 	nelem1 = 0;
    955 	if (oenv1) {
    956 		for (; oenv1[nelem1]; nelem1++)
    957 			continue;
    958 	}
    959 	nelem2 = 0;
    960 	if (oenv2) {
    961 		for (; oenv2[nelem2]; nelem2++)
    962 			continue;
    963 	}
    964 
    965 	newenv = malloc(sizeof(*newenv) * nelem1+nelem2+3);
    966 	if (newenv == NULL) {
    967 		free(envstr2);
    968 		free(envstr);
    969 		return ENOMEM;
    970 	}
    971 	memcpy(&newenv[0], oenv1, sizeof(*oenv1) * nelem1);
    972 	memcpy(&newenv[nelem1], oenv2, sizeof(*oenv2) * nelem2);
    973 
    974 	newenv[nelem1+nelem2] = envstr;
    975 	newenv[nelem1+nelem2+1] = envstr2;
    976 	newenv[nelem1+nelem2+2] = NULL;
    977 
    978 	*newenvp = newenv;
    979 
    980 	return 0;
    981 }
    982