rumpclient.c revision 1.42 1 /* $NetBSD: rumpclient.c,v 1.42 2011/03/08 18:28:01 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: rumpclient.c,v 1.42 2011/03/08 18:28:01 pooka Exp $");
34
35 #include <sys/param.h>
36 #include <sys/event.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <link.h>
50 #include <poll.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59
60 #include <rump/rumpclient.h>
61
/*
 * Host system call entry points.  Each pointer is filled in by
 * rumpclient_init(), which looks the symbol up with rumphijack_dlsym().
 * HOSTOPS is defined ahead of the sp_common.c include below; presumably
 * it tells that file to go through these pointers -- confirm there.
 */
#define HOSTOPS
int	(*host_socket)(int domain, int type, int protocol);
int	(*host_close)(int fd);
int	(*host_connect)(int fd, const struct sockaddr *addr, socklen_t addrlen);
int	(*host_fcntl)(int fd, int cmd, ...);
int	(*host_poll)(struct pollfd *fds, nfds_t nfds, int timeout);
ssize_t	(*host_read)(int fd, void *buf, size_t nbytes);
ssize_t	(*host_sendmsg)(int fd, const struct msghdr *msg, int flags);
int	(*host_setsockopt)(int fd, int level, int optname,
	    const void *optval, socklen_t optlen);
int	(*host_dup)(int fd);

/* event notification */
int	(*host_kqueue)(void);
int	(*host_kevent)(int kqfd, const struct kevent *changelist,
	    size_t nchanges, struct kevent *eventlist, size_t nevents,
	    const struct timespec *timeout);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *path, char *const argv[],
	    char *const envp[]);
78
79 #include "sp_common.c"
80
81 static struct spclient clispc = {
82 .spc_fd = -1,
83 };
84
85 static int kq = -1;
86 static sigset_t fullset;
87
88 static int doconnect(bool);
89 static int handshake_req(struct spclient *, int, void *, int, bool);
90
91 /*
92 * Default: don't retry. Most clients can't handle it
93 * (consider e.g. fds suddenly going missing).
94 */
95 static time_t retrytimo = 0;
96
97 static int
98 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
99 {
100 struct timeval starttime, curtime;
101 time_t prevreconmsg;
102 unsigned reconretries;
103 int rv;
104
105 for (prevreconmsg = 0, reconretries = 0;;) {
106 rv = dosend(spc, iov, iovlen);
107 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
108 /* no persistent connections */
109 if (retrytimo == 0) {
110 rv = ENOTCONN;
111 break;
112 }
113 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
114 exit(1);
115
116 if (!prevreconmsg) {
117 prevreconmsg = time(NULL);
118 gettimeofday(&starttime, NULL);
119 }
120 if (reconretries == 1) {
121 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
122 rv = ENOTCONN;
123 break;
124 }
125 fprintf(stderr, "rump_sp: connection to "
126 "kernel lost, trying to reconnect ...\n");
127 } else if (time(NULL) - prevreconmsg > 120) {
128 fprintf(stderr, "rump_sp: still trying to "
129 "reconnect ...\n");
130 prevreconmsg = time(NULL);
131 }
132
133 /* check that we aren't over the limit */
134 if (retrytimo > 0) {
135 struct timeval tmp;
136
137 gettimeofday(&curtime, NULL);
138 timersub(&curtime, &starttime, &tmp);
139 if (tmp.tv_sec >= retrytimo) {
140 fprintf(stderr, "rump_sp: reconnect "
141 "failed, %lld second timeout\n",
142 (long long)retrytimo);
143 return ENOTCONN;
144 }
145 }
146
147 /* adhoc backoff timer */
148 if (reconretries < 10) {
149 usleep(100000 * reconretries);
150 } else {
151 sleep(MIN(10, reconretries-9));
152 }
153 reconretries++;
154
155 if ((rv = doconnect(false)) != 0)
156 continue;
157 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
158 NULL, 0, true)) != 0)
159 continue;
160
161 /*
162 * ok, reconnect succesful. we need to return to
163 * the upper layer to get the entire PDU resent.
164 */
165 if (reconretries != 1)
166 fprintf(stderr, "rump_sp: reconnected!\n");
167 rv = EAGAIN;
168 break;
169 } else {
170 _DIAGASSERT(errno != EAGAIN);
171 break;
172 }
173 }
174
175 return rv;
176 }
177
178 static int
179 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
180 bool keeplock)
181 {
182 uint64_t mygen;
183 bool imalive = true;
184
185 pthread_mutex_lock(&spc->spc_mtx);
186 if (!keeplock)
187 sendunlockl(spc);
188 mygen = spc->spc_generation;
189
190 rw->rw_error = 0;
191 while (!rw->rw_done && rw->rw_error == 0) {
192 if (__predict_false(spc->spc_generation != mygen || !imalive))
193 break;
194
195 /* are we free to receive? */
196 if (spc->spc_istatus == SPCSTATUS_FREE) {
197 struct kevent kev[8];
198 int gotresp, dosig, rv, i;
199
200 spc->spc_istatus = SPCSTATUS_BUSY;
201 pthread_mutex_unlock(&spc->spc_mtx);
202
203 dosig = 0;
204 for (gotresp = 0; !gotresp; ) {
205 /*
206 * typically we don't have a frame waiting
207 * when we come in here, so call kevent now
208 */
209 rv = host_kevent(kq, NULL, 0,
210 kev, __arraycount(kev), NULL);
211
212 if (__predict_false(rv == -1)) {
213 goto activity;
214 }
215
216 /*
217 * XXX: don't know how this can happen
218 * (timeout cannot expire since there
219 * isn't one), but it does happen.
220 * treat it as an expectional condition
221 * and go through tryread to determine
222 * alive status.
223 */
224 if (__predict_false(rv == 0))
225 goto activity;
226
227 for (i = 0; i < rv; i++) {
228 if (kev[i].filter == EVFILT_SIGNAL)
229 dosig++;
230 }
231 if (dosig)
232 goto cleanup;
233
234 /*
235 * ok, activity. try to read a frame to
236 * determine what happens next.
237 */
238 activity:
239 switch (readframe(spc)) {
240 case 0:
241 continue;
242 case -1:
243 imalive = false;
244 goto cleanup;
245 default:
246 /* case 1 */
247 break;
248 }
249
250 switch (spc->spc_hdr.rsp_class) {
251 case RUMPSP_RESP:
252 case RUMPSP_ERROR:
253 kickwaiter(spc);
254 gotresp = spc->spc_hdr.rsp_reqno ==
255 rw->rw_reqno;
256 break;
257 case RUMPSP_REQ:
258 handlereq(spc);
259 break;
260 default:
261 /* panic */
262 break;
263 }
264 }
265
266 cleanup:
267 pthread_mutex_lock(&spc->spc_mtx);
268 if (spc->spc_istatus == SPCSTATUS_WANTED)
269 kickall(spc);
270 spc->spc_istatus = SPCSTATUS_FREE;
271
272 /* take one for the team */
273 if (dosig) {
274 pthread_mutex_unlock(&spc->spc_mtx);
275 pthread_sigmask(SIG_SETMASK, mask, NULL);
276 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
277 pthread_mutex_lock(&spc->spc_mtx);
278 }
279 } else {
280 spc->spc_istatus = SPCSTATUS_WANTED;
281 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
282 }
283 }
284 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
285 pthread_mutex_unlock(&spc->spc_mtx);
286 pthread_cond_destroy(&rw->rw_cv);
287
288 if (spc->spc_generation != mygen || !imalive) {
289 return ENOTCONN;
290 }
291 return rw->rw_error;
292 }
293
294 static int
295 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
296 const void *data, size_t dlen, void **resp)
297 {
298 struct rsp_hdr rhdr;
299 struct respwait rw;
300 struct iovec iov[2];
301 int rv;
302
303 rhdr.rsp_len = sizeof(rhdr) + dlen;
304 rhdr.rsp_class = RUMPSP_REQ;
305 rhdr.rsp_type = RUMPSP_SYSCALL;
306 rhdr.rsp_sysnum = sysnum;
307
308 IOVPUT(iov[0], rhdr);
309 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
310
311 do {
312 putwait(spc, &rw, &rhdr);
313 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
314 unputwait(spc, &rw);
315 continue;
316 }
317
318 rv = cliwaitresp(spc, &rw, omask, false);
319 if (rv == ENOTCONN)
320 rv = EAGAIN;
321 } while (rv == EAGAIN);
322
323 *resp = rw.rw_data;
324 return rv;
325 }
326
327 static int
328 handshake_req(struct spclient *spc, int type, void *data,
329 int cancel, bool haslock)
330 {
331 struct handshake_fork rf;
332 const char *myprogname;
333 struct rsp_hdr rhdr;
334 struct respwait rw;
335 sigset_t omask;
336 size_t bonus;
337 struct iovec iov[2];
338 int rv;
339
340 if (type == HANDSHAKE_FORK) {
341 bonus = sizeof(rf);
342 } else {
343 myprogname = getprogname();
344 bonus = strlen(myprogname)+1;
345 }
346
347 /* performs server handshake */
348 rhdr.rsp_len = sizeof(rhdr) + bonus;
349 rhdr.rsp_class = RUMPSP_REQ;
350 rhdr.rsp_type = RUMPSP_HANDSHAKE;
351 rhdr.rsp_handshake = type;
352
353 IOVPUT(iov[0], rhdr);
354
355 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
356 if (haslock)
357 putwait_locked(spc, &rw, &rhdr);
358 else
359 putwait(spc, &rw, &rhdr);
360 if (type == HANDSHAKE_FORK) {
361 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
362 rf.rf_cancel = cancel;
363 IOVPUT(iov[1], rf);
364 } else {
365 IOVPUT_WITHSIZE(iov[1], __UNCONST(getprogname()), bonus);
366 }
367 rv = send_with_recon(spc, iov, __arraycount(iov));
368 if (rv || cancel) {
369 if (haslock)
370 unputwait_locked(spc, &rw);
371 else
372 unputwait(spc, &rw);
373 if (cancel) {
374 goto out;
375 }
376 } else {
377 rv = cliwaitresp(spc, &rw, &omask, haslock);
378 }
379 if (rv)
380 goto out;
381
382 rv = *(int *)rw.rw_data;
383 free(rw.rw_data);
384
385 out:
386 pthread_sigmask(SIG_SETMASK, &omask, NULL);
387 return rv;
388 }
389
390 static int
391 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
392 {
393 struct rsp_hdr rhdr;
394 struct respwait rw;
395 struct iovec iov[1];
396 int rv;
397
398 rhdr.rsp_len = sizeof(rhdr);
399 rhdr.rsp_class = RUMPSP_REQ;
400 rhdr.rsp_type = RUMPSP_PREFORK;
401 rhdr.rsp_error = 0;
402
403 IOVPUT(iov[0], rhdr);
404
405 do {
406 putwait(spc, &rw, &rhdr);
407 rv = send_with_recon(spc, iov, __arraycount(iov));
408 if (rv != 0) {
409 unputwait(spc, &rw);
410 continue;
411 }
412
413 rv = cliwaitresp(spc, &rw, omask, false);
414 if (rv == ENOTCONN)
415 rv = EAGAIN;
416 } while (rv == EAGAIN);
417
418 *resp = rw.rw_data;
419 return rv;
420 }
421
422 /*
423 * prevent response code from deadlocking with reconnect code
424 */
425 static int
426 resp_sendlock(struct spclient *spc)
427 {
428 int rv = 0;
429
430 pthread_mutex_lock(&spc->spc_mtx);
431 while (spc->spc_ostatus != SPCSTATUS_FREE) {
432 if (__predict_false(spc->spc_reconnecting)) {
433 rv = EBUSY;
434 goto out;
435 }
436 spc->spc_ostatus = SPCSTATUS_WANTED;
437 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
438 }
439 spc->spc_ostatus = SPCSTATUS_BUSY;
440
441 out:
442 pthread_mutex_unlock(&spc->spc_mtx);
443 return rv;
444 }
445
446 static void
447 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
448 int wantstr)
449 {
450 struct rsp_hdr rhdr;
451 struct iovec iov[2];
452
453 if (wantstr)
454 dlen = MIN(dlen, strlen(data)+1);
455
456 rhdr.rsp_len = sizeof(rhdr) + dlen;
457 rhdr.rsp_reqno = reqno;
458 rhdr.rsp_class = RUMPSP_RESP;
459 rhdr.rsp_type = RUMPSP_COPYIN;
460 rhdr.rsp_sysnum = 0;
461
462 IOVPUT(iov[0], rhdr);
463 IOVPUT_WITHSIZE(iov[1], data, dlen);
464
465 if (resp_sendlock(spc) != 0)
466 return;
467 (void)SENDIOV(spc, iov);
468 sendunlock(spc);
469 }
470
471 static void
472 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
473 {
474 struct rsp_hdr rhdr;
475 struct iovec iov[2];
476
477 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
478 rhdr.rsp_reqno = reqno;
479 rhdr.rsp_class = RUMPSP_RESP;
480 rhdr.rsp_type = RUMPSP_ANONMMAP;
481 rhdr.rsp_sysnum = 0;
482
483 IOVPUT(iov[0], rhdr);
484 IOVPUT(iov[1], addr);
485
486 if (resp_sendlock(spc) != 0)
487 return;
488 (void)SENDIOV(spc, iov);
489 sendunlock(spc);
490 }
491
492 int
493 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
494 register_t *retval)
495 {
496 struct rsp_sysresp *resp;
497 sigset_t omask;
498 void *rdata;
499 int rv;
500
501 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
502
503 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
504 sysnum, data, dlen));
505
506 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
507 if (rv)
508 goto out;
509
510 resp = rdata;
511 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
512 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
513
514 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
515 rv = resp->rsys_error;
516 free(rdata);
517
518 out:
519 pthread_sigmask(SIG_SETMASK, &omask, NULL);
520 return rv;
521 }
522
523 static void
524 handlereq(struct spclient *spc)
525 {
526 struct rsp_copydata *copydata;
527 struct rsp_hdr *rhdr = &spc->spc_hdr;
528 void *mapaddr;
529 size_t maplen;
530 int reqtype = spc->spc_hdr.rsp_type;
531
532 switch (reqtype) {
533 case RUMPSP_COPYIN:
534 case RUMPSP_COPYINSTR:
535 /*LINTED*/
536 copydata = (struct rsp_copydata *)spc->spc_buf;
537 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
538 copydata->rcp_addr, copydata->rcp_len));
539 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
540 copydata->rcp_addr, copydata->rcp_len,
541 reqtype == RUMPSP_COPYINSTR);
542 break;
543 case RUMPSP_COPYOUT:
544 case RUMPSP_COPYOUTSTR:
545 /*LINTED*/
546 copydata = (struct rsp_copydata *)spc->spc_buf;
547 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
548 copydata->rcp_addr, copydata->rcp_len));
549 /*LINTED*/
550 memcpy(copydata->rcp_addr, copydata->rcp_data,
551 copydata->rcp_len);
552 break;
553 case RUMPSP_ANONMMAP:
554 /*LINTED*/
555 maplen = *(size_t *)spc->spc_buf;
556 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
557 MAP_ANON, -1, 0);
558 if (mapaddr == MAP_FAILED)
559 mapaddr = NULL;
560 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
561 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
562 break;
563 case RUMPSP_RAISE:
564 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
565 raise((int)rhdr->rsp_signo);
566 /*
567 * We most likely have signals blocked, but the signal
568 * will be handled soon enough when we return.
569 */
570 break;
571 default:
572 printf("PANIC: INVALID TYPE %d\n", reqtype);
573 abort();
574 break;
575 }
576
577 spcfreebuf(spc);
578 }
579
/* Server address and the parsetab[] slot for it; set by parseurl(). */
static struct sockaddr *serv_sa;
static unsigned ptab_idx;
582
/*
 * dup until we get a "good" fd which does not collide with stdio
 * (i.e. is greater than 2).
 *
 * With mustchange set, the descriptor is duplicated at least once and
 * the caller's original descriptor is left open; every other
 * intermediate descriptor is closed before returning.  Returns the new
 * descriptor, or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	size_t nstale = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		int prev = myfd;

		assert(nstale < __arraycount(stale));
		myfd = host_dup(prev);
		if (mustchange) {
			/* the caller's fd is not ours to close */
			mustchange = 0;
		} else {
			stale[nstale++] = prev;
		}
	}

	/* get rid of the rejected descriptors, newest first */
	while (nstale > 0)
		host_close(stale[--nstale]);

	return myfd;
}
606
607 static int
608 doconnect(bool noisy)
609 {
610 struct respwait rw;
611 struct rsp_hdr rhdr;
612 struct kevent kev[NSIG+1];
613 char banner[MAXBANNER];
614 struct pollfd pfd;
615 int s, error, flags, i;
616 ssize_t n;
617
618 if (kq != -1)
619 host_close(kq);
620 kq = -1;
621 s = -1;
622
623 if (clispc.spc_fd != -1)
624 host_close(clispc.spc_fd);
625 clispc.spc_fd = -1;
626
627 /*
628 * for reconnect, gate everyone out of the receiver code
629 */
630 putwait_locked(&clispc, &rw, &rhdr);
631
632 pthread_mutex_lock(&clispc.spc_mtx);
633 clispc.spc_reconnecting = 1;
634 pthread_cond_broadcast(&clispc.spc_cv);
635 clispc.spc_generation++;
636 while (clispc.spc_istatus != SPCSTATUS_FREE) {
637 clispc.spc_istatus = SPCSTATUS_WANTED;
638 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
639 }
640 kickall(&clispc);
641
642 /*
643 * we can release it already since we hold the
644 * send lock during reconnect
645 * XXX: assert it
646 */
647 clispc.spc_istatus = SPCSTATUS_FREE;
648 pthread_mutex_unlock(&clispc.spc_mtx);
649 unputwait_locked(&clispc, &rw);
650
651 free(clispc.spc_buf);
652 clispc.spc_off = 0;
653
654 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
655 if (s == -1)
656 return -1;
657
658 pfd.fd = s;
659 pfd.events = POLLIN;
660 while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
661 if (errno == EINTR)
662 continue;
663 error = errno;
664 if (noisy)
665 fprintf(stderr, "rump_sp: client connect failed: %s\n",
666 strerror(errno));
667 errno = error;
668 return -1;
669 }
670
671 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
672 error = errno;
673 if (noisy)
674 fprintf(stderr, "rump_sp: connect hook failed\n");
675 errno = error;
676 return -1;
677 }
678
679 if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
680 error = errno;
681 if (noisy)
682 fprintf(stderr, "rump_sp: failed to read banner\n");
683 errno = error;
684 return -1;
685 }
686
687 if (banner[n-1] != '\n') {
688 if (noisy)
689 fprintf(stderr, "rump_sp: invalid banner\n");
690 errno = EINVAL;
691 return -1;
692 }
693 banner[n] = '\0';
694 /* parse the banner some day */
695
696 flags = host_fcntl(s, F_GETFL, 0);
697 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
698 if (noisy)
699 fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
700 strerror(errno));
701 errno = EINVAL;
702 return -1;
703 }
704 clispc.spc_fd = s;
705 clispc.spc_state = SPCSTATE_RUNNING;
706 clispc.spc_reconnecting = 0;
707
708 /* setup kqueue, we want all signals and the fd */
709 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
710 error = errno;
711 if (noisy)
712 fprintf(stderr, "rump_sp: cannot setup kqueue");
713 errno = error;
714 return -1;
715 }
716
717 for (i = 0; i < NSIG; i++) {
718 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
719 }
720 EV_SET(&kev[NSIG], clispc.spc_fd,
721 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
722 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
723 error = errno;
724 if (noisy)
725 fprintf(stderr, "rump_sp: kevent() failed");
726 errno = error;
727 return -1;
728 }
729
730 return 0;
731 }
732
733 static int
734 doinit(void)
735 {
736
737 TAILQ_INIT(&clispc.spc_respwait);
738 pthread_mutex_init(&clispc.spc_mtx, NULL);
739 pthread_cond_init(&clispc.spc_cv, NULL);
740
741 return 0;
742 }
743
void *rumphijack_dlsym(void *, const char *);
void *rumpclient__dlsym(void *, const char *);

/*
 * Default symbol lookup: a plain dlsym().  rumphijack_dlsym is a weak
 * alias of this function; rumpclient_init() compares the two addresses
 * to detect whether the alias has been replaced by another definition.
 */
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr = dlsym(handle, symbol);

	return addr;
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
753
754 static pid_t init_done = 0;
755
756 int
757 rumpclient_init()
758 {
759 char *p;
760 int error;
761 int rv = -1;
762 int hstype;
763 pid_t mypid;
764
765 /*
766 * Make sure we're not riding the context of a previous
767 * host fork. Note: it's *possible* that after n>1 forks
768 * we have the same pid as one of our exited parents, but
769 * I'm pretty sure there are 0 practical implications, since
770 * it means generations would have to skip rumpclient init.
771 */
772 if (init_done == (mypid = getpid()))
773 return 0;
774
775 /* kq does not traverse fork() */
776 if (init_done != 0)
777 kq = -1;
778 init_done = mypid;
779
780 sigfillset(&fullset);
781
782 /*
783 * sag mir, wo die symbol sind. zogen fort, der krieg beginnt.
784 * wann wird man je verstehen? wann wird man je verstehen?
785 */
786 #define FINDSYM2(_name_,_syscall_) \
787 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
788 #_syscall_)) == NULL) { \
789 if (rumphijack_dlsym == rumpclient__dlsym) \
790 host_##_name_ = _name_; /* static fallback */ \
791 if (host_##_name_ == NULL) \
792 errx(1, "cannot find %s: %s", #_syscall_, \
793 dlerror()); \
794 }
795 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
796 FINDSYM2(socket,__socket30)
797 FINDSYM(close)
798 FINDSYM(connect)
799 FINDSYM(fcntl)
800 FINDSYM(poll)
801 FINDSYM(read)
802 FINDSYM(sendmsg)
803 FINDSYM(setsockopt)
804 FINDSYM(dup)
805 FINDSYM(kqueue)
806 FINDSYM(execve)
807 #if !__NetBSD_Prereq__(5,99,7)
808 FINDSYM(kevent)
809 #else
810 FINDSYM2(kevent,_sys___kevent50)
811 #endif
812 #undef FINDSYM
813 #undef FINDSY2
814
815 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
816 if ((p = getenv("RUMP_SERVER")) == NULL) {
817 errno = ENOENT;
818 goto out;
819 }
820 }
821
822 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
823 errno = error;
824 goto out;
825 }
826
827 if (doinit() == -1)
828 goto out;
829
830 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
831 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
832 unsetenv("RUMPCLIENT__EXECFD");
833 hstype = HANDSHAKE_EXEC;
834 } else {
835 if (doconnect(true) == -1)
836 goto out;
837 hstype = HANDSHAKE_GUEST;
838 }
839
840 error = handshake_req(&clispc, hstype, NULL, 0, false);
841 if (error) {
842 pthread_mutex_destroy(&clispc.spc_mtx);
843 pthread_cond_destroy(&clispc.spc_cv);
844 if (clispc.spc_fd != -1)
845 host_close(clispc.spc_fd);
846 errno = error;
847 goto out;
848 }
849 rv = 0;
850
851 out:
852 if (rv == -1)
853 init_done = 0;
854 return rv;
855 }
856
857 struct rumpclient_fork {
858 uint32_t fork_auth[AUTHLEN];
859 struct spclient fork_spc;
860 int fork_kq;
861 };
862
863 struct rumpclient_fork *
864 rumpclient_prefork(void)
865 {
866 struct rumpclient_fork *rpf;
867 sigset_t omask;
868 void *resp;
869 int rv;
870
871 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
872 rpf = malloc(sizeof(*rpf));
873 if (rpf == NULL)
874 goto out;
875
876 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
877 free(rpf);
878 errno = rv;
879 rpf = NULL;
880 goto out;
881 }
882
883 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
884 free(resp);
885
886 rpf->fork_spc = clispc;
887 rpf->fork_kq = kq;
888
889 out:
890 pthread_sigmask(SIG_SETMASK, &omask, NULL);
891 return rpf;
892 }
893
894 int
895 rumpclient_fork_init(struct rumpclient_fork *rpf)
896 {
897 int error;
898 int osock;
899
900 osock = clispc.spc_fd;
901 memset(&clispc, 0, sizeof(clispc));
902 clispc.spc_fd = osock;
903
904 kq = -1; /* kqueue descriptor is not copied over fork() */
905
906 if (doinit() == -1)
907 return -1;
908 if (doconnect(false) == -1)
909 return -1;
910
911 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
912 0, false);
913 if (error) {
914 pthread_mutex_destroy(&clispc.spc_mtx);
915 pthread_cond_destroy(&clispc.spc_cv);
916 errno = error;
917 return -1;
918 }
919
920 return 0;
921 }
922
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: this is currently a no-op and rpf is not touched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
930
931 void
932 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
933 {
934
935 clispc = rpf->fork_spc;
936 kq = rpf->fork_kq;
937 }
938
939 void
940 rumpclient_setconnretry(time_t timeout)
941 {
942
943 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
944 return; /* gigo */
945
946 retrytimo = timeout;
947 }
948
949 int
950 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
951 {
952 int fd = *fdp;
953 int untilfd, rv;
954 int newfd;
955
956 switch (variant) {
957 case RUMPCLIENT_CLOSE_FCLOSEM:
958 untilfd = MAX(clispc.spc_fd, kq);
959 for (; fd <= untilfd; fd++) {
960 if (fd == clispc.spc_fd || fd == kq)
961 continue;
962 rv = host_close(fd);
963 if (rv == -1)
964 return -1;
965 }
966 *fdp = fd;
967 break;
968
969 case RUMPCLIENT_CLOSE_CLOSE:
970 case RUMPCLIENT_CLOSE_DUP2:
971 if (fd == clispc.spc_fd) {
972 struct kevent kev[2];
973
974 newfd = dupgood(clispc.spc_fd, 1);
975 if (newfd == -1)
976 return -1;
977 /*
978 * now, we have a new socket number, so change
979 * the file descriptor that kqueue is
980 * monitoring. remove old and add new.
981 */
982 EV_SET(&kev[0], clispc.spc_fd,
983 EVFILT_READ, EV_DELETE, 0, 0, 0);
984 EV_SET(&kev[1], newfd,
985 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
986 if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
987 int sverrno = errno;
988 host_close(newfd);
989 errno = sverrno;
990 return -1;
991 }
992 clispc.spc_fd = newfd;
993 }
994 if (fd == kq) {
995 newfd = dupgood(kq, 1);
996 if (newfd == -1)
997 return -1;
998 kq = newfd;
999 }
1000 break;
1001 }
1002
1003 return 0;
1004 }
1005
/*
 * fork() wrapper for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns whatever that yields.
 */
pid_t
rumpclient_fork()
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1012
1013 /*
1014 * Process is about to exec. Save info about our existing connection
1015 * in the env. rumpclient will check for this info in init().
1016 * This is mostly for the benefit of rumphijack, but regular applications
1017 * may use it as well.
1018 */
1019 int
1020 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1021 {
1022 char buf[4096];
1023 char **newenv;
1024 char *envstr, *envstr2;
1025 size_t nelem;
1026 int rv, sverrno;
1027
1028 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1029 clispc.spc_fd, kq);
1030 envstr = malloc(strlen(buf)+1);
1031 if (envstr == NULL) {
1032 return ENOMEM;
1033 }
1034 strcpy(envstr, buf);
1035
1036 /* do we have a fully parsed url we want to forward in the env? */
1037 if (*parsedurl != '\0') {
1038 snprintf(buf, sizeof(buf),
1039 "RUMP__PARSEDSERVER=%s", parsedurl);
1040 envstr2 = malloc(strlen(buf)+1);
1041 if (envstr2 == NULL) {
1042 free(envstr);
1043 return ENOMEM;
1044 }
1045 strcpy(envstr2, buf);
1046 } else {
1047 envstr2 = NULL;
1048 }
1049
1050 for (nelem = 0; envp && envp[nelem]; nelem++)
1051 continue;
1052
1053 newenv = malloc(sizeof(*newenv) * (nelem+3));
1054 if (newenv == NULL) {
1055 free(envstr2);
1056 free(envstr);
1057 return ENOMEM;
1058 }
1059 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1060
1061 newenv[nelem] = envstr;
1062 newenv[nelem+1] = envstr2;
1063 newenv[nelem+2] = NULL;
1064
1065 rv = host_execve(path, argv, newenv);
1066
1067 _DIAGASSERT(rv != 0);
1068 sverrno = errno;
1069 free(envstr2);
1070 free(envstr);
1071 free(newenv);
1072 errno = sverrno;
1073 return rv;
1074 }
1075
/*
 * daemon(3) for rump clients: prepare for the fork that daemon() does,
 * daemonize, and then re-establish the server connection in the
 * surviving process.
 *
 * Returns 0 on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		/*
		 * rumpclient_fork_cancel() does not release the handle,
		 * so do it here instead of leaking it.
		 */
		free(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/* the handle is not needed past this point; keep errno intact */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;
}
1097