rumpclient.c revision 1.47.2.1 1 /* $NetBSD: rumpclient.c,v 1.47.2.1 2012/04/23 16:49:03 riz Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: rumpclient.c,v 1.47.2.1 2012/04/23 16:49:03 riz Exp $");
34
35 #include <sys/param.h>
36 #include <sys/event.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <link.h>
50 #include <poll.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59
60 #include <rump/rumpclient.h>
61
/*
 * Pointers to the host's own system call entry points.  They are
 * filled in by rumpclient_init() through rumphijack_dlsym(RTLD_NEXT, ...)
 * and are used for all communication with the server.
 * HOSTOPS is defined before sp_common.c is included.
 */
#define HOSTOPS

/* socket and descriptor handling */
int	(*host_socket)(int domain, int type, int protocol);
int	(*host_close)(int fd);
int	(*host_connect)(int fd, const struct sockaddr *name, socklen_t namelen);
int	(*host_fcntl)(int fd, int cmd, ...);
int	(*host_poll)(struct pollfd *fds, nfds_t nfds, int timeout);
ssize_t	(*host_read)(int fd, void *buf, size_t nbytes);
ssize_t	(*host_sendmsg)(int fd, const struct msghdr *msg, int flags);
int	(*host_setsockopt)(int fd, int level, int optname, const void *optval,
	    socklen_t optlen);
int	(*host_dup)(int fd);

/* event notification */
int	(*host_kqueue)(void);
int	(*host_kevent)(int kqfd, const struct kevent *changelist,
	    size_t nchanges, struct kevent *eventlist, size_t nevents,
	    const struct timespec *timeout);

/* program execution */
int	(*host_execve)(const char *path, char *const argv[],
	    char *const envp[]);
79 #include "sp_common.c"
80
81 static struct spclient clispc = {
82 .spc_fd = -1,
83 };
84
85 static int kq = -1;
86 static sigset_t fullset;
87
88 static int doconnect(void);
89 static int handshake_req(struct spclient *, int, void *, int, bool);
90
91 /*
92 * Default: don't retry. Most clients can't handle it
93 * (consider e.g. fds suddenly going missing).
94 */
95 static time_t retrytimo = 0;
96
97 /* always defined to nothingness for now */
98 #define ERRLOG(a)
99
100 static int
101 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
102 {
103 struct timeval starttime, curtime;
104 time_t prevreconmsg;
105 unsigned reconretries;
106 int rv;
107
108 for (prevreconmsg = 0, reconretries = 0;;) {
109 rv = dosend(spc, iov, iovlen);
110 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
111 /* no persistent connections */
112 if (retrytimo == 0) {
113 rv = ENOTCONN;
114 break;
115 }
116 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
117 _exit(1);
118
119 if (!prevreconmsg) {
120 prevreconmsg = time(NULL);
121 gettimeofday(&starttime, NULL);
122 }
123 if (reconretries == 1) {
124 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
125 rv = ENOTCONN;
126 break;
127 }
128 fprintf(stderr, "rump_sp: connection to "
129 "kernel lost, trying to reconnect ...\n");
130 } else if (time(NULL) - prevreconmsg > 120) {
131 fprintf(stderr, "rump_sp: still trying to "
132 "reconnect ...\n");
133 prevreconmsg = time(NULL);
134 }
135
136 /* check that we aren't over the limit */
137 if (retrytimo > 0) {
138 struct timeval tmp;
139
140 gettimeofday(&curtime, NULL);
141 timersub(&curtime, &starttime, &tmp);
142 if (tmp.tv_sec >= retrytimo) {
143 fprintf(stderr, "rump_sp: reconnect "
144 "failed, %lld second timeout\n",
145 (long long)retrytimo);
146 return ENOTCONN;
147 }
148 }
149
150 /* adhoc backoff timer */
151 if (reconretries < 10) {
152 usleep(100000 * reconretries);
153 } else {
154 sleep(MIN(10, reconretries-9));
155 }
156 reconretries++;
157
158 if ((rv = doconnect()) != 0)
159 continue;
160 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
161 NULL, 0, true)) != 0)
162 continue;
163
164 /*
165 * ok, reconnect succesful. we need to return to
166 * the upper layer to get the entire PDU resent.
167 */
168 if (reconretries != 1)
169 fprintf(stderr, "rump_sp: reconnected!\n");
170 rv = EAGAIN;
171 break;
172 } else {
173 _DIAGASSERT(errno != EAGAIN);
174 break;
175 }
176 }
177
178 return rv;
179 }
180
181 static int
182 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
183 bool keeplock)
184 {
185 uint64_t mygen;
186 bool imalive = true;
187
188 pthread_mutex_lock(&spc->spc_mtx);
189 if (!keeplock)
190 sendunlockl(spc);
191 mygen = spc->spc_generation;
192
193 rw->rw_error = 0;
194 while (!rw->rw_done && rw->rw_error == 0) {
195 if (__predict_false(spc->spc_generation != mygen || !imalive))
196 break;
197
198 /* are we free to receive? */
199 if (spc->spc_istatus == SPCSTATUS_FREE) {
200 struct kevent kev[8];
201 int gotresp, dosig, rv, i;
202
203 spc->spc_istatus = SPCSTATUS_BUSY;
204 pthread_mutex_unlock(&spc->spc_mtx);
205
206 dosig = 0;
207 for (gotresp = 0; !gotresp; ) {
208 /*
209 * typically we don't have a frame waiting
210 * when we come in here, so call kevent now
211 */
212 rv = host_kevent(kq, NULL, 0,
213 kev, __arraycount(kev), NULL);
214
215 if (__predict_false(rv == -1)) {
216 goto activity;
217 }
218
219 /*
220 * XXX: don't know how this can happen
221 * (timeout cannot expire since there
222 * isn't one), but it does happen.
223 * treat it as an expectional condition
224 * and go through tryread to determine
225 * alive status.
226 */
227 if (__predict_false(rv == 0))
228 goto activity;
229
230 for (i = 0; i < rv; i++) {
231 if (kev[i].filter == EVFILT_SIGNAL)
232 dosig++;
233 }
234 if (dosig)
235 goto cleanup;
236
237 /*
238 * ok, activity. try to read a frame to
239 * determine what happens next.
240 */
241 activity:
242 switch (readframe(spc)) {
243 case 0:
244 continue;
245 case -1:
246 imalive = false;
247 goto cleanup;
248 default:
249 /* case 1 */
250 break;
251 }
252
253 switch (spc->spc_hdr.rsp_class) {
254 case RUMPSP_RESP:
255 case RUMPSP_ERROR:
256 kickwaiter(spc);
257 gotresp = spc->spc_hdr.rsp_reqno ==
258 rw->rw_reqno;
259 break;
260 case RUMPSP_REQ:
261 handlereq(spc);
262 break;
263 default:
264 /* panic */
265 break;
266 }
267 }
268
269 cleanup:
270 pthread_mutex_lock(&spc->spc_mtx);
271 if (spc->spc_istatus == SPCSTATUS_WANTED)
272 kickall(spc);
273 spc->spc_istatus = SPCSTATUS_FREE;
274
275 /* take one for the team */
276 if (dosig) {
277 pthread_mutex_unlock(&spc->spc_mtx);
278 pthread_sigmask(SIG_SETMASK, mask, NULL);
279 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
280 pthread_mutex_lock(&spc->spc_mtx);
281 }
282 } else {
283 spc->spc_istatus = SPCSTATUS_WANTED;
284 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
285 }
286 }
287 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
288 pthread_mutex_unlock(&spc->spc_mtx);
289 pthread_cond_destroy(&rw->rw_cv);
290
291 if (spc->spc_generation != mygen || !imalive) {
292 return ENOTCONN;
293 }
294 return rw->rw_error;
295 }
296
297 static int
298 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
299 const void *data, size_t dlen, void **resp)
300 {
301 struct rsp_hdr rhdr;
302 struct respwait rw;
303 struct iovec iov[2];
304 int rv;
305
306 rhdr.rsp_len = sizeof(rhdr) + dlen;
307 rhdr.rsp_class = RUMPSP_REQ;
308 rhdr.rsp_type = RUMPSP_SYSCALL;
309 rhdr.rsp_sysnum = sysnum;
310
311 IOVPUT(iov[0], rhdr);
312 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
313
314 do {
315 putwait(spc, &rw, &rhdr);
316 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
317 unputwait(spc, &rw);
318 continue;
319 }
320
321 rv = cliwaitresp(spc, &rw, omask, false);
322 if (rv == ENOTCONN)
323 rv = EAGAIN;
324 } while (rv == EAGAIN);
325
326 *resp = rw.rw_data;
327 return rv;
328 }
329
330 static int
331 handshake_req(struct spclient *spc, int type, void *data,
332 int cancel, bool haslock)
333 {
334 struct handshake_fork rf;
335 const char *myprogname = NULL; /* XXXgcc */
336 struct rsp_hdr rhdr;
337 struct respwait rw;
338 sigset_t omask;
339 size_t bonus;
340 struct iovec iov[2];
341 int rv;
342
343 if (type == HANDSHAKE_FORK) {
344 bonus = sizeof(rf);
345 } else {
346 myprogname = getprogname();
347 bonus = strlen(myprogname)+1;
348 }
349
350 /* performs server handshake */
351 rhdr.rsp_len = sizeof(rhdr) + bonus;
352 rhdr.rsp_class = RUMPSP_REQ;
353 rhdr.rsp_type = RUMPSP_HANDSHAKE;
354 rhdr.rsp_handshake = type;
355
356 IOVPUT(iov[0], rhdr);
357
358 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
359 if (haslock)
360 putwait_locked(spc, &rw, &rhdr);
361 else
362 putwait(spc, &rw, &rhdr);
363 if (type == HANDSHAKE_FORK) {
364 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
365 rf.rf_cancel = cancel;
366 IOVPUT(iov[1], rf);
367 } else {
368 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
369 }
370 rv = send_with_recon(spc, iov, __arraycount(iov));
371 if (rv || cancel) {
372 if (haslock)
373 unputwait_locked(spc, &rw);
374 else
375 unputwait(spc, &rw);
376 if (cancel) {
377 goto out;
378 }
379 } else {
380 rv = cliwaitresp(spc, &rw, &omask, haslock);
381 }
382 if (rv)
383 goto out;
384
385 rv = *(int *)rw.rw_data;
386 free(rw.rw_data);
387
388 out:
389 pthread_sigmask(SIG_SETMASK, &omask, NULL);
390 return rv;
391 }
392
393 static int
394 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
395 {
396 struct rsp_hdr rhdr;
397 struct respwait rw;
398 struct iovec iov[1];
399 int rv;
400
401 rhdr.rsp_len = sizeof(rhdr);
402 rhdr.rsp_class = RUMPSP_REQ;
403 rhdr.rsp_type = RUMPSP_PREFORK;
404 rhdr.rsp_error = 0;
405
406 IOVPUT(iov[0], rhdr);
407
408 do {
409 putwait(spc, &rw, &rhdr);
410 rv = send_with_recon(spc, iov, __arraycount(iov));
411 if (rv != 0) {
412 unputwait(spc, &rw);
413 continue;
414 }
415
416 rv = cliwaitresp(spc, &rw, omask, false);
417 if (rv == ENOTCONN)
418 rv = EAGAIN;
419 } while (rv == EAGAIN);
420
421 *resp = rw.rw_data;
422 return rv;
423 }
424
425 /*
426 * prevent response code from deadlocking with reconnect code
427 */
428 static int
429 resp_sendlock(struct spclient *spc)
430 {
431 int rv = 0;
432
433 pthread_mutex_lock(&spc->spc_mtx);
434 while (spc->spc_ostatus != SPCSTATUS_FREE) {
435 if (__predict_false(spc->spc_reconnecting)) {
436 rv = EBUSY;
437 goto out;
438 }
439 spc->spc_ostatus = SPCSTATUS_WANTED;
440 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
441 }
442 spc->spc_ostatus = SPCSTATUS_BUSY;
443
444 out:
445 pthread_mutex_unlock(&spc->spc_mtx);
446 return rv;
447 }
448
449 static void
450 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
451 int wantstr)
452 {
453 struct rsp_hdr rhdr;
454 struct iovec iov[2];
455
456 if (wantstr)
457 dlen = MIN(dlen, strlen(data)+1);
458
459 rhdr.rsp_len = sizeof(rhdr) + dlen;
460 rhdr.rsp_reqno = reqno;
461 rhdr.rsp_class = RUMPSP_RESP;
462 rhdr.rsp_type = RUMPSP_COPYIN;
463 rhdr.rsp_sysnum = 0;
464
465 IOVPUT(iov[0], rhdr);
466 IOVPUT_WITHSIZE(iov[1], data, dlen);
467
468 if (resp_sendlock(spc) != 0)
469 return;
470 (void)SENDIOV(spc, iov);
471 sendunlock(spc);
472 }
473
474 static void
475 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
476 {
477 struct rsp_hdr rhdr;
478 struct iovec iov[2];
479
480 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
481 rhdr.rsp_reqno = reqno;
482 rhdr.rsp_class = RUMPSP_RESP;
483 rhdr.rsp_type = RUMPSP_ANONMMAP;
484 rhdr.rsp_sysnum = 0;
485
486 IOVPUT(iov[0], rhdr);
487 IOVPUT(iov[1], addr);
488
489 if (resp_sendlock(spc) != 0)
490 return;
491 (void)SENDIOV(spc, iov);
492 sendunlock(spc);
493 }
494
495 int
496 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
497 register_t *retval)
498 {
499 struct rsp_sysresp *resp;
500 sigset_t omask;
501 void *rdata;
502 int rv;
503
504 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
505
506 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
507 sysnum, data, dlen));
508
509 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
510 if (rv)
511 goto out;
512
513 resp = rdata;
514 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
515 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
516
517 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
518 rv = resp->rsys_error;
519 free(rdata);
520
521 out:
522 pthread_sigmask(SIG_SETMASK, &omask, NULL);
523 return rv;
524 }
525
526 static void
527 handlereq(struct spclient *spc)
528 {
529 struct rsp_copydata *copydata;
530 struct rsp_hdr *rhdr = &spc->spc_hdr;
531 void *mapaddr;
532 size_t maplen;
533 int reqtype = spc->spc_hdr.rsp_type;
534
535 switch (reqtype) {
536 case RUMPSP_COPYIN:
537 case RUMPSP_COPYINSTR:
538 /*LINTED*/
539 copydata = (struct rsp_copydata *)spc->spc_buf;
540 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
541 copydata->rcp_addr, copydata->rcp_len));
542 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
543 copydata->rcp_addr, copydata->rcp_len,
544 reqtype == RUMPSP_COPYINSTR);
545 break;
546 case RUMPSP_COPYOUT:
547 case RUMPSP_COPYOUTSTR:
548 /*LINTED*/
549 copydata = (struct rsp_copydata *)spc->spc_buf;
550 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
551 copydata->rcp_addr, copydata->rcp_len));
552 /*LINTED*/
553 memcpy(copydata->rcp_addr, copydata->rcp_data,
554 copydata->rcp_len);
555 break;
556 case RUMPSP_ANONMMAP:
557 /*LINTED*/
558 maplen = *(size_t *)spc->spc_buf;
559 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
560 MAP_ANON, -1, 0);
561 if (mapaddr == MAP_FAILED)
562 mapaddr = NULL;
563 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
564 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
565 break;
566 case RUMPSP_RAISE:
567 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
568 raise((int)rhdr->rsp_signo);
569 /*
570 * We most likely have signals blocked, but the signal
571 * will be handled soon enough when we return.
572 */
573 break;
574 default:
575 printf("PANIC: INVALID TYPE %d\n", reqtype);
576 abort();
577 break;
578 }
579
580 spcfreebuf(spc);
581 }
582
/* Server address and its parsetab[] index, as filled in by parseurl(). */
static struct sockaddr *serv_sa;
static unsigned ptab_idx;
585
/*
 * dup myfd until we get a "good" descriptor, i.e. one which does not
 * collide with stdio (> 2).  If mustchange is set, a new descriptor
 * number is produced even if myfd is already good, and myfd itself
 * is left open.
 *
 * Every other descriptor that was passed over on the way is closed.
 * Returns the good descriptor, or -1 if myfd was -1 or host_dup()
 * failed; the errno of a failed dup survives the closes.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int nold = 0;	/* number of descriptors to close in ofds */

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nold < __arraycount(ofds));
		ofds[nold] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* don't count the slot: prevents closing old fd */
			mustchange = 0;
		} else {
			nold++;
		}
	}

	/* the closes below may clobber errno of a failed dup */
	sverrno = (myfd == -1 && nold > 0) ? errno : 0;

	while (nold > 0) {
		nold--;
		host_close(ofds[nold]);
	}

	if (sverrno != 0)
		errno = sverrno;

	return myfd;
}
617
618 static int
619 doconnect(void)
620 {
621 struct respwait rw;
622 struct rsp_hdr rhdr;
623 struct kevent kev[NSIG+1];
624 char banner[MAXBANNER];
625 struct pollfd pfd;
626 int s, error, flags, i;
627 ssize_t n;
628
629 if (kq != -1)
630 host_close(kq);
631 kq = -1;
632 s = -1;
633
634 if (clispc.spc_fd != -1)
635 host_close(clispc.spc_fd);
636 clispc.spc_fd = -1;
637
638 /*
639 * for reconnect, gate everyone out of the receiver code
640 */
641 putwait_locked(&clispc, &rw, &rhdr);
642
643 pthread_mutex_lock(&clispc.spc_mtx);
644 clispc.spc_reconnecting = 1;
645 pthread_cond_broadcast(&clispc.spc_cv);
646 clispc.spc_generation++;
647 while (clispc.spc_istatus != SPCSTATUS_FREE) {
648 clispc.spc_istatus = SPCSTATUS_WANTED;
649 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
650 }
651 kickall(&clispc);
652
653 /*
654 * we can release it already since we hold the
655 * send lock during reconnect
656 * XXX: assert it
657 */
658 clispc.spc_istatus = SPCSTATUS_FREE;
659 pthread_mutex_unlock(&clispc.spc_mtx);
660 unputwait_locked(&clispc, &rw);
661
662 free(clispc.spc_buf);
663 clispc.spc_off = 0;
664
665 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
666 if (s == -1)
667 return -1;
668
669 pfd.fd = s;
670 pfd.events = POLLIN;
671 while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
672 if (errno == EINTR)
673 continue;
674 ERRLOG(("rump_sp: client connect failed: %s\n",
675 strerror(errno)));
676 return -1;
677 }
678
679 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
680 ERRLOG(("rump_sp: connect hook failed\n"));
681 return -1;
682 }
683
684 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
685 ERRLOG(("rump_sp: failed to read banner\n"));
686 return -1;
687 }
688
689 if (banner[n-1] != '\n') {
690 ERRLOG(("rump_sp: invalid banner\n"));
691 return -1;
692 }
693 banner[n] = '\0';
694 /* XXX parse the banner some day */
695
696 flags = host_fcntl(s, F_GETFL, 0);
697 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
698 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
699 return -1;
700 }
701 clispc.spc_fd = s;
702 clispc.spc_state = SPCSTATE_RUNNING;
703 clispc.spc_reconnecting = 0;
704
705 /* setup kqueue, we want all signals and the fd */
706 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
707 ERRLOG(("rump_sp: cannot setup kqueue"));
708 return -1;
709 }
710
711 for (i = 0; i < NSIG; i++) {
712 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
713 }
714 EV_SET(&kev[NSIG], clispc.spc_fd,
715 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
716 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
717 ERRLOG(("rump_sp: kevent() failed"));
718 return -1;
719 }
720
721 return 0;
722 }
723
724 static int
725 doinit(void)
726 {
727
728 TAILQ_INIT(&clispc.spc_respwait);
729 pthread_mutex_init(&clispc.spc_mtx, NULL);
730 pthread_cond_init(&clispc.spc_cv, NULL);
731
732 return 0;
733 }
734
735 void *rumpclient__dlsym(void *, const char *);
736 void *rumphijack_dlsym(void *, const char *) __attribute__((__weak__));
737 void *
738 rumpclient__dlsym(void *handle, const char *symbol)
739 {
740
741 return dlsym(handle, symbol);
742 }
743 __weak_alias(rumphijack_dlsym,rumpclient__dlsym)
744
745 static pid_t init_done = 0;
746
747 int
748 rumpclient_init(void)
749 {
750 char *p;
751 int error;
752 int rv = -1;
753 int hstype;
754 pid_t mypid;
755
756 /*
757 * Make sure we're not riding the context of a previous
758 * host fork. Note: it's *possible* that after n>1 forks
759 * we have the same pid as one of our exited parents, but
760 * I'm pretty sure there are 0 practical implications, since
761 * it means generations would have to skip rumpclient init.
762 */
763 if (init_done == (mypid = getpid()))
764 return 0;
765
766 /* kq does not traverse fork() */
767 if (init_done != 0)
768 kq = -1;
769 init_done = mypid;
770
771 sigfillset(&fullset);
772
773 /*
774 * sag mir, wo die symbol sind. zogen fort, der krieg beginnt.
775 * wann wird man je verstehen? wann wird man je verstehen?
776 */
777 #define FINDSYM2(_name_,_syscall_) \
778 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
779 #_syscall_)) == NULL) { \
780 if (rumphijack_dlsym == rumpclient__dlsym) \
781 host_##_name_ = _name_; /* static fallback */ \
782 if (host_##_name_ == NULL) \
783 errx(1, "cannot find %s: %s", #_syscall_, \
784 dlerror()); \
785 }
786 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
787 FINDSYM2(socket,__socket30)
788 FINDSYM(close)
789 FINDSYM(connect)
790 FINDSYM(fcntl)
791 FINDSYM(poll)
792 FINDSYM(read)
793 FINDSYM(sendmsg)
794 FINDSYM(setsockopt)
795 FINDSYM(dup)
796 FINDSYM(kqueue)
797 FINDSYM(execve)
798 #if !__NetBSD_Prereq__(5,99,7)
799 FINDSYM(kevent)
800 #else
801 FINDSYM2(kevent,_sys___kevent50)
802 #endif
803 #undef FINDSYM
804 #undef FINDSY2
805
806 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
807 if ((p = getenv("RUMP_SERVER")) == NULL) {
808 errno = ENOENT;
809 goto out;
810 }
811 }
812
813 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
814 errno = error;
815 goto out;
816 }
817
818 if (doinit() == -1)
819 goto out;
820
821 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
822 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
823 unsetenv("RUMPCLIENT__EXECFD");
824 hstype = HANDSHAKE_EXEC;
825 } else {
826 if (doconnect() == -1)
827 goto out;
828 hstype = HANDSHAKE_GUEST;
829 }
830
831 error = handshake_req(&clispc, hstype, NULL, 0, false);
832 if (error) {
833 pthread_mutex_destroy(&clispc.spc_mtx);
834 pthread_cond_destroy(&clispc.spc_cv);
835 if (clispc.spc_fd != -1)
836 host_close(clispc.spc_fd);
837 errno = error;
838 goto out;
839 }
840 rv = 0;
841
842 out:
843 if (rv == -1)
844 init_done = 0;
845 return rv;
846 }
847
848 struct rumpclient_fork {
849 uint32_t fork_auth[AUTHLEN];
850 struct spclient fork_spc;
851 int fork_kq;
852 };
853
854 struct rumpclient_fork *
855 rumpclient_prefork(void)
856 {
857 struct rumpclient_fork *rpf;
858 sigset_t omask;
859 void *resp;
860 int rv;
861
862 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
863 rpf = malloc(sizeof(*rpf));
864 if (rpf == NULL)
865 goto out;
866
867 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
868 free(rpf);
869 errno = rv;
870 rpf = NULL;
871 goto out;
872 }
873
874 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
875 free(resp);
876
877 rpf->fork_spc = clispc;
878 rpf->fork_kq = kq;
879
880 out:
881 pthread_sigmask(SIG_SETMASK, &omask, NULL);
882 return rpf;
883 }
884
885 int
886 rumpclient_fork_init(struct rumpclient_fork *rpf)
887 {
888 int error;
889 int osock;
890
891 osock = clispc.spc_fd;
892 memset(&clispc, 0, sizeof(clispc));
893 clispc.spc_fd = osock;
894
895 kq = -1; /* kqueue descriptor is not copied over fork() */
896
897 if (doinit() == -1)
898 return -1;
899 if (doconnect() == -1)
900 return -1;
901
902 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
903 0, false);
904 if (error) {
905 pthread_mutex_destroy(&clispc.spc_mtx);
906 pthread_cond_destroy(&clispc.spc_cv);
907 errno = error;
908 return -1;
909 }
910
911 return 0;
912 }
913
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: currently does nothing with rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
921
922 void
923 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
924 {
925
926 clispc = rpf->fork_spc;
927 kq = rpf->fork_kq;
928 }
929
930 void
931 rumpclient_setconnretry(time_t timeout)
932 {
933
934 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
935 return; /* gigo */
936
937 retrytimo = timeout;
938 }
939
940 int
941 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
942 {
943 int fd = *fdp;
944 int untilfd, rv;
945 int newfd;
946
947 switch (variant) {
948 case RUMPCLIENT_CLOSE_FCLOSEM:
949 untilfd = MAX(clispc.spc_fd, kq);
950 for (; fd <= untilfd; fd++) {
951 if (fd == clispc.spc_fd || fd == kq)
952 continue;
953 rv = host_close(fd);
954 if (rv == -1)
955 return -1;
956 }
957 *fdp = fd;
958 break;
959
960 case RUMPCLIENT_CLOSE_CLOSE:
961 case RUMPCLIENT_CLOSE_DUP2:
962 if (fd == clispc.spc_fd) {
963 struct kevent kev[2];
964
965 newfd = dupgood(clispc.spc_fd, 1);
966 if (newfd == -1)
967 return -1;
968 /*
969 * now, we have a new socket number, so change
970 * the file descriptor that kqueue is
971 * monitoring. remove old and add new.
972 */
973 EV_SET(&kev[0], clispc.spc_fd,
974 EVFILT_READ, EV_DELETE, 0, 0, 0);
975 EV_SET(&kev[1], newfd,
976 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
977 if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
978 int sverrno = errno;
979 host_close(newfd);
980 errno = sverrno;
981 return -1;
982 }
983 clispc.spc_fd = newfd;
984 }
985 if (fd == kq) {
986 newfd = dupgood(kq, 1);
987 if (newfd == -1)
988 return -1;
989 kq = newfd;
990 }
991 break;
992 }
993
994 return 0;
995 }
996
/*
 * fork() for rump kernel clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1003
1004 /*
1005 * Process is about to exec. Save info about our existing connection
1006 * in the env. rumpclient will check for this info in init().
1007 * This is mostly for the benefit of rumphijack, but regular applications
1008 * may use it as well.
1009 */
1010 int
1011 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1012 {
1013 char buf[4096];
1014 char **newenv;
1015 char *envstr, *envstr2;
1016 size_t nelem;
1017 int rv, sverrno;
1018
1019 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1020 clispc.spc_fd, kq);
1021 envstr = malloc(strlen(buf)+1);
1022 if (envstr == NULL) {
1023 return ENOMEM;
1024 }
1025 strcpy(envstr, buf);
1026
1027 /* do we have a fully parsed url we want to forward in the env? */
1028 if (*parsedurl != '\0') {
1029 snprintf(buf, sizeof(buf),
1030 "RUMP__PARSEDSERVER=%s", parsedurl);
1031 envstr2 = malloc(strlen(buf)+1);
1032 if (envstr2 == NULL) {
1033 free(envstr);
1034 return ENOMEM;
1035 }
1036 strcpy(envstr2, buf);
1037 } else {
1038 envstr2 = NULL;
1039 }
1040
1041 for (nelem = 0; envp && envp[nelem]; nelem++)
1042 continue;
1043
1044 newenv = malloc(sizeof(*newenv) * (nelem+3));
1045 if (newenv == NULL) {
1046 free(envstr2);
1047 free(envstr);
1048 return ENOMEM;
1049 }
1050 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1051
1052 newenv[nelem] = envstr;
1053 newenv[nelem+1] = envstr2;
1054 newenv[nelem+2] = NULL;
1055
1056 rv = host_execve(path, argv, newenv);
1057
1058 _DIAGASSERT(rv != 0);
1059 sverrno = errno;
1060 free(envstr2);
1061 free(envstr);
1062 free(newenv);
1063 errno = sverrno;
1064 return rv;
1065 }
1066
/*
 * daemon(3) for rump kernel clients.  daemon() forks, so the server
 * is prepared for a fork beforehand and the connection is set up
 * again afterwards in the process that continues.
 *
 * Returns 0 on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	if (rumpclient_fork_init(rf) == -1) {
		sverrno = errno;
		free(rf);
		errno = sverrno;
		return -1;
	}

	/*
	 * The fork context came from malloc() in rumpclient_prefork()
	 * and nothing refers to it after rumpclient_fork_init().
	 */
	free(rf);
	return 0;
}
1088