rumpclient.c revision 1.59 1 /* $NetBSD: rumpclient.c,v 1.59 2014/04/02 15:04:19 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include <rump/rumpuser_port.h>
33
34 /*
35 * We use kqueue on NetBSD, poll elsewhere. Theoretically we could
36 * use kqueue on other BSD's too, but I haven't tested those. We
37 * want to use kqueue because it will give us the ability to get signal
38 * notifications but defer their handling to a stage where we do not
39 * hold the communication lock. Taking a signal while holding on to
40 * that lock may cause a deadlock. Therefore, block signals throughout
41 * the RPC when using poll. On Linux, we use signalfd in the same role
42 * as kqueue on NetBSD to be able to take signals while waiting for a
43 * response from the server.
44 */
45
46 #ifdef __NetBSD__
47 #define USE_KQUEUE
48 #endif
49 #ifdef __linux__
50 #define USE_SIGNALFD
51 #endif
52
53 __RCSID("$NetBSD: rumpclient.c,v 1.59 2014/04/02 15:04:19 pooka Exp $");
54
55 #include <sys/param.h>
56 #include <sys/mman.h>
57 #include <sys/socket.h>
58 #include <sys/time.h>
59
60 #ifdef USE_KQUEUE
61 #include <sys/event.h>
62 #endif
63
64 #include <arpa/inet.h>
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67
68 #include <assert.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <poll.h>
73 #include <pthread.h>
74 #include <signal.h>
75 #include <stdarg.h>
76 #include <stdbool.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81
82 #include <rump/rumpclient.h>
83
/*
 * Pointers to the host's implementations of the calls this file makes
 * on its own behalf.  They are filled in by the FINDSYM()/FINDSYM2()
 * lookups in rumpclient_init().
 *
 * NOTE(review): HOSTOPS is defined right before sp_common.c is included,
 * presumably so that the shared code goes through these pointers --
 * confirm against sp_common.c.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue entry points, only needed when signals are watched via kqueue */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

/* signalfd entry point, only needed when signals are watched via signalfd */
#ifdef USE_SIGNALFD
#include <sys/signalfd.h>

int	(*host_signalfd)(int, const sigset_t *, int);
#endif

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
108
109 #include "sp_common.c"
110 #include "rumpuser_sigtrans.c"
111
/*
 * State of the (single) connection to the server.  The socket starts
 * out as -1, i.e. "not connected"; doconnect() fills it in.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};
115
116 static int holyfd;
117 static sigset_t fullset;
118
/* Forward declarations: send_with_recon() uses both before they are defined. */
static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * The value is changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
130
131 static int
132 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
133 {
134 struct timeval starttime, curtime;
135 time_t prevreconmsg;
136 unsigned reconretries;
137 int rv;
138
139 for (prevreconmsg = 0, reconretries = 0;;) {
140 rv = dosend(spc, iov, iovlen);
141 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
142 /* no persistent connections */
143 if (retrytimo == 0) {
144 rv = ENOTCONN;
145 break;
146 }
147 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
148 _exit(1);
149
150 if (!prevreconmsg) {
151 prevreconmsg = time(NULL);
152 gettimeofday(&starttime, NULL);
153 }
154 if (reconretries == 1) {
155 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
156 rv = ENOTCONN;
157 break;
158 }
159 fprintf(stderr, "rump_sp: connection to "
160 "kernel lost, trying to reconnect ...\n");
161 } else if (time(NULL) - prevreconmsg > 120) {
162 fprintf(stderr, "rump_sp: still trying to "
163 "reconnect ...\n");
164 prevreconmsg = time(NULL);
165 }
166
167 /* check that we aren't over the limit */
168 if (retrytimo > 0) {
169 time_t tdiff;
170
171 gettimeofday(&curtime, NULL);
172 tdiff = curtime.tv_sec - starttime.tv_sec;
173 if (starttime.tv_usec > curtime.tv_usec)
174 tdiff--;
175 if (tdiff >= retrytimo) {
176 fprintf(stderr, "rump_sp: reconnect "
177 "failed, %lld second timeout\n",
178 (long long)retrytimo);
179 return ENOTCONN;
180 }
181 }
182
183 /* adhoc backoff timer */
184 if (reconretries < 10) {
185 usleep(100000 * reconretries);
186 } else {
187 sleep(MIN(10, reconretries-9));
188 }
189 reconretries++;
190
191 if ((rv = doconnect()) != 0)
192 continue;
193 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
194 NULL, 0, true)) != 0)
195 continue;
196
197 /*
198 * ok, reconnect succesful. we need to return to
199 * the upper layer to get the entire PDU resent.
200 */
201 if (reconretries != 1)
202 fprintf(stderr, "rump_sp: reconnected!\n");
203 rv = EAGAIN;
204 break;
205 } else {
206 _DIAGASSERT(errno != EAGAIN);
207 break;
208 }
209 }
210
211 return rv;
212 }
213
214 static int
215 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
216 bool keeplock)
217 {
218 uint64_t mygen;
219 bool imalive = true;
220
221 pthread_mutex_lock(&spc->spc_mtx);
222 if (!keeplock)
223 sendunlockl(spc);
224 mygen = spc->spc_generation;
225
226 rw->rw_error = 0;
227 while (!rw->rw_done && rw->rw_error == 0) {
228 if (__predict_false(spc->spc_generation != mygen || !imalive))
229 break;
230
231 /* are we free to receive? */
232 if (spc->spc_istatus == SPCSTATUS_FREE) {
233 int gotresp, dosig, rv;
234
235 spc->spc_istatus = SPCSTATUS_BUSY;
236 pthread_mutex_unlock(&spc->spc_mtx);
237
238 dosig = 0;
239 for (gotresp = 0; !gotresp; ) {
240 #ifdef USE_KQUEUE
241 struct kevent kev[8];
242 int i;
243
244 /*
245 * typically we don't have a frame waiting
246 * when we come in here, so call kevent now
247 */
248 rv = host_kevent(holyfd, NULL, 0,
249 kev, __arraycount(kev), NULL);
250
251 if (__predict_false(rv == -1)) {
252 goto activity;
253 }
254
255 /*
256 * XXX: don't know how this can happen
257 * (timeout cannot expire since there
258 * isn't one), but it does happen.
259 * treat it as an expectional condition
260 * and go through tryread to determine
261 * alive status.
262 */
263 if (__predict_false(rv == 0))
264 goto activity;
265
266 for (i = 0; i < rv; i++) {
267 if (kev[i].filter == EVFILT_SIGNAL)
268 dosig++;
269 }
270 if (dosig)
271 goto cleanup;
272
273 /*
274 * ok, activity. try to read a frame to
275 * determine what happens next.
276 */
277 activity:
278 #else /* !USE_KQUEUE */
279 struct pollfd pfd[2];
280
281 pfd[0].fd = clispc.spc_fd;
282 pfd[0].events = POLLIN;
283 pfd[1].fd = holyfd;
284 pfd[1].events = POLLIN;
285
286 rv = host_poll(pfd, 2, -1);
287 if (pfd[1].revents & POLLIN) {
288 dosig = 1;
289 goto cleanup;
290 }
291 #endif /* !USE_KQUEUE */
292
293 switch (readframe(spc)) {
294 case 0:
295 continue;
296 case -1:
297 imalive = false;
298 goto cleanup;
299 default:
300 /* case 1 */
301 break;
302 }
303
304 switch (spc->spc_hdr.rsp_class) {
305 case RUMPSP_RESP:
306 case RUMPSP_ERROR:
307 kickwaiter(spc);
308 gotresp = spc->spc_hdr.rsp_reqno ==
309 rw->rw_reqno;
310 break;
311 case RUMPSP_REQ:
312 handlereq(spc);
313 break;
314 default:
315 /* panic */
316 break;
317 }
318 }
319
320 cleanup:
321 pthread_mutex_lock(&spc->spc_mtx);
322 if (spc->spc_istatus == SPCSTATUS_WANTED)
323 kickall(spc);
324 spc->spc_istatus = SPCSTATUS_FREE;
325
326 /* take one for the team */
327 if (dosig) {
328 pthread_mutex_unlock(&spc->spc_mtx);
329 pthread_sigmask(SIG_SETMASK, mask, NULL);
330 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
331 pthread_mutex_lock(&spc->spc_mtx);
332 }
333 } else {
334 spc->spc_istatus = SPCSTATUS_WANTED;
335 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
336 }
337 }
338 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
339 pthread_mutex_unlock(&spc->spc_mtx);
340 pthread_cond_destroy(&rw->rw_cv);
341
342 if (spc->spc_generation != mygen || !imalive) {
343 return ENOTCONN;
344 }
345 return rw->rw_error;
346 }
347
348 static int
349 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
350 const void *data, size_t dlen, void **resp)
351 {
352 struct rsp_hdr rhdr;
353 struct respwait rw;
354 struct iovec iov[2];
355 int rv;
356
357 rhdr.rsp_len = sizeof(rhdr) + dlen;
358 rhdr.rsp_class = RUMPSP_REQ;
359 rhdr.rsp_type = RUMPSP_SYSCALL;
360 rhdr.rsp_sysnum = sysnum;
361
362 IOVPUT(iov[0], rhdr);
363 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
364
365 do {
366 putwait(spc, &rw, &rhdr);
367 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
368 unputwait(spc, &rw);
369 continue;
370 }
371
372 rv = cliwaitresp(spc, &rw, omask, false);
373 if (rv == ENOTCONN)
374 rv = EAGAIN;
375 } while (rv == EAGAIN);
376
377 *resp = rw.rw_data;
378 return rv;
379 }
380
381 static int
382 handshake_req(struct spclient *spc, int type, void *data,
383 int cancel, bool haslock)
384 {
385 struct handshake_fork rf;
386 const char *myprogname = NULL; /* XXXgcc */
387 struct rsp_hdr rhdr;
388 struct respwait rw;
389 sigset_t omask;
390 size_t bonus;
391 struct iovec iov[2];
392 int rv;
393
394 if (type == HANDSHAKE_FORK) {
395 bonus = sizeof(rf);
396 } else {
397 #ifdef __NetBSD__
398 /* would procfs work on NetBSD too? */
399 myprogname = getprogname();
400 #else
401 int fd = open("/proc/self/comm", O_RDONLY);
402 if (fd == -1) {
403 myprogname = "???";
404 } else {
405 static char commname[128];
406
407 memset(commname, 0, sizeof(commname));
408 if (read(fd, commname, sizeof(commname)) > 0) {
409 char *n;
410
411 n = strrchr(commname, '\n');
412 if (n)
413 *n = '\0';
414 myprogname = commname;
415 } else {
416 myprogname = "???";
417 }
418 close(fd);
419 }
420 #endif
421 bonus = strlen(myprogname)+1;
422 }
423
424 /* performs server handshake */
425 rhdr.rsp_len = sizeof(rhdr) + bonus;
426 rhdr.rsp_class = RUMPSP_REQ;
427 rhdr.rsp_type = RUMPSP_HANDSHAKE;
428 rhdr.rsp_handshake = type;
429
430 IOVPUT(iov[0], rhdr);
431
432 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
433 if (haslock)
434 putwait_locked(spc, &rw, &rhdr);
435 else
436 putwait(spc, &rw, &rhdr);
437 if (type == HANDSHAKE_FORK) {
438 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
439 rf.rf_cancel = cancel;
440 IOVPUT(iov[1], rf);
441 } else {
442 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
443 }
444 rv = send_with_recon(spc, iov, __arraycount(iov));
445 if (rv || cancel) {
446 if (haslock)
447 unputwait_locked(spc, &rw);
448 else
449 unputwait(spc, &rw);
450 if (cancel) {
451 goto out;
452 }
453 } else {
454 rv = cliwaitresp(spc, &rw, &omask, haslock);
455 }
456 if (rv)
457 goto out;
458
459 rv = *(int *)rw.rw_data;
460 free(rw.rw_data);
461
462 out:
463 pthread_sigmask(SIG_SETMASK, &omask, NULL);
464 return rv;
465 }
466
467 static int
468 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
469 {
470 struct rsp_hdr rhdr;
471 struct respwait rw;
472 struct iovec iov[1];
473 int rv;
474
475 rhdr.rsp_len = sizeof(rhdr);
476 rhdr.rsp_class = RUMPSP_REQ;
477 rhdr.rsp_type = RUMPSP_PREFORK;
478 rhdr.rsp_error = 0;
479
480 IOVPUT(iov[0], rhdr);
481
482 do {
483 putwait(spc, &rw, &rhdr);
484 rv = send_with_recon(spc, iov, __arraycount(iov));
485 if (rv != 0) {
486 unputwait(spc, &rw);
487 continue;
488 }
489
490 rv = cliwaitresp(spc, &rw, omask, false);
491 if (rv == ENOTCONN)
492 rv = EAGAIN;
493 } while (rv == EAGAIN);
494
495 *resp = rw.rw_data;
496 return rv;
497 }
498
499 /*
500 * prevent response code from deadlocking with reconnect code
501 */
502 static int
503 resp_sendlock(struct spclient *spc)
504 {
505 int rv = 0;
506
507 pthread_mutex_lock(&spc->spc_mtx);
508 while (spc->spc_ostatus != SPCSTATUS_FREE) {
509 if (__predict_false(spc->spc_reconnecting)) {
510 rv = EBUSY;
511 goto out;
512 }
513 spc->spc_ostatus = SPCSTATUS_WANTED;
514 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
515 }
516 spc->spc_ostatus = SPCSTATUS_BUSY;
517
518 out:
519 pthread_mutex_unlock(&spc->spc_mtx);
520 return rv;
521 }
522
523 static void
524 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
525 int wantstr)
526 {
527 struct rsp_hdr rhdr;
528 struct iovec iov[2];
529
530 if (wantstr)
531 dlen = MIN(dlen, strlen(data)+1);
532
533 rhdr.rsp_len = sizeof(rhdr) + dlen;
534 rhdr.rsp_reqno = reqno;
535 rhdr.rsp_class = RUMPSP_RESP;
536 rhdr.rsp_type = RUMPSP_COPYIN;
537 rhdr.rsp_sysnum = 0;
538
539 IOVPUT(iov[0], rhdr);
540 IOVPUT_WITHSIZE(iov[1], data, dlen);
541
542 if (resp_sendlock(spc) != 0)
543 return;
544 (void)SENDIOV(spc, iov);
545 sendunlock(spc);
546 }
547
548 static void
549 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
550 {
551 struct rsp_hdr rhdr;
552 struct iovec iov[2];
553
554 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
555 rhdr.rsp_reqno = reqno;
556 rhdr.rsp_class = RUMPSP_RESP;
557 rhdr.rsp_type = RUMPSP_ANONMMAP;
558 rhdr.rsp_sysnum = 0;
559
560 IOVPUT(iov[0], rhdr);
561 IOVPUT(iov[1], addr);
562
563 if (resp_sendlock(spc) != 0)
564 return;
565 (void)SENDIOV(spc, iov);
566 sendunlock(spc);
567 }
568
569 int
570 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
571 register_t *retval)
572 {
573 struct rsp_sysresp *resp;
574 sigset_t omask;
575 void *rdata;
576 int rv;
577
578 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
579
580 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
581 sysnum, data, dlen));
582
583 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
584 if (rv)
585 goto out;
586
587 resp = rdata;
588 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
589 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
590
591 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
592 rv = resp->rsys_error;
593 free(rdata);
594
595 out:
596 pthread_sigmask(SIG_SETMASK, &omask, NULL);
597 return rv;
598 }
599
600 static void
601 handlereq(struct spclient *spc)
602 {
603 struct rsp_copydata *copydata;
604 struct rsp_hdr *rhdr = &spc->spc_hdr;
605 void *mapaddr;
606 size_t maplen;
607 int reqtype = spc->spc_hdr.rsp_type;
608 int sig;
609
610 switch (reqtype) {
611 case RUMPSP_COPYIN:
612 case RUMPSP_COPYINSTR:
613 /*LINTED*/
614 copydata = (struct rsp_copydata *)spc->spc_buf;
615 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
616 copydata->rcp_addr, copydata->rcp_len));
617 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
618 copydata->rcp_addr, copydata->rcp_len,
619 reqtype == RUMPSP_COPYINSTR);
620 break;
621 case RUMPSP_COPYOUT:
622 case RUMPSP_COPYOUTSTR:
623 /*LINTED*/
624 copydata = (struct rsp_copydata *)spc->spc_buf;
625 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
626 copydata->rcp_addr, copydata->rcp_len));
627 /*LINTED*/
628 memcpy(copydata->rcp_addr, copydata->rcp_data,
629 copydata->rcp_len);
630 break;
631 case RUMPSP_ANONMMAP:
632 /*LINTED*/
633 maplen = *(size_t *)spc->spc_buf;
634 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
635 MAP_ANON|MAP_PRIVATE, -1, 0);
636 if (mapaddr == MAP_FAILED)
637 mapaddr = NULL;
638 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
639 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
640 break;
641 case RUMPSP_RAISE:
642 sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
643 DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
644 raise(sig);
645 /*
646 * We most likely have signals blocked, but the signal
647 * will be handled soon enough when we return.
648 */
649 break;
650 default:
651 printf("PANIC: INVALID TYPE %d\n", reqtype);
652 abort();
653 break;
654 }
655
656 spcfreebuf(spc);
657 }
658
/*
 * Where the server lives.  Both are filled in by parseurl() in
 * rumpclient_init(): ptab_idx indexes parsetab[] and serv_sa is the
 * address handed to connect() in doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
661
/*
 * dup until we get a "good" fd which does not collide with stdio
 * (i.e. one that is greater than 2).
 *
 * myfd        descriptor to start from; -1 is passed straight through
 * mustchange  when nonzero, a new descriptor number is produced even if
 *             myfd is already good, and the original myfd is left open
 *
 * Every intermediate descriptor that was duplicated away from is
 * closed.  Returns the good descriptor, or -1 if dup failed (errno is
 * then the one from the failed dup).
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int sverrno = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: do not count this slot */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	/* the closes below may clobber errno of a failed dup */
	if (myfd == -1 && nstale > 0)
		sverrno = errno;

	while (nstale > 0) {
		nstale--;
		host_close(stale[nstale]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
693
/*
 * makeholyfd() creates the descriptor through which pending signals are
 * noticed while waiting for the server.  Returns the descriptor or -1.
 */
#if defined(USE_KQUEUE)

/*
 * kqueue flavour: the queue reports signals 1..NSIG and readability of
 * the server socket.
 */
static int
makeholyfd(void)
{
	struct kevent changes[NSIG+1];
	int kq, signo;

	/* setup kqueue, we want all signals and the fd */
	kq = dupgood(host_kqueue(), 0);
	if (kq == -1) {
		ERRLOG(("rump_sp: cannot setup kqueue"));
		return -1;
	}

	for (signo = 1; signo <= NSIG; signo++) {
		EV_SET(&changes[signo-1], signo, EVFILT_SIGNAL,
		    EV_ADD|EV_ENABLE, 0, 0, 0);
	}
	EV_SET(&changes[NSIG], clispc.spc_fd,
	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);

	if (host_kevent(kq, changes, NSIG+1, NULL, 0, NULL) != -1)
		return kq;

	ERRLOG(("rump_sp: kevent() failed"));
	host_close(kq);
	return -1;
}

#elif defined(USE_SIGNALFD) /* !USE_KQUEUE */

/* signalfd flavour: a new signalfd covering every signal in fullset */
static int
makeholyfd(void)
{
	int sfd;

	sfd = host_signalfd(-1, &fullset, 0);
	return sfd;
}

#else /* !USE_KQUEUE && !USE_SIGNALFD */

/* no mechanism available: there is no such descriptor */
static int
makeholyfd(void)
{

	return -1;
}

#endif
741
742 static int
743 doconnect(void)
744 {
745 struct respwait rw;
746 struct rsp_hdr rhdr;
747 char banner[MAXBANNER];
748 int s, error, flags;
749 ssize_t n;
750
751 if (holyfd != -1)
752 host_close(holyfd);
753 holyfd = -1;
754 s = -1;
755
756 if (clispc.spc_fd != -1)
757 host_close(clispc.spc_fd);
758 clispc.spc_fd = -1;
759
760 /*
761 * for reconnect, gate everyone out of the receiver code
762 */
763 putwait_locked(&clispc, &rw, &rhdr);
764
765 pthread_mutex_lock(&clispc.spc_mtx);
766 clispc.spc_reconnecting = 1;
767 pthread_cond_broadcast(&clispc.spc_cv);
768 clispc.spc_generation++;
769 while (clispc.spc_istatus != SPCSTATUS_FREE) {
770 clispc.spc_istatus = SPCSTATUS_WANTED;
771 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
772 }
773 kickall(&clispc);
774
775 /*
776 * we can release it already since we hold the
777 * send lock during reconnect
778 * XXX: assert it
779 */
780 clispc.spc_istatus = SPCSTATUS_FREE;
781 pthread_mutex_unlock(&clispc.spc_mtx);
782 unputwait_locked(&clispc, &rw);
783
784 free(clispc.spc_buf);
785 clispc.spc_off = 0;
786
787 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
788 if (s == -1)
789 return -1;
790
791 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
792 if (errno == EINTR)
793 continue;
794 ERRLOG(("rump_sp: client connect failed: %s\n",
795 strerror(errno)));
796 return -1;
797 }
798
799 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
800 ERRLOG(("rump_sp: connect hook failed\n"));
801 return -1;
802 }
803
804 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
805 ERRLOG(("rump_sp: failed to read banner\n"));
806 return -1;
807 }
808
809 if (banner[n-1] != '\n') {
810 ERRLOG(("rump_sp: invalid banner\n"));
811 return -1;
812 }
813 banner[n] = '\0';
814 /* XXX parse the banner some day */
815
816 flags = host_fcntl(s, F_GETFL, 0);
817 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
818 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
819 return -1;
820 }
821 clispc.spc_fd = s;
822 clispc.spc_state = SPCSTATE_RUNNING;
823 clispc.spc_reconnecting = 0;
824 holyfd = makeholyfd();
825
826 return 0;
827 }
828
829 static int
830 doinit(void)
831 {
832
833 TAILQ_INIT(&clispc.spc_respwait);
834 pthread_mutex_init(&clispc.spc_mtx, NULL);
835 pthread_cond_init(&clispc.spc_cv, NULL);
836
837 return 0;
838 }
839
#ifdef RTLD_NEXT
/*
 * Plain dlsym() under a name of our own.  rumphijack_dlsym is declared
 * as a weak alias of it; rumpclient_init() compares the two to decide
 * whether the static fallback for host calls may be used.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *sym;

	sym = dlsym(handle, symbol);
	return sym;
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
851
852 static pid_t init_done = 0;
853
854 int
855 rumpclient_init(void)
856 {
857 char *p;
858 int error;
859 int rv = -1;
860 int hstype;
861 pid_t mypid;
862
863 /*
864 * Make sure we're not riding the context of a previous
865 * host fork. Note: it's *possible* that after n>1 forks
866 * we have the same pid as one of our exited parents, but
867 * I'm pretty sure there are 0 practical implications, since
868 * it means generations would have to skip rumpclient init.
869 */
870 if (init_done == (mypid = getpid()))
871 return 0;
872
873 /* kq does not traverse fork() */
874 #ifdef USE_KQUEUE
875 if (init_done != 0)
876 holyfd = -1;
877 #endif
878 init_done = mypid;
879
880 sigfillset(&fullset);
881
882 /*
883 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
884 * wann wird man je verstehen? wann wird man je verstehen?
885 */
886 #ifdef RTLD_NEXT
887 #define FINDSYM2(_name_,_syscall_) \
888 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
889 #_syscall_)) == NULL) { \
890 if (rumphijack_dlsym == rumpclient__dlsym) \
891 host_##_name_ = _name_; /* static fallback */ \
892 if (host_##_name_ == NULL) { \
893 fprintf(stderr,"cannot find %s: %s", #_syscall_,\
894 dlerror()); \
895 exit(1); \
896 } \
897 }
898 #else
899 #define FINDSYM2(_name_,_syscall) \
900 host_##_name_ = _name_;
901 #endif
902 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
903 #ifdef __NetBSD__
904 FINDSYM2(socket,__socket30)
905 #else
906 FINDSYM(socket)
907 #endif
908
909 FINDSYM(close)
910 FINDSYM(connect)
911 FINDSYM(fcntl)
912 FINDSYM(poll)
913 FINDSYM(read)
914 FINDSYM(sendmsg)
915 FINDSYM(setsockopt)
916 FINDSYM(dup)
917 FINDSYM(execve)
918
919 #ifdef USE_KQUEUE
920 FINDSYM(kqueue)
921 #if !__NetBSD_Prereq__(5,99,7)
922 FINDSYM(kevent)
923 #else
924 FINDSYM2(kevent,_sys___kevent50)
925 #endif
926 #endif /* USE_KQUEUE */
927
928 #ifdef USE_SIGNALFD
929 FINDSYM(signalfd)
930 #endif
931
932 #undef FINDSYM
933 #undef FINDSY2
934
935 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
936 if ((p = getenv("RUMP_SERVER")) == NULL) {
937 fprintf(stderr, "error: RUMP_SERVER not set\n");
938 errno = ENOENT;
939 goto out;
940 }
941 }
942
943 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
944 errno = error;
945 goto out;
946 }
947
948 if (doinit() == -1)
949 goto out;
950
951 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
952 sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
953 unsetenv("RUMPCLIENT__EXECFD");
954 hstype = HANDSHAKE_EXEC;
955 } else {
956 if (doconnect() == -1)
957 goto out;
958 hstype = HANDSHAKE_GUEST;
959 }
960
961 error = handshake_req(&clispc, hstype, NULL, 0, false);
962 if (error) {
963 pthread_mutex_destroy(&clispc.spc_mtx);
964 pthread_cond_destroy(&clispc.spc_cv);
965 if (clispc.spc_fd != -1)
966 host_close(clispc.spc_fd);
967 errno = error;
968 goto out;
969 }
970 rv = 0;
971
972 out:
973 if (rv == -1)
974 init_done = 0;
975 return rv;
976 }
977
/*
 * State carried from rumpclient_prefork() to the post-fork routines.
 */
struct rumpclient_fork {
	/* copied from the server's prefork response; sent back in the fork handshake */
	uint32_t fork_auth[AUTHLEN];
	/* snapshot of clispc taken at prefork; restored by rumpclient_fork_vparent() */
	struct spclient fork_spc;
	/* snapshot of holyfd taken at prefork; restored by rumpclient_fork_vparent() */
	int fork_holyfd;
};
983
984 struct rumpclient_fork *
985 rumpclient_prefork(void)
986 {
987 struct rumpclient_fork *rpf;
988 sigset_t omask;
989 void *resp;
990 int rv;
991
992 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
993 rpf = malloc(sizeof(*rpf));
994 if (rpf == NULL)
995 goto out;
996
997 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
998 free(rpf);
999 errno = rv;
1000 rpf = NULL;
1001 goto out;
1002 }
1003
1004 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
1005 free(resp);
1006
1007 rpf->fork_spc = clispc;
1008 rpf->fork_holyfd = holyfd;
1009
1010 out:
1011 pthread_sigmask(SIG_SETMASK, &omask, NULL);
1012 return rpf;
1013 }
1014
1015 int
1016 rumpclient_fork_init(struct rumpclient_fork *rpf)
1017 {
1018 int error;
1019 int osock;
1020
1021 osock = clispc.spc_fd;
1022 memset(&clispc, 0, sizeof(clispc));
1023 clispc.spc_fd = osock;
1024
1025 #ifdef USE_KQUEUE
1026 holyfd = -1; /* kqueue descriptor is not copied over fork() */
1027 #else
1028 if (holyfd != -1) {
1029 host_close(holyfd);
1030 holyfd = -1;
1031 }
1032 #endif
1033
1034 if (doinit() == -1)
1035 return -1;
1036 if (doconnect() == -1)
1037 return -1;
1038
1039 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
1040 0, false);
1041 if (error) {
1042 pthread_mutex_destroy(&clispc.spc_mtx);
1043 pthread_cond_destroy(&clispc.spc_cv);
1044 errno = error;
1045 return -1;
1046 }
1047
1048 return 0;
1049 }
1050
/*
 * Cancel a fork announced with rumpclient_prefork().
 * Not implemented: the call currently does nothing at all.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
1058
1059 void
1060 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1061 {
1062
1063 clispc = rpf->fork_spc;
1064 holyfd = rpf->fork_holyfd;
1065 }
1066
1067 void
1068 rumpclient_setconnretry(time_t timeout)
1069 {
1070
1071 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1072 return; /* gigo */
1073
1074 retrytimo = timeout;
1075 }
1076
1077 int
1078 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1079 {
1080 int fd = *fdp;
1081 int untilfd, rv;
1082 int newfd;
1083
1084 switch (variant) {
1085 case RUMPCLIENT_CLOSE_FCLOSEM:
1086 untilfd = MAX(clispc.spc_fd, holyfd);
1087 for (; fd <= untilfd; fd++) {
1088 if (fd == clispc.spc_fd || fd == holyfd)
1089 continue;
1090 rv = host_close(fd);
1091 if (rv == -1)
1092 return -1;
1093 }
1094 *fdp = fd;
1095 break;
1096
1097 case RUMPCLIENT_CLOSE_CLOSE:
1098 case RUMPCLIENT_CLOSE_DUP2:
1099 if (fd == clispc.spc_fd) {
1100 newfd = dupgood(clispc.spc_fd, 1);
1101 if (newfd == -1)
1102 return -1;
1103
1104 #ifdef USE_KQUEUE
1105 {
1106 struct kevent kev[2];
1107
1108 /*
1109 * now, we have a new socket number, so change
1110 * the file descriptor that kqueue is
1111 * monitoring. remove old and add new.
1112 */
1113 EV_SET(&kev[0], clispc.spc_fd,
1114 EVFILT_READ, EV_DELETE, 0, 0, 0);
1115 EV_SET(&kev[1], newfd,
1116 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1117 if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
1118 int sverrno = errno;
1119 host_close(newfd);
1120 errno = sverrno;
1121 return -1;
1122 }}
1123 #endif /* !USE_KQUEUE */
1124 clispc.spc_fd = newfd;
1125 }
1126 if (holyfd != -1 && fd == holyfd) {
1127 newfd = dupgood(holyfd, 1);
1128 if (newfd == -1)
1129 return -1;
1130 holyfd = newfd;
1131 }
1132 break;
1133 }
1134
1135 return 0;
1136 }
1137
/*
 * fork() for rump clients: hands the host's fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1144
1145 /*
1146 * Process is about to exec. Save info about our existing connection
1147 * in the env. rumpclient will check for this info in init().
1148 * This is mostly for the benefit of rumphijack, but regular applications
1149 * may use it as well.
1150 */
1151 int
1152 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1153 {
1154 char buf[4096];
1155 char **newenv;
1156 char *envstr, *envstr2;
1157 size_t nelem;
1158 int rv, sverrno;
1159
1160 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1161 clispc.spc_fd, holyfd);
1162 envstr = malloc(strlen(buf)+1);
1163 if (envstr == NULL) {
1164 return ENOMEM;
1165 }
1166 strcpy(envstr, buf);
1167
1168 /* do we have a fully parsed url we want to forward in the env? */
1169 if (*parsedurl != '\0') {
1170 snprintf(buf, sizeof(buf),
1171 "RUMP__PARSEDSERVER=%s", parsedurl);
1172 envstr2 = malloc(strlen(buf)+1);
1173 if (envstr2 == NULL) {
1174 free(envstr);
1175 return ENOMEM;
1176 }
1177 strcpy(envstr2, buf);
1178 } else {
1179 envstr2 = NULL;
1180 }
1181
1182 for (nelem = 0; envp && envp[nelem]; nelem++)
1183 continue;
1184
1185 newenv = malloc(sizeof(*newenv) * (nelem+3));
1186 if (newenv == NULL) {
1187 free(envstr2);
1188 free(envstr);
1189 return ENOMEM;
1190 }
1191 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1192
1193 newenv[nelem] = envstr;
1194 newenv[nelem+1] = envstr2;
1195 newenv[nelem+2] = NULL;
1196
1197 rv = host_execve(path, argv, newenv);
1198
1199 _DIAGASSERT(rv != 0);
1200 sverrno = errno;
1201 free(envstr2);
1202 free(envstr);
1203 free(newenv);
1204 errno = sverrno;
1205 return rv;
1206 }
1207
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * The fork is announced to the server first.  The parent exits, the
 * child starts a new session, changes to "/" unless nochdir is set,
 * points stdin/stdout/stderr at /dev/null unless noclose is set, and
 * then sets up its own connection to the server.
 *
 * Returns 0 in the child on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/* best effort: leave stdio alone if /dev/null is missing */
		if (fd != -1) {
			dup2(fd, 0);
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	rv = rumpclient_fork_init(rf);

	/*
	 * The fork context is of no use after this point and nothing
	 * else holds on to it, so release it here.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	/* the cancel routine does not release the context */
	free(rf);
	errno = sverrno;
	return -1;
}
1255