rumpclient.c revision 1.50 1 /* $NetBSD: rumpclient.c,v 1.50 2012/08/03 14:52:31 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include "rumpuser_port.h"
33
34 /*
35 * We use kqueue on NetBSD, poll elsewhere. Theoretically we could
36 * use kqueue on other BSD's too, but I haven't tested those. We
37 * want to use kqueue because it will give us the ability to get signal
38 * notifications but defer their handling to a stage where we do not
39 * hold the communication lock. Taking a signal while holding on to
40 * that lock may cause a deadlock. Therefore, block signals throughout
41 * the RPC when using poll. This unfortunately means that the normal
42 * SIGINT way of stopping a process while it is undergoing rump kernel
43 * RPC will not work. If anyone knows which Linux system call handles
44 * the above scenario correctly, I'm all ears.
45 */
46
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.50 2012/08/03 14:52:31 pooka Exp $");
53
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82
83 #include <rump/rumpclient.h>
84
/*
 * Pointers to the host's own implementations of the calls this file
 * needs for talking to the server.  They are filled in by the FINDSYM
 * lookups in rumpclient_init() and used instead of calling the libc
 * names directly.
 */
85 #define HOSTOPS
86 int (*host_socket)(int, int, int);
87 int (*host_close)(int);
88 int (*host_connect)(int, const struct sockaddr *, socklen_t);
89 int (*host_fcntl)(int, int, ...);
90 int (*host_poll)(struct pollfd *, nfds_t, int);
91 ssize_t (*host_read)(int, void *, size_t);
92 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
93 int (*host_setsockopt)(int, int, int, const void *, socklen_t);
94 int (*host_dup)(int);
95
/* kqueue entry points are only needed when the kqueue receive path is built */
96 #ifdef USE_KQUEUE
97 int (*host_kqueue)(void);
98 int (*host_kevent)(int, const struct kevent *, size_t,
99 struct kevent *, size_t, const struct timespec *);
100 #endif
101
/* used by rumpclient_exec() */
102 int (*host_execve)(const char *, char *const[], char *const[]);
103
104 #include "sp_common.c"
105
/* The single client connection of this process; fd -1 means "not connected". */
106 static struct spclient clispc = {
107 .spc_fd = -1,
108 };
109
/* kqueue descriptor (-1 when there is none) and an all-signals mask */
110 static int kq = -1;
111 static sigset_t fullset;
112
113 static int doconnect(void);
114 static int handshake_req(struct spclient *, int, void *, int, bool);
115
116 /*
117 * Reconnect policy consulted by send_with_recon() and changed with
118 * rumpclient_setconnretry().  Default: don't retry.  Most clients
119 * can't handle it (consider e.g. fds suddenly going missing).
 */
120 static time_t retrytimo = 0;
121
122 /* always defined to nothingness for now */
123 #define ERRLOG(a)
124
125 static int
126 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
127 {
128 struct timeval starttime, curtime;
129 time_t prevreconmsg;
130 unsigned reconretries;
131 int rv;
132
133 for (prevreconmsg = 0, reconretries = 0;;) {
134 rv = dosend(spc, iov, iovlen);
135 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
136 /* no persistent connections */
137 if (retrytimo == 0) {
138 rv = ENOTCONN;
139 break;
140 }
141 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
142 _exit(1);
143
144 if (!prevreconmsg) {
145 prevreconmsg = time(NULL);
146 gettimeofday(&starttime, NULL);
147 }
148 if (reconretries == 1) {
149 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
150 rv = ENOTCONN;
151 break;
152 }
153 fprintf(stderr, "rump_sp: connection to "
154 "kernel lost, trying to reconnect ...\n");
155 } else if (time(NULL) - prevreconmsg > 120) {
156 fprintf(stderr, "rump_sp: still trying to "
157 "reconnect ...\n");
158 prevreconmsg = time(NULL);
159 }
160
161 /* check that we aren't over the limit */
162 if (retrytimo > 0) {
163 struct timeval tmp;
164
165 gettimeofday(&curtime, NULL);
166 timersub(&curtime, &starttime, &tmp);
167 if (tmp.tv_sec >= retrytimo) {
168 fprintf(stderr, "rump_sp: reconnect "
169 "failed, %lld second timeout\n",
170 (long long)retrytimo);
171 return ENOTCONN;
172 }
173 }
174
175 /* adhoc backoff timer */
176 if (reconretries < 10) {
177 usleep(100000 * reconretries);
178 } else {
179 sleep(MIN(10, reconretries-9));
180 }
181 reconretries++;
182
183 if ((rv = doconnect()) != 0)
184 continue;
185 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 NULL, 0, true)) != 0)
187 continue;
188
189 /*
190 * ok, reconnect succesful. we need to return to
191 * the upper layer to get the entire PDU resent.
192 */
193 if (reconretries != 1)
194 fprintf(stderr, "rump_sp: reconnected!\n");
195 rv = EAGAIN;
196 break;
197 } else {
198 _DIAGASSERT(errno != EAGAIN);
199 break;
200 }
201 }
202
203 return rv;
204 }
205
/*
 * Wait until the response registered in rw has arrived.
 *
 * At most one thread at a time acts as the receiver: whoever finds
 * spc_istatus FREE marks it BUSY, drops spc_mtx and reads frames off the
 * connection, dispatching responses via kickwaiter() and server requests
 * via handlereq().  Every other thread marks the receiver WANTED and
 * sleeps on its own rw_cv.
 *
 * mask is the signal mask that is briefly installed when the kqueue
 * path sees a signal event, so that the signal can be delivered while
 * no lock is held.  If keeplock is false the send lock is released via
 * sendunlockl() once spc_mtx is held.
 *
 * Returns ENOTCONN if the connection generation changed while waiting
 * (doconnect() increments it) or readframe() reported -1; otherwise
 * returns rw->rw_error.
 */
206 static int
207 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
208 bool keeplock)
209 {
210 uint64_t mygen;
211 bool imalive = true;
212
213 pthread_mutex_lock(&spc->spc_mtx);
214 if (!keeplock)
215 sendunlockl(spc);
/* remember which connection generation the request was sent on */
216 mygen = spc->spc_generation;
217
218 rw->rw_error = 0;
219 while (!rw->rw_done && rw->rw_error == 0) {
/* a reconnect happened or the link died: stop waiting */
220 if (__predict_false(spc->spc_generation != mygen || !imalive))
221 break;
222
223 /* are we free to receive? */
224 if (spc->spc_istatus == SPCSTATUS_FREE) {
225 int gotresp, dosig, rv;
226
/* become the receiver; frames are read without holding spc_mtx */
227 spc->spc_istatus = SPCSTATUS_BUSY;
228 pthread_mutex_unlock(&spc->spc_mtx);
229
230 dosig = 0;
231 for (gotresp = 0; !gotresp; ) {
232 #ifdef USE_KQUEUE
233 struct kevent kev[8];
234 int i;
235
236 /*
237 * typically we don't have a frame waiting
238 * when we come in here, so call kevent now
239 */
240 rv = host_kevent(kq, NULL, 0,
241 kev, __arraycount(kev), NULL);
242
/* on kevent failure let readframe() below decide what to do */
243 if (__predict_false(rv == -1)) {
244 goto activity;
245 }
246
247 /*
248 * XXX: don't know how this can happen
249 * (timeout cannot expire since there
250 * isn't one), but it does happen.
251 * treat it as an exceptional condition
252 * and go through tryread to determine
253 * alive status.
254 */
255 if (__predict_false(rv == 0))
256 goto activity;
257
/* count signal events; if any, give up the receiver role to handle them */
258 for (i = 0; i < rv; i++) {
259 if (kev[i].filter == EVFILT_SIGNAL)
260 dosig++;
261 }
262 if (dosig)
263 goto cleanup;
264
265 /*
266 * ok, activity. try to read a frame to
267 * determine what happens next.
268 */
269 activity:
270 #else /* USE_KQUEUE */
271 struct pollfd pfd;
272
273 pfd.fd = clispc.spc_fd;
274 pfd.events = POLLIN;
275
/* block until the socket is readable; the result itself is not examined */
276 rv = host_poll(&pfd, 1, -1);
277 #endif /* !USE_KQUEUE */
278
/* 0: go back to waiting, -1: connection is dead, otherwise a frame is ready */
279 switch (readframe(spc)) {
280 case 0:
281 continue;
282 case -1:
283 imalive = false;
284 goto cleanup;
285 default:
286 /* case 1 */
287 break;
288 }
289
290 switch (spc->spc_hdr.rsp_class) {
291 case RUMPSP_RESP:
292 case RUMPSP_ERROR:
/* hand the frame to its waiter; we are done once it was our own request */
293 kickwaiter(spc);
294 gotresp = spc->spc_hdr.rsp_reqno ==
295 rw->rw_reqno;
296 break;
297 case RUMPSP_REQ:
298 handlereq(spc);
299 break;
300 default:
301 /* panic */
302 break;
303 }
304 }
305
306 cleanup:
/* hand the receiver role back and wake waiters if someone asked for it */
307 pthread_mutex_lock(&spc->spc_mtx);
308 if (spc->spc_istatus == SPCSTATUS_WANTED)
309 kickall(spc);
310 spc->spc_istatus = SPCSTATUS_FREE;
311
312 /*
 * take one for the team: install the caller's mask so the signal
 * can be delivered with no lock held, then block everything again
 */
313 if (dosig) {
314 pthread_mutex_unlock(&spc->spc_mtx);
315 pthread_sigmask(SIG_SETMASK, mask, NULL);
316 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
317 pthread_mutex_lock(&spc->spc_mtx);
318 }
319 } else {
/* somebody else is receiving: ask to be woken and sleep */
320 spc->spc_istatus = SPCSTATUS_WANTED;
321 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
322 }
323 }
/* done waiting (one way or another): unregister and tear down rw */
324 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
325 pthread_mutex_unlock(&spc->spc_mtx);
326 pthread_cond_destroy(&rw->rw_cv);
327
328 if (spc->spc_generation != mygen || !imalive) {
329 return ENOTCONN;
330 }
331 return rw->rw_error;
332 }
333
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 const void *data, size_t dlen, void **resp)
337 {
338 struct rsp_hdr rhdr;
339 struct respwait rw;
340 struct iovec iov[2];
341 int rv;
342
343 rhdr.rsp_len = sizeof(rhdr) + dlen;
344 rhdr.rsp_class = RUMPSP_REQ;
345 rhdr.rsp_type = RUMPSP_SYSCALL;
346 rhdr.rsp_sysnum = sysnum;
347
348 IOVPUT(iov[0], rhdr);
349 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350
351 do {
352 putwait(spc, &rw, &rhdr);
353 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 unputwait(spc, &rw);
355 continue;
356 }
357
358 rv = cliwaitresp(spc, &rw, omask, false);
359 if (rv == ENOTCONN)
360 rv = EAGAIN;
361 } while (rv == EAGAIN);
362
363 *resp = rw.rw_data;
364 return rv;
365 }
366
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 int cancel, bool haslock)
370 {
371 struct handshake_fork rf;
372 const char *myprogname = NULL; /* XXXgcc */
373 struct rsp_hdr rhdr;
374 struct respwait rw;
375 sigset_t omask;
376 size_t bonus;
377 struct iovec iov[2];
378 int rv;
379
380 if (type == HANDSHAKE_FORK) {
381 bonus = sizeof(rf);
382 } else {
383 #ifdef __NetBSD__
384 /* would procfs work on NetBSD too? */
385 myprogname = getprogname();
386 #else
387 int fd = open("/proc/self/comm", O_RDONLY);
388 if (fd == -1) {
389 myprogname = "???";
390 } else {
391 static char commname[128];
392
393 memset(commname, 0, sizeof(commname));
394 if (read(fd, commname, sizeof(commname)) > 0) {
395 char *n;
396
397 n = strrchr(commname, '\n');
398 if (n)
399 *n = '\0';
400 myprogname = commname;
401 } else {
402 myprogname = "???";
403 }
404 close(fd);
405 }
406 #endif
407 bonus = strlen(myprogname)+1;
408 }
409
410 /* performs server handshake */
411 rhdr.rsp_len = sizeof(rhdr) + bonus;
412 rhdr.rsp_class = RUMPSP_REQ;
413 rhdr.rsp_type = RUMPSP_HANDSHAKE;
414 rhdr.rsp_handshake = type;
415
416 IOVPUT(iov[0], rhdr);
417
418 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
419 if (haslock)
420 putwait_locked(spc, &rw, &rhdr);
421 else
422 putwait(spc, &rw, &rhdr);
423 if (type == HANDSHAKE_FORK) {
424 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
425 rf.rf_cancel = cancel;
426 IOVPUT(iov[1], rf);
427 } else {
428 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
429 }
430 rv = send_with_recon(spc, iov, __arraycount(iov));
431 if (rv || cancel) {
432 if (haslock)
433 unputwait_locked(spc, &rw);
434 else
435 unputwait(spc, &rw);
436 if (cancel) {
437 goto out;
438 }
439 } else {
440 rv = cliwaitresp(spc, &rw, &omask, haslock);
441 }
442 if (rv)
443 goto out;
444
445 rv = *(int *)rw.rw_data;
446 free(rw.rw_data);
447
448 out:
449 pthread_sigmask(SIG_SETMASK, &omask, NULL);
450 return rv;
451 }
452
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 struct rsp_hdr rhdr;
457 struct respwait rw;
458 struct iovec iov[1];
459 int rv;
460
461 rhdr.rsp_len = sizeof(rhdr);
462 rhdr.rsp_class = RUMPSP_REQ;
463 rhdr.rsp_type = RUMPSP_PREFORK;
464 rhdr.rsp_error = 0;
465
466 IOVPUT(iov[0], rhdr);
467
468 do {
469 putwait(spc, &rw, &rhdr);
470 rv = send_with_recon(spc, iov, __arraycount(iov));
471 if (rv != 0) {
472 unputwait(spc, &rw);
473 continue;
474 }
475
476 rv = cliwaitresp(spc, &rw, omask, false);
477 if (rv == ENOTCONN)
478 rv = EAGAIN;
479 } while (rv == EAGAIN);
480
481 *resp = rw.rw_data;
482 return rv;
483 }
484
485 /*
486 * prevent response code from deadlocking with reconnect code
487 */
488 static int
489 resp_sendlock(struct spclient *spc)
490 {
491 int rv = 0;
492
493 pthread_mutex_lock(&spc->spc_mtx);
494 while (spc->spc_ostatus != SPCSTATUS_FREE) {
495 if (__predict_false(spc->spc_reconnecting)) {
496 rv = EBUSY;
497 goto out;
498 }
499 spc->spc_ostatus = SPCSTATUS_WANTED;
500 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
501 }
502 spc->spc_ostatus = SPCSTATUS_BUSY;
503
504 out:
505 pthread_mutex_unlock(&spc->spc_mtx);
506 return rv;
507 }
508
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 int wantstr)
512 {
513 struct rsp_hdr rhdr;
514 struct iovec iov[2];
515
516 if (wantstr)
517 dlen = MIN(dlen, strlen(data)+1);
518
519 rhdr.rsp_len = sizeof(rhdr) + dlen;
520 rhdr.rsp_reqno = reqno;
521 rhdr.rsp_class = RUMPSP_RESP;
522 rhdr.rsp_type = RUMPSP_COPYIN;
523 rhdr.rsp_sysnum = 0;
524
525 IOVPUT(iov[0], rhdr);
526 IOVPUT_WITHSIZE(iov[1], data, dlen);
527
528 if (resp_sendlock(spc) != 0)
529 return;
530 (void)SENDIOV(spc, iov);
531 sendunlock(spc);
532 }
533
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 struct rsp_hdr rhdr;
538 struct iovec iov[2];
539
540 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 rhdr.rsp_reqno = reqno;
542 rhdr.rsp_class = RUMPSP_RESP;
543 rhdr.rsp_type = RUMPSP_ANONMMAP;
544 rhdr.rsp_sysnum = 0;
545
546 IOVPUT(iov[0], rhdr);
547 IOVPUT(iov[1], addr);
548
549 if (resp_sendlock(spc) != 0)
550 return;
551 (void)SENDIOV(spc, iov);
552 sendunlock(spc);
553 }
554
555 int
556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
557 register_t *retval)
558 {
559 struct rsp_sysresp *resp;
560 sigset_t omask;
561 void *rdata;
562 int rv;
563
564 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
565
566 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
567 sysnum, data, dlen));
568
569 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
570 if (rv)
571 goto out;
572
573 resp = rdata;
574 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
575 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
576
577 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
578 rv = resp->rsys_error;
579 free(rdata);
580
581 out:
582 pthread_sigmask(SIG_SETMASK, &omask, NULL);
583 return rv;
584 }
585
586 static void
587 handlereq(struct spclient *spc)
588 {
589 struct rsp_copydata *copydata;
590 struct rsp_hdr *rhdr = &spc->spc_hdr;
591 void *mapaddr;
592 size_t maplen;
593 int reqtype = spc->spc_hdr.rsp_type;
594
595 switch (reqtype) {
596 case RUMPSP_COPYIN:
597 case RUMPSP_COPYINSTR:
598 /*LINTED*/
599 copydata = (struct rsp_copydata *)spc->spc_buf;
600 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
601 copydata->rcp_addr, copydata->rcp_len));
602 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
603 copydata->rcp_addr, copydata->rcp_len,
604 reqtype == RUMPSP_COPYINSTR);
605 break;
606 case RUMPSP_COPYOUT:
607 case RUMPSP_COPYOUTSTR:
608 /*LINTED*/
609 copydata = (struct rsp_copydata *)spc->spc_buf;
610 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
611 copydata->rcp_addr, copydata->rcp_len));
612 /*LINTED*/
613 memcpy(copydata->rcp_addr, copydata->rcp_data,
614 copydata->rcp_len);
615 break;
616 case RUMPSP_ANONMMAP:
617 /*LINTED*/
618 maplen = *(size_t *)spc->spc_buf;
619 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
620 MAP_ANON, -1, 0);
621 if (mapaddr == MAP_FAILED)
622 mapaddr = NULL;
623 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
624 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
625 break;
626 case RUMPSP_RAISE:
627 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
628 raise((int)rhdr->rsp_signo);
629 /*
630 * We most likely have signals blocked, but the signal
631 * will be handled soon enough when we return.
632 */
633 break;
634 default:
635 printf("PANIC: INVALID TYPE %d\n", reqtype);
636 abort();
637 break;
638 }
639
640 spcfreebuf(spc);
641 }
642
/*
 * Where the server lives: filled in by parseurl() in rumpclient_init().
 * ptab_idx indexes parsetab[], serv_sa is the address handed to connect.
 */
643 static unsigned ptab_idx;
644 static struct sockaddr *serv_sa;
645
/*
 * Duplicate myfd until the result is a "good" descriptor, i.e. one
 * which does not collide with stdio (greater than 2).
 *
 * If mustchange is set, a new descriptor is produced even when myfd is
 * already good, and the descriptor passed in is left open.  In all
 * other cases every intermediate descriptor, including the one passed
 * in when it was <= 2, is closed.
 *
 * Returns the new descriptor, or -1 if myfd was -1 or host_dup()
 * failed; errno from a failed dup is preserved across the cleanup.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	unsigned int k;
	int sverrno = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		if (mustchange) {
			/* the caller's descriptor is not ours to close */
			mustchange = 0;
		} else {
			assert(nstale < __arraycount(stale));
			stale[nstale++] = myfd;
		}
		myfd = host_dup(myfd);
	}

	/* closing below may clobber errno from a failed dup */
	if (myfd == -1 && nstale > 0)
		sverrno = errno;

	for (k = nstale; k > 0; k--)
		host_close(stale[k-1]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
677
678 static int
679 doconnect(void)
680 {
681 struct respwait rw;
682 struct rsp_hdr rhdr;
683 char banner[MAXBANNER];
684 struct pollfd pfd;
685 int s, error, flags;
686 ssize_t n;
687
688 if (kq != -1)
689 host_close(kq);
690 kq = -1;
691 s = -1;
692
693 if (clispc.spc_fd != -1)
694 host_close(clispc.spc_fd);
695 clispc.spc_fd = -1;
696
697 /*
698 * for reconnect, gate everyone out of the receiver code
699 */
700 putwait_locked(&clispc, &rw, &rhdr);
701
702 pthread_mutex_lock(&clispc.spc_mtx);
703 clispc.spc_reconnecting = 1;
704 pthread_cond_broadcast(&clispc.spc_cv);
705 clispc.spc_generation++;
706 while (clispc.spc_istatus != SPCSTATUS_FREE) {
707 clispc.spc_istatus = SPCSTATUS_WANTED;
708 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
709 }
710 kickall(&clispc);
711
712 /*
713 * we can release it already since we hold the
714 * send lock during reconnect
715 * XXX: assert it
716 */
717 clispc.spc_istatus = SPCSTATUS_FREE;
718 pthread_mutex_unlock(&clispc.spc_mtx);
719 unputwait_locked(&clispc, &rw);
720
721 free(clispc.spc_buf);
722 clispc.spc_off = 0;
723
724 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
725 if (s == -1)
726 return -1;
727
728 pfd.fd = s;
729 pfd.events = POLLIN;
730 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
731 if (errno == EINTR)
732 continue;
733 ERRLOG(("rump_sp: client connect failed: %s\n",
734 strerror(errno)));
735 return -1;
736 }
737
738 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
739 ERRLOG(("rump_sp: connect hook failed\n"));
740 return -1;
741 }
742
743 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
744 ERRLOG(("rump_sp: failed to read banner\n"));
745 return -1;
746 }
747
748 if (banner[n-1] != '\n') {
749 ERRLOG(("rump_sp: invalid banner\n"));
750 return -1;
751 }
752 banner[n] = '\0';
753 /* XXX parse the banner some day */
754
755 flags = host_fcntl(s, F_GETFL, 0);
756 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
757 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
758 return -1;
759 }
760 clispc.spc_fd = s;
761 clispc.spc_state = SPCSTATE_RUNNING;
762 clispc.spc_reconnecting = 0;
763
764 #ifdef USE_KQUEUE
765 {
766 struct kevent kev[NSIG+1];
767 int i;
768
769 /* setup kqueue, we want all signals and the fd */
770 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
771 ERRLOG(("rump_sp: cannot setup kqueue"));
772 return -1;
773 }
774
775 for (i = 0; i < NSIG; i++) {
776 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
777 }
778 EV_SET(&kev[NSIG], clispc.spc_fd,
779 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
780 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
781 ERRLOG(("rump_sp: kevent() failed"));
782 return -1;
783 }
784 }
785 #endif /* USE_KQUEUE */
786
787 return 0;
788 }
789
790 static int
791 doinit(void)
792 {
793
794 TAILQ_INIT(&clispc.spc_respwait);
795 pthread_mutex_init(&clispc.spc_mtx, NULL);
796 pthread_cond_init(&clispc.spc_cv, NULL);
797
798 return 0;
799 }
800
/*
 * Default symbol lookup: a plain dlsym().
 *
 * rumphijack_dlsym is a weak alias for this function, so that another
 * object can supply its own rumphijack_dlsym.  rumpclient_init()
 * compares the two addresses to find out whether that happened.
 */
void *rumpclient__dlsym(void *, const char *);

void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}

void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
810
/* pid of the process which completed rumpclient_init(); 0 if none has */
811 static pid_t init_done = 0;
812
813 int
814 rumpclient_init(void)
815 {
816 char *p;
817 int error;
818 int rv = -1;
819 int hstype;
820 pid_t mypid;
821
822 /*
823 * Make sure we're not riding the context of a previous
824 * host fork. Note: it's *possible* that after n>1 forks
825 * we have the same pid as one of our exited parents, but
826 * I'm pretty sure there are 0 practical implications, since
827 * it means generations would have to skip rumpclient init.
828 */
829 if (init_done == (mypid = getpid()))
830 return 0;
831
832 /* kq does not traverse fork() */
833 if (init_done != 0)
834 kq = -1;
835 init_done = mypid;
836
837 sigfillset(&fullset);
838
839 /*
840 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
841 * wann wird man je verstehen? wann wird man je verstehen?
842 */
843 #define FINDSYM2(_name_,_syscall_) \
844 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
845 #_syscall_)) == NULL) { \
846 if (rumphijack_dlsym == rumpclient__dlsym) \
847 host_##_name_ = _name_; /* static fallback */ \
848 if (host_##_name_ == NULL) \
849 errx(1, "cannot find %s: %s", #_syscall_, \
850 dlerror()); \
851 }
852 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
853 #ifdef __NetBSD__
854 FINDSYM2(socket,__socket30)
855 #else
856 FINDSYM(socket)
857 #endif
858
859 FINDSYM(close)
860 FINDSYM(connect)
861 FINDSYM(fcntl)
862 FINDSYM(poll)
863 FINDSYM(read)
864 FINDSYM(sendmsg)
865 FINDSYM(setsockopt)
866 FINDSYM(dup)
867 FINDSYM(execve)
868
869 #ifdef USE_KQUEUE
870 FINDSYM(kqueue)
871 #if !__NetBSD_Prereq__(5,99,7)
872 FINDSYM(kevent)
873 #else
874 FINDSYM2(kevent,_sys___kevent50)
875 #endif
876 #endif /* USE_KQUEUE */
877
878 #undef FINDSYM
879 #undef FINDSY2
880
881 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
882 if ((p = getenv("RUMP_SERVER")) == NULL) {
883 errno = ENOENT;
884 goto out;
885 }
886 }
887
888 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
889 errno = error;
890 goto out;
891 }
892
893 if (doinit() == -1)
894 goto out;
895
896 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
897 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
898 unsetenv("RUMPCLIENT__EXECFD");
899 hstype = HANDSHAKE_EXEC;
900 } else {
901 if (doconnect() == -1)
902 goto out;
903 hstype = HANDSHAKE_GUEST;
904 }
905
906 error = handshake_req(&clispc, hstype, NULL, 0, false);
907 if (error) {
908 pthread_mutex_destroy(&clispc.spc_mtx);
909 pthread_cond_destroy(&clispc.spc_cv);
910 if (clispc.spc_fd != -1)
911 host_close(clispc.spc_fd);
912 errno = error;
913 goto out;
914 }
915 rv = 0;
916
917 out:
918 if (rv == -1)
919 init_done = 0;
920 return rv;
921 }
922
/*
 * State captured by rumpclient_prefork() and consumed after the fork
 * by rumpclient_fork_init() (auth cookie) or rumpclient_fork_vparent()
 * (connection state).
 */
923 struct rumpclient_fork {
924 uint32_t fork_auth[AUTHLEN]; /* copied from the prefork response */
925 struct spclient fork_spc; /* copy of clispc at prefork time */
926 int fork_kq; /* copy of kq at prefork time */
927 };
928
929 struct rumpclient_fork *
930 rumpclient_prefork(void)
931 {
932 struct rumpclient_fork *rpf;
933 sigset_t omask;
934 void *resp;
935 int rv;
936
937 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
938 rpf = malloc(sizeof(*rpf));
939 if (rpf == NULL)
940 goto out;
941
942 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
943 free(rpf);
944 errno = rv;
945 rpf = NULL;
946 goto out;
947 }
948
949 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
950 free(resp);
951
952 rpf->fork_spc = clispc;
953 rpf->fork_kq = kq;
954
955 out:
956 pthread_sigmask(SIG_SETMASK, &omask, NULL);
957 return rpf;
958 }
959
960 int
961 rumpclient_fork_init(struct rumpclient_fork *rpf)
962 {
963 int error;
964 int osock;
965
966 osock = clispc.spc_fd;
967 memset(&clispc, 0, sizeof(clispc));
968 clispc.spc_fd = osock;
969
970 kq = -1; /* kqueue descriptor is not copied over fork() */
971
972 if (doinit() == -1)
973 return -1;
974 if (doconnect() == -1)
975 return -1;
976
977 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
978 0, false);
979 if (error) {
980 pthread_mutex_destroy(&clispc.spc_mtx);
981 pthread_cond_destroy(&clispc.spc_cv);
982 errno = error;
983 return -1;
984 }
985
986 return 0;
987 }
988
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the handle is currently ignored.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
996
997 void
998 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
999 {
1000
1001 clispc = rpf->fork_spc;
1002 kq = rpf->fork_kq;
1003 }
1004
1005 void
1006 rumpclient_setconnretry(time_t timeout)
1007 {
1008
1009 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1010 return; /* gigo */
1011
1012 retrytimo = timeout;
1013 }
1014
1015 int
1016 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1017 {
1018 int fd = *fdp;
1019 int untilfd, rv;
1020 int newfd;
1021
1022 switch (variant) {
1023 case RUMPCLIENT_CLOSE_FCLOSEM:
1024 untilfd = MAX(clispc.spc_fd, kq);
1025 for (; fd <= untilfd; fd++) {
1026 if (fd == clispc.spc_fd || fd == kq)
1027 continue;
1028 rv = host_close(fd);
1029 if (rv == -1)
1030 return -1;
1031 }
1032 *fdp = fd;
1033 break;
1034
1035 case RUMPCLIENT_CLOSE_CLOSE:
1036 case RUMPCLIENT_CLOSE_DUP2:
1037 if (fd == clispc.spc_fd) {
1038 newfd = dupgood(clispc.spc_fd, 1);
1039 if (newfd == -1)
1040 return -1;
1041
1042 #ifdef USE_KQUEUE
1043 {
1044 struct kevent kev[2];
1045
1046 /*
1047 * now, we have a new socket number, so change
1048 * the file descriptor that kqueue is
1049 * monitoring. remove old and add new.
1050 */
1051 EV_SET(&kev[0], clispc.spc_fd,
1052 EVFILT_READ, EV_DELETE, 0, 0, 0);
1053 EV_SET(&kev[1], newfd,
1054 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1055 if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
1056 int sverrno = errno;
1057 host_close(newfd);
1058 errno = sverrno;
1059 return -1;
1060 }
1061 clispc.spc_fd = newfd;
1062 }
1063 }
1064 if (fd == kq) {
1065 newfd = dupgood(kq, 1);
1066 if (newfd == -1)
1067 return -1;
1068 kq = newfd;
1069 #else /* USE_KQUEUE */
1070 clispc.spc_fd = newfd;
1071 #endif /* !USE_KQUEUE */
1072 }
1073 break;
1074 }
1075
1076 return 0;
1077 }
1078
/*
 * fork() for rump clients: rumpclient__dofork() with the host's fork()
 * as the forking function.  Returns what rumpclient__dofork() returns.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1085
1086 /*
1087 * Process is about to exec. Save info about our existing connection
1088 * in the env. rumpclient will check for this info in init().
1089 * This is mostly for the benefit of rumphijack, but regular applications
1090 * may use it as well.
1091 */
1092 int
1093 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1094 {
1095 char buf[4096];
1096 char **newenv;
1097 char *envstr, *envstr2;
1098 size_t nelem;
1099 int rv, sverrno;
1100
1101 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1102 clispc.spc_fd, kq);
1103 envstr = malloc(strlen(buf)+1);
1104 if (envstr == NULL) {
1105 return ENOMEM;
1106 }
1107 strcpy(envstr, buf);
1108
1109 /* do we have a fully parsed url we want to forward in the env? */
1110 if (*parsedurl != '\0') {
1111 snprintf(buf, sizeof(buf),
1112 "RUMP__PARSEDSERVER=%s", parsedurl);
1113 envstr2 = malloc(strlen(buf)+1);
1114 if (envstr2 == NULL) {
1115 free(envstr);
1116 return ENOMEM;
1117 }
1118 strcpy(envstr2, buf);
1119 } else {
1120 envstr2 = NULL;
1121 }
1122
1123 for (nelem = 0; envp && envp[nelem]; nelem++)
1124 continue;
1125
1126 newenv = malloc(sizeof(*newenv) * (nelem+3));
1127 if (newenv == NULL) {
1128 free(envstr2);
1129 free(envstr);
1130 return ENOMEM;
1131 }
1132 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1133
1134 newenv[nelem] = envstr;
1135 newenv[nelem+1] = envstr2;
1136 newenv[nelem+2] = NULL;
1137
1138 rv = host_execve(path, argv, newenv);
1139
1140 _DIAGASSERT(rv != 0);
1141 sverrno = errno;
1142 free(envstr2);
1143 free(envstr);
1144 free(newenv);
1145 errno = sverrno;
1146 return rv;
1147 }
1148
/*
 * daemon(3) for rump clients: prepare the fork with the server, let
 * the host's daemon() detach the process, and then set up the
 * connection anew in the process that continues.
 *
 * nochdir and noclose are passed to daemon() as is.  Returns 0 on
 * success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int rv, sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/* the handle is handed over to the cancel routine */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * rumpclient_fork_init() only reads the handle during the call,
	 * so the handle allocated by rumpclient_prefork() can be
	 * released now.  Keep errno from the init intact.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;
}
1170