rumpclient.c revision 1.51.2.1 1 /* $NetBSD: rumpclient.c,v 1.51.2.1 2012/11/20 03:00:45 tls Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include "rumpuser_port.h"
33
34 /*
35 * We use kqueue on NetBSD, poll elsewhere. Theoretically we could
36 * use kqueue on other BSD's too, but I haven't tested those. We
37 * want to use kqueue because it will give us the ability to get signal
38 * notifications but defer their handling to a stage where we do not
39 * hold the communication lock. Taking a signal while holding on to
40 * that lock may cause a deadlock. Therefore, block signals throughout
41 * the RPC when using poll. This unfortunately means that the normal
42 * SIGINT way of stopping a process while it is undergoing rump kernel
43 * RPC will not work. If anyone know which Linux system call handles
44 * the above scenario correctly, I'm all ears.
45 */
46
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.51.2.1 2012/11/20 03:00:45 tls Exp $");
53
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82
83 #include <rump/rumpclient.h>
84
85 #define HOSTOPS
86 int (*host_socket)(int, int, int);
87 int (*host_close)(int);
88 int (*host_connect)(int, const struct sockaddr *, socklen_t);
89 int (*host_fcntl)(int, int, ...);
90 int (*host_poll)(struct pollfd *, nfds_t, int);
91 ssize_t (*host_read)(int, void *, size_t);
92 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
93 int (*host_setsockopt)(int, int, int, const void *, socklen_t);
94 int (*host_dup)(int);
95
96 #ifdef USE_KQUEUE
97 int (*host_kqueue)(void);
98 int (*host_kevent)(int, const struct kevent *, size_t,
99 struct kevent *, size_t, const struct timespec *);
100 #endif
101
102 int (*host_execve)(const char *, char *const[], char *const[]);
103
104 #include "sp_common.c"
105
/* the single connection to the rump kernel server */
static struct spclient clispc = {
	.spc_fd = -1,
};

static int kq = -1;		/* kqueue fd (NetBSD only); -1 when not set up */
static sigset_t fullset;	/* all signals; blocked for the duration of RPCs */

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry. Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
124
/*
 * Send an iovec-vector to the server, transparently attempting to
 * reconnect (subject to the retrytimo policy) if the connection has
 * gone away.  Returns 0 on success, EAGAIN after a successful
 * reconnect (caller must resend the entire PDU, since partial data
 * may have been lost), or an errno value on failure.
 */
static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			/* first failure: start the reconnect clock */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* nag the user at most every two minutes */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				struct timeval tmp;

				gettimeofday(&curtime, NULL);
				timersub(&curtime, &starttime, &tmp);
				if (tmp.tv_sec >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/* adhoc backoff timer: 0.1s steps, capped at 10s */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			/*
			 * NOTE(review): asserts errno while rv carries
			 * dosend()'s result — presumably dosend also sets
			 * errno; confirm against sp_common.c.
			 */
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
205
/*
 * Wait for the response matching "rw".  At most one thread at a time
 * (spc_istatus) acts as the receiver: it reads frames and dispatches
 * them, waking other waiters (kickwaiter) and serving inline kernel
 * requests (handlereq).  Other threads sleep on their rw_cv.  With
 * kqueue, pending signals are noted but only delivered after the
 * receiver role and the mutex are released, to avoid running signal
 * handlers while holding the communication lock.
 *
 * Returns 0 or rw_error; ENOTCONN if the connection died or was
 * re-established (generation change) while we waited.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		/* reconnect or connection death invalidates this wait */
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* count signal events; deliver after cleanup */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity. try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* partial frame; poll again */
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * briefly restore the caller's mask so
				 * pending signals get delivered, lock-free
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
333
/*
 * Execute one syscall RPC: register a waiter, send the request, and
 * wait for the matching response.  The whole cycle is retried while
 * the connection is being re-established (EAGAIN from the lower
 * layers).  On success *resp points to the response payload, owned
 * by the caller (free()).
 */
static int
syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
	const void *data, size_t dlen, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + dlen;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = sysnum;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);

	do {
		putwait(spc, &rw, &rhdr);
		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
			/* send failed: no response will come; retire waiter */
			unputwait(spc, &rw);
			continue;
		}

		rv = cliwaitresp(spc, &rw, omask, false);
		if (rv == ENOTCONN)
			rv = EAGAIN;
	} while (rv == EAGAIN);

	*resp = rw.rw_data;
	return rv;
}
366
/*
 * Perform the handshake with the server.  "type" is HANDSHAKE_GUEST,
 * HANDSHAKE_FORK or HANDSHAKE_EXEC; for HANDSHAKE_FORK, "data" points
 * to the fork auth cookie and "cancel" may abort a pending fork.  For
 * the other types the client's program name is sent as the payload.
 * All signals are blocked for the duration.  Returns the server's
 * handshake status, or a local errno on communication failure.
 */
static int
handshake_req(struct spclient *spc, int type, void *data,
	int cancel, bool haslock)
{
	struct handshake_fork rf;
	const char *myprogname = NULL; /* XXXgcc */
	struct rsp_hdr rhdr;
	struct respwait rw;
	sigset_t omask;
	size_t bonus;
	struct iovec iov[2];
	int rv;

	if (type == HANDSHAKE_FORK) {
		bonus = sizeof(rf);
	} else {
#ifdef __NetBSD__
		/* would procfs work on NetBSD too? */
		myprogname = getprogname();
#else
		/* no getprogname() here; fish the name out of procfs */
		int fd = open("/proc/self/comm", O_RDONLY);
		if (fd == -1) {
			myprogname = "???";
		} else {
			static char commname[128];

			memset(commname, 0, sizeof(commname));
			if (read(fd, commname, sizeof(commname)) > 0) {
				char *n;

				/* strip the trailing newline comm carries */
				n = strrchr(commname, '\n');
				if (n)
					*n = '\0';
				myprogname = commname;
			} else {
				myprogname = "???";
			}
			close(fd);
		}
#endif
		bonus = strlen(myprogname)+1;
	}

	/* performs server handshake */
	rhdr.rsp_len = sizeof(rhdr) + bonus;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_HANDSHAKE;
	rhdr.rsp_handshake = type;

	IOVPUT(iov[0], rhdr);

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	if (haslock)
		putwait_locked(spc, &rw, &rhdr);
	else
		putwait(spc, &rw, &rhdr);
	if (type == HANDSHAKE_FORK) {
		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
		rf.rf_cancel = cancel;
		IOVPUT(iov[1], rf);
	} else {
		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
	}
	rv = send_with_recon(spc, iov, __arraycount(iov));
	if (rv || cancel) {
		/* send failed, or cancellation: no response expected */
		if (haslock)
			unputwait_locked(spc, &rw);
		else
			unputwait(spc, &rw);
		if (cancel) {
			goto out;
		}
	} else {
		rv = cliwaitresp(spc, &rw, &omask, haslock);
	}
	if (rv)
		goto out;

	/* response payload is a single int: the handshake status */
	rv = *(int *)rw.rw_data;
	free(rw.rw_data);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}
452
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 struct rsp_hdr rhdr;
457 struct respwait rw;
458 struct iovec iov[1];
459 int rv;
460
461 rhdr.rsp_len = sizeof(rhdr);
462 rhdr.rsp_class = RUMPSP_REQ;
463 rhdr.rsp_type = RUMPSP_PREFORK;
464 rhdr.rsp_error = 0;
465
466 IOVPUT(iov[0], rhdr);
467
468 do {
469 putwait(spc, &rw, &rhdr);
470 rv = send_with_recon(spc, iov, __arraycount(iov));
471 if (rv != 0) {
472 unputwait(spc, &rw);
473 continue;
474 }
475
476 rv = cliwaitresp(spc, &rw, omask, false);
477 if (rv == ENOTCONN)
478 rv = EAGAIN;
479 } while (rv == EAGAIN);
480
481 *resp = rw.rw_data;
482 return rv;
483 }
484
/*
 * prevent response code from deadlocking with reconnect code
 */
static int
resp_sendlock(struct spclient *spc)
{
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	while (spc->spc_ostatus != SPCSTATUS_FREE) {
		/* a reconnect holds the send side; give up rather than wait */
		if (__predict_false(spc->spc_reconnecting)) {
			rv = EBUSY;
			goto out;
		}
		spc->spc_ostatus = SPCSTATUS_WANTED;
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	}
	spc->spc_ostatus = SPCSTATUS_BUSY;

 out:
	pthread_mutex_unlock(&spc->spc_mtx);
	return rv;
}
508
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 int wantstr)
512 {
513 struct rsp_hdr rhdr;
514 struct iovec iov[2];
515
516 if (wantstr)
517 dlen = MIN(dlen, strlen(data)+1);
518
519 rhdr.rsp_len = sizeof(rhdr) + dlen;
520 rhdr.rsp_reqno = reqno;
521 rhdr.rsp_class = RUMPSP_RESP;
522 rhdr.rsp_type = RUMPSP_COPYIN;
523 rhdr.rsp_sysnum = 0;
524
525 IOVPUT(iov[0], rhdr);
526 IOVPUT_WITHSIZE(iov[1], data, dlen);
527
528 if (resp_sendlock(spc) != 0)
529 return;
530 (void)SENDIOV(spc, iov);
531 sendunlock(spc);
532 }
533
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 struct rsp_hdr rhdr;
538 struct iovec iov[2];
539
540 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 rhdr.rsp_reqno = reqno;
542 rhdr.rsp_class = RUMPSP_RESP;
543 rhdr.rsp_type = RUMPSP_ANONMMAP;
544 rhdr.rsp_sysnum = 0;
545
546 IOVPUT(iov[0], rhdr);
547 IOVPUT(iov[1], addr);
548
549 if (resp_sendlock(spc) != 0)
550 return;
551 (void)SENDIOV(spc, iov);
552 sendunlock(spc);
553 }
554
/*
 * Public syscall entry point: block all signals for the duration of
 * the RPC, send the request and unpack retval/errno from the server's
 * response.  Returns the syscall's error value (0 on success).
 */
int
rumpclient_syscall(int sysnum, const void *data, size_t dlen,
	register_t *retval)
{
	struct rsp_sysresp *resp;
	sigset_t omask;
	void *rdata;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);

	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
	    sysnum, data, dlen));

	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
	if (rv)
		goto out;

	resp = rdata;
	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));

	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
	rv = resp->rsys_error;
	free(rdata);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}
585
586 static void
587 handlereq(struct spclient *spc)
588 {
589 struct rsp_copydata *copydata;
590 struct rsp_hdr *rhdr = &spc->spc_hdr;
591 void *mapaddr;
592 size_t maplen;
593 int reqtype = spc->spc_hdr.rsp_type;
594
595 switch (reqtype) {
596 case RUMPSP_COPYIN:
597 case RUMPSP_COPYINSTR:
598 /*LINTED*/
599 copydata = (struct rsp_copydata *)spc->spc_buf;
600 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
601 copydata->rcp_addr, copydata->rcp_len));
602 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
603 copydata->rcp_addr, copydata->rcp_len,
604 reqtype == RUMPSP_COPYINSTR);
605 break;
606 case RUMPSP_COPYOUT:
607 case RUMPSP_COPYOUTSTR:
608 /*LINTED*/
609 copydata = (struct rsp_copydata *)spc->spc_buf;
610 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
611 copydata->rcp_addr, copydata->rcp_len));
612 /*LINTED*/
613 memcpy(copydata->rcp_addr, copydata->rcp_data,
614 copydata->rcp_len);
615 break;
616 case RUMPSP_ANONMMAP:
617 /*LINTED*/
618 maplen = *(size_t *)spc->spc_buf;
619 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
620 MAP_ANON, -1, 0);
621 if (mapaddr == MAP_FAILED)
622 mapaddr = NULL;
623 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
624 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
625 break;
626 case RUMPSP_RAISE:
627 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
628 raise((int)rhdr->rsp_signo);
629 /*
630 * We most likely have signals blocked, but the signal
631 * will be handled soon enough when we return.
632 */
633 break;
634 default:
635 printf("PANIC: INVALID TYPE %d\n", reqtype);
636 abort();
637 break;
638 }
639
640 spcfreebuf(spc);
641 }
642
static unsigned ptab_idx;		/* parsetab entry for our server URL */
static struct sockaddr *serv_sa;	/* parsed server socket address */

/* dup until we get a "good" fd which does not collide with stdio */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int i;

	/*
	 * Keep dup()ing while the fd lands in the stdio range (or once
	 * unconditionally for mustchange), stashing intermediate fds so
	 * they can be closed afterwards.
	 */
	for (i = 0; (myfd <= 2 || mustchange) && myfd != -1; i++) {
		assert(i < __arraycount(ofds));
		ofds[i] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			i--; /* prevent closing old fd */
			mustchange = 0;
		}
	}

	/* remember a dup() failure so the close loop can't clobber errno */
	sverrno = 0;
	if (myfd == -1 && i > 0)
		sverrno = errno;

	while (i-- > 0) {
		host_close(ofds[i]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
677
678 static int
679 doconnect(void)
680 {
681 struct respwait rw;
682 struct rsp_hdr rhdr;
683 char banner[MAXBANNER];
684 int s, error, flags;
685 ssize_t n;
686
687 if (kq != -1)
688 host_close(kq);
689 kq = -1;
690 s = -1;
691
692 if (clispc.spc_fd != -1)
693 host_close(clispc.spc_fd);
694 clispc.spc_fd = -1;
695
696 /*
697 * for reconnect, gate everyone out of the receiver code
698 */
699 putwait_locked(&clispc, &rw, &rhdr);
700
701 pthread_mutex_lock(&clispc.spc_mtx);
702 clispc.spc_reconnecting = 1;
703 pthread_cond_broadcast(&clispc.spc_cv);
704 clispc.spc_generation++;
705 while (clispc.spc_istatus != SPCSTATUS_FREE) {
706 clispc.spc_istatus = SPCSTATUS_WANTED;
707 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
708 }
709 kickall(&clispc);
710
711 /*
712 * we can release it already since we hold the
713 * send lock during reconnect
714 * XXX: assert it
715 */
716 clispc.spc_istatus = SPCSTATUS_FREE;
717 pthread_mutex_unlock(&clispc.spc_mtx);
718 unputwait_locked(&clispc, &rw);
719
720 free(clispc.spc_buf);
721 clispc.spc_off = 0;
722
723 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
724 if (s == -1)
725 return -1;
726
727 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
728 if (errno == EINTR)
729 continue;
730 ERRLOG(("rump_sp: client connect failed: %s\n",
731 strerror(errno)));
732 return -1;
733 }
734
735 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
736 ERRLOG(("rump_sp: connect hook failed\n"));
737 return -1;
738 }
739
740 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
741 ERRLOG(("rump_sp: failed to read banner\n"));
742 return -1;
743 }
744
745 if (banner[n-1] != '\n') {
746 ERRLOG(("rump_sp: invalid banner\n"));
747 return -1;
748 }
749 banner[n] = '\0';
750 /* XXX parse the banner some day */
751
752 flags = host_fcntl(s, F_GETFL, 0);
753 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
754 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
755 return -1;
756 }
757 clispc.spc_fd = s;
758 clispc.spc_state = SPCSTATE_RUNNING;
759 clispc.spc_reconnecting = 0;
760
761 #ifdef USE_KQUEUE
762 {
763 struct kevent kev[NSIG+1];
764 int i;
765
766 /* setup kqueue, we want all signals and the fd */
767 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
768 ERRLOG(("rump_sp: cannot setup kqueue"));
769 return -1;
770 }
771
772 for (i = 0; i < NSIG; i++) {
773 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
774 }
775 EV_SET(&kev[NSIG], clispc.spc_fd,
776 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
777 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
778 ERRLOG(("rump_sp: kevent() failed"));
779 return -1;
780 }
781 }
782 #endif /* USE_KQUEUE */
783
784 return 0;
785 }
786
/* initialize local spc bookkeeping; cannot fail, always returns 0 */
static int
doinit(void)
{

	TAILQ_INIT(&clispc.spc_respwait);
	pthread_mutex_init(&clispc.spc_mtx, NULL);
	pthread_cond_init(&clispc.spc_cv, NULL);

	return 0;
}
797
void *rumpclient__dlsym(void *, const char *);
/*
 * Plain dlsym wrapper.  rumphijack_dlsym below is a weak alias to
 * this; when the hijack library is loaded it provides a strong
 * definition instead, so symbol lookup can be redirected.
 */
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));

/* pid of the process that last completed rumpclient_init() */
static pid_t init_done = 0;
809
810 int
811 rumpclient_init(void)
812 {
813 char *p;
814 int error;
815 int rv = -1;
816 int hstype;
817 pid_t mypid;
818
819 /*
820 * Make sure we're not riding the context of a previous
821 * host fork. Note: it's *possible* that after n>1 forks
822 * we have the same pid as one of our exited parents, but
823 * I'm pretty sure there are 0 practical implications, since
824 * it means generations would have to skip rumpclient init.
825 */
826 if (init_done == (mypid = getpid()))
827 return 0;
828
829 /* kq does not traverse fork() */
830 if (init_done != 0)
831 kq = -1;
832 init_done = mypid;
833
834 sigfillset(&fullset);
835
836 /*
837 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
838 * wann wird man je verstehen? wann wird man je verstehen?
839 */
840 #define FINDSYM2(_name_,_syscall_) \
841 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
842 #_syscall_)) == NULL) { \
843 if (rumphijack_dlsym == rumpclient__dlsym) \
844 host_##_name_ = _name_; /* static fallback */ \
845 if (host_##_name_ == NULL) \
846 errx(1, "cannot find %s: %s", #_syscall_, \
847 dlerror()); \
848 }
849 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
850 #ifdef __NetBSD__
851 FINDSYM2(socket,__socket30)
852 #else
853 FINDSYM(socket)
854 #endif
855
856 FINDSYM(close)
857 FINDSYM(connect)
858 FINDSYM(fcntl)
859 FINDSYM(poll)
860 FINDSYM(read)
861 FINDSYM(sendmsg)
862 FINDSYM(setsockopt)
863 FINDSYM(dup)
864 FINDSYM(execve)
865
866 #ifdef USE_KQUEUE
867 FINDSYM(kqueue)
868 #if !__NetBSD_Prereq__(5,99,7)
869 FINDSYM(kevent)
870 #else
871 FINDSYM2(kevent,_sys___kevent50)
872 #endif
873 #endif /* USE_KQUEUE */
874
875 #undef FINDSYM
876 #undef FINDSY2
877
878 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
879 if ((p = getenv("RUMP_SERVER")) == NULL) {
880 fprintf(stderr, "error: RUMP_SERVER not set\n");
881 errno = ENOENT;
882 goto out;
883 }
884 }
885
886 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
887 errno = error;
888 goto out;
889 }
890
891 if (doinit() == -1)
892 goto out;
893
894 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
895 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
896 unsetenv("RUMPCLIENT__EXECFD");
897 hstype = HANDSHAKE_EXEC;
898 } else {
899 if (doconnect() == -1)
900 goto out;
901 hstype = HANDSHAKE_GUEST;
902 }
903
904 error = handshake_req(&clispc, hstype, NULL, 0, false);
905 if (error) {
906 pthread_mutex_destroy(&clispc.spc_mtx);
907 pthread_cond_destroy(&clispc.spc_cv);
908 if (clispc.spc_fd != -1)
909 host_close(clispc.spc_fd);
910 errno = error;
911 goto out;
912 }
913 rv = 0;
914
915 out:
916 if (rv == -1)
917 init_done = 0;
918 return rv;
919 }
920
/* state captured by rumpclient_prefork(), consumed after fork()/vfork() */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* server-issued fork auth cookie */
	struct spclient fork_spc;	/* snapshot of parent connection state */
	int fork_kq;			/* parent's kqueue fd */
};
926
/*
 * First phase of connection fork: fetch an auth cookie from the
 * server and snapshot current connection state.  The result is
 * handed to rumpclient_fork_init() in the child (or
 * rumpclient_fork_vparent() in a vfork parent).  Returns NULL with
 * errno set on failure.
 */
struct rumpclient_fork *
rumpclient_prefork(void)
{
	struct rumpclient_fork *rpf;
	sigset_t omask;
	void *resp;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	rpf = malloc(sizeof(*rpf));
	if (rpf == NULL)
		goto out;

	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
		free(rpf);
		errno = rv;
		rpf = NULL;
		goto out;
	}

	/* response payload is the auth cookie */
	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
	free(resp);

	rpf->fork_spc = clispc;
	rpf->fork_kq = kq;

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rpf;
}
957
/*
 * Child-side second phase of fork: open a fresh connection to the
 * server and authenticate with the cookie obtained by prefork.
 * Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_fork_init(struct rumpclient_fork *rpf)
{
	int error;
	int osock;

	/* keep the inherited fd across the reset; doconnect() closes it */
	osock = clispc.spc_fd;
	memset(&clispc, 0, sizeof(clispc));
	clispc.spc_fd = osock;

	kq = -1; /* kqueue descriptor is not copied over fork() */

	if (doinit() == -1)
		return -1;
	if (doconnect() == -1)
		return -1;

	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
	    0, false);
	if (error) {
		pthread_mutex_destroy(&clispc.spc_mtx);
		pthread_cond_destroy(&clispc.spc_cv);
		errno = error;
		return -1;
	}

	return 0;
}
986
/*
 * Cancel a pending fork.  Unimplemented: the prefork state (and the
 * server-side auth cookie) is currently just abandoned.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
994
/*
 * vfork parent: restore the connection state the child clobbered
 * while running in our address space.
 */
void
rumpclient_fork_vparent(struct rumpclient_fork *rpf)
{

	clispc = rpf->fork_spc;
	kq = rpf->fork_kq;
}
1002
1003 void
1004 rumpclient_setconnretry(time_t timeout)
1005 {
1006
1007 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1008 return; /* gigo */
1009
1010 retrytimo = timeout;
1011 }
1012
/*
 * A descriptor is about to be closed (or closed en masse / dup2'd
 * over).  Protect the client's private fds: for FCLOSEM, close up to
 * the highest private fd while skipping them and report how far we
 * got via *fdp; for CLOSE/DUP2, first move the private fd elsewhere
 * with dupgood() (and on NetBSD repoint the kqueue READ filter at the
 * new socket).  Returns 0 on success, -1 on error.
 */
int
rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
{
	int fd = *fdp;
	int untilfd, rv;
	int newfd;

	switch (variant) {
	case RUMPCLIENT_CLOSE_FCLOSEM:
		untilfd = MAX(clispc.spc_fd, kq);
		for (; fd <= untilfd; fd++) {
			/* never close our own fds */
			if (fd == clispc.spc_fd || fd == kq)
				continue;
			rv = host_close(fd);
			if (rv == -1)
				return -1;
		}
		*fdp = fd;
		break;

	case RUMPCLIENT_CLOSE_CLOSE:
	case RUMPCLIENT_CLOSE_DUP2:
		if (fd == clispc.spc_fd) {
			newfd = dupgood(clispc.spc_fd, 1);
			if (newfd == -1)
				return -1;

#ifdef USE_KQUEUE
			{
			struct kevent kev[2];

			/*
			 * now, we have a new socket number, so change
			 * the file descriptor that kqueue is
			 * monitoring. remove old and add new.
			 */
			EV_SET(&kev[0], clispc.spc_fd,
			    EVFILT_READ, EV_DELETE, 0, 0, 0);
			EV_SET(&kev[1], newfd,
			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
				int sverrno = errno;
				host_close(newfd);
				errno = sverrno;
				return -1;
			}
			clispc.spc_fd = newfd;
			}
		}
		if (fd == kq) {
			newfd = dupgood(kq, 1);
			if (newfd == -1)
				return -1;
			kq = newfd;
#else /* USE_KQUEUE */
			clispc.spc_fd = newfd;
#endif /* !USE_KQUEUE */
		}
		break;
	}

	return 0;
}
1076
/*
 * Fork the host process with the rump kernel context intact.
 * rumpclient__dofork is defined elsewhere (presumably wraps
 * prefork/fork(2)/fork_init -- confirm in sp_common.c / siblings).
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
1083
1084 /*
1085 * Process is about to exec. Save info about our existing connection
1086 * in the env. rumpclient will check for this info in init().
1087 * This is mostly for the benefit of rumphijack, but regular applications
1088 * may use it as well.
1089 */
1090 int
1091 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1092 {
1093 char buf[4096];
1094 char **newenv;
1095 char *envstr, *envstr2;
1096 size_t nelem;
1097 int rv, sverrno;
1098
1099 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1100 clispc.spc_fd, kq);
1101 envstr = malloc(strlen(buf)+1);
1102 if (envstr == NULL) {
1103 return ENOMEM;
1104 }
1105 strcpy(envstr, buf);
1106
1107 /* do we have a fully parsed url we want to forward in the env? */
1108 if (*parsedurl != '\0') {
1109 snprintf(buf, sizeof(buf),
1110 "RUMP__PARSEDSERVER=%s", parsedurl);
1111 envstr2 = malloc(strlen(buf)+1);
1112 if (envstr2 == NULL) {
1113 free(envstr);
1114 return ENOMEM;
1115 }
1116 strcpy(envstr2, buf);
1117 } else {
1118 envstr2 = NULL;
1119 }
1120
1121 for (nelem = 0; envp && envp[nelem]; nelem++)
1122 continue;
1123
1124 newenv = malloc(sizeof(*newenv) * (nelem+3));
1125 if (newenv == NULL) {
1126 free(envstr2);
1127 free(envstr);
1128 return ENOMEM;
1129 }
1130 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1131
1132 newenv[nelem] = envstr;
1133 newenv[nelem+1] = envstr2;
1134 newenv[nelem+2] = NULL;
1135
1136 rv = host_execve(path, argv, newenv);
1137
1138 _DIAGASSERT(rv != 0);
1139 sverrno = errno;
1140 free(envstr2);
1141 free(envstr);
1142 free(newenv);
1143 errno = sverrno;
1144 return rv;
1145 }
1146
/*
 * daemon(3) lookalike that keeps the rump kernel connection usable
 * across the fork performed by daemon(), using the prefork/fork-init
 * protocol.  Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;
}
1168