rumpclient.c revision 1.51 1 /* $NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include "rumpuser_port.h"
33
/*
 * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
 * use kqueue on other BSDs too, but I haven't tested those.  We
 * want to use kqueue because it will give us the ability to get signal
 * notifications but defer their handling to a stage where we do not
 * hold the communication lock.  Taking a signal while holding on to
 * that lock may cause a deadlock.  Therefore, block signals throughout
 * the RPC when using poll.  This unfortunately means that the normal
 * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
 * the above scenario correctly, I'm all ears.
 */
46
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $");
53
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82
83 #include <rump/rumpclient.h>
84
/*
 * Host system call entry points used by the client library.  The
 * pointers start out unset; rumpclient_init() fills each one in by
 * looking the symbol up with rumphijack_dlsym(RTLD_NEXT, ...).
 *
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c (included
 * below) to select these pointers over direct calls -- confirm there.
 */
#define HOSTOPS
int (*host_socket)(int, int, int);
int (*host_close)(int);
int (*host_connect)(int, const struct sockaddr *, socklen_t);
int (*host_fcntl)(int, int, ...);
int (*host_poll)(struct pollfd *, nfds_t, int);
ssize_t (*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int (*host_setsockopt)(int, int, int, const void *, socklen_t);
int (*host_dup)(int);

/* kqueue/kevent are only looked up and used when USE_KQUEUE is defined */
#ifdef USE_KQUEUE
int (*host_kqueue)(void);
int (*host_kevent)(int, const struct kevent *, size_t,
	struct kevent *, size_t, const struct timespec *);
#endif

/* used by rumpclient_exec() to perform the actual exec */
int (*host_execve)(const char *, char *const[], char *const[]);
103
104 #include "sp_common.c"
105
/*
 * The connection to the rump kernel server used by every routine in
 * this file.  A descriptor of -1 means "no socket open".
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor, -1 when none is open (stays -1 without USE_KQUEUE) */
static int kq = -1;
/* set containing every signal; filled in by rumpclient_init() */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Reconnect policy consulted by send_with_recon() and set through
 * rumpclient_setconnretry().
 *
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
124
125 static int
126 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
127 {
128 struct timeval starttime, curtime;
129 time_t prevreconmsg;
130 unsigned reconretries;
131 int rv;
132
133 for (prevreconmsg = 0, reconretries = 0;;) {
134 rv = dosend(spc, iov, iovlen);
135 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
136 /* no persistent connections */
137 if (retrytimo == 0) {
138 rv = ENOTCONN;
139 break;
140 }
141 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
142 _exit(1);
143
144 if (!prevreconmsg) {
145 prevreconmsg = time(NULL);
146 gettimeofday(&starttime, NULL);
147 }
148 if (reconretries == 1) {
149 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
150 rv = ENOTCONN;
151 break;
152 }
153 fprintf(stderr, "rump_sp: connection to "
154 "kernel lost, trying to reconnect ...\n");
155 } else if (time(NULL) - prevreconmsg > 120) {
156 fprintf(stderr, "rump_sp: still trying to "
157 "reconnect ...\n");
158 prevreconmsg = time(NULL);
159 }
160
161 /* check that we aren't over the limit */
162 if (retrytimo > 0) {
163 struct timeval tmp;
164
165 gettimeofday(&curtime, NULL);
166 timersub(&curtime, &starttime, &tmp);
167 if (tmp.tv_sec >= retrytimo) {
168 fprintf(stderr, "rump_sp: reconnect "
169 "failed, %lld second timeout\n",
170 (long long)retrytimo);
171 return ENOTCONN;
172 }
173 }
174
175 /* adhoc backoff timer */
176 if (reconretries < 10) {
177 usleep(100000 * reconretries);
178 } else {
179 sleep(MIN(10, reconretries-9));
180 }
181 reconretries++;
182
183 if ((rv = doconnect()) != 0)
184 continue;
185 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 NULL, 0, true)) != 0)
187 continue;
188
189 /*
190 * ok, reconnect succesful. we need to return to
191 * the upper layer to get the entire PDU resent.
192 */
193 if (reconretries != 1)
194 fprintf(stderr, "rump_sp: reconnected!\n");
195 rv = EAGAIN;
196 break;
197 } else {
198 _DIAGASSERT(errno != EAGAIN);
199 break;
200 }
201 }
202
203 return rv;
204 }
205
206 static int
207 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
208 bool keeplock)
209 {
210 uint64_t mygen;
211 bool imalive = true;
212
213 pthread_mutex_lock(&spc->spc_mtx);
214 if (!keeplock)
215 sendunlockl(spc);
216 mygen = spc->spc_generation;
217
218 rw->rw_error = 0;
219 while (!rw->rw_done && rw->rw_error == 0) {
220 if (__predict_false(spc->spc_generation != mygen || !imalive))
221 break;
222
223 /* are we free to receive? */
224 if (spc->spc_istatus == SPCSTATUS_FREE) {
225 int gotresp, dosig, rv;
226
227 spc->spc_istatus = SPCSTATUS_BUSY;
228 pthread_mutex_unlock(&spc->spc_mtx);
229
230 dosig = 0;
231 for (gotresp = 0; !gotresp; ) {
232 #ifdef USE_KQUEUE
233 struct kevent kev[8];
234 int i;
235
236 /*
237 * typically we don't have a frame waiting
238 * when we come in here, so call kevent now
239 */
240 rv = host_kevent(kq, NULL, 0,
241 kev, __arraycount(kev), NULL);
242
243 if (__predict_false(rv == -1)) {
244 goto activity;
245 }
246
247 /*
248 * XXX: don't know how this can happen
249 * (timeout cannot expire since there
250 * isn't one), but it does happen.
251 * treat it as an expectional condition
252 * and go through tryread to determine
253 * alive status.
254 */
255 if (__predict_false(rv == 0))
256 goto activity;
257
258 for (i = 0; i < rv; i++) {
259 if (kev[i].filter == EVFILT_SIGNAL)
260 dosig++;
261 }
262 if (dosig)
263 goto cleanup;
264
265 /*
266 * ok, activity. try to read a frame to
267 * determine what happens next.
268 */
269 activity:
270 #else /* USE_KQUEUE */
271 struct pollfd pfd;
272
273 pfd.fd = clispc.spc_fd;
274 pfd.events = POLLIN;
275
276 rv = host_poll(&pfd, 1, -1);
277 #endif /* !USE_KQUEUE */
278
279 switch (readframe(spc)) {
280 case 0:
281 continue;
282 case -1:
283 imalive = false;
284 goto cleanup;
285 default:
286 /* case 1 */
287 break;
288 }
289
290 switch (spc->spc_hdr.rsp_class) {
291 case RUMPSP_RESP:
292 case RUMPSP_ERROR:
293 kickwaiter(spc);
294 gotresp = spc->spc_hdr.rsp_reqno ==
295 rw->rw_reqno;
296 break;
297 case RUMPSP_REQ:
298 handlereq(spc);
299 break;
300 default:
301 /* panic */
302 break;
303 }
304 }
305
306 cleanup:
307 pthread_mutex_lock(&spc->spc_mtx);
308 if (spc->spc_istatus == SPCSTATUS_WANTED)
309 kickall(spc);
310 spc->spc_istatus = SPCSTATUS_FREE;
311
312 /* take one for the team */
313 if (dosig) {
314 pthread_mutex_unlock(&spc->spc_mtx);
315 pthread_sigmask(SIG_SETMASK, mask, NULL);
316 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
317 pthread_mutex_lock(&spc->spc_mtx);
318 }
319 } else {
320 spc->spc_istatus = SPCSTATUS_WANTED;
321 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
322 }
323 }
324 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
325 pthread_mutex_unlock(&spc->spc_mtx);
326 pthread_cond_destroy(&rw->rw_cv);
327
328 if (spc->spc_generation != mygen || !imalive) {
329 return ENOTCONN;
330 }
331 return rw->rw_error;
332 }
333
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 const void *data, size_t dlen, void **resp)
337 {
338 struct rsp_hdr rhdr;
339 struct respwait rw;
340 struct iovec iov[2];
341 int rv;
342
343 rhdr.rsp_len = sizeof(rhdr) + dlen;
344 rhdr.rsp_class = RUMPSP_REQ;
345 rhdr.rsp_type = RUMPSP_SYSCALL;
346 rhdr.rsp_sysnum = sysnum;
347
348 IOVPUT(iov[0], rhdr);
349 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350
351 do {
352 putwait(spc, &rw, &rhdr);
353 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 unputwait(spc, &rw);
355 continue;
356 }
357
358 rv = cliwaitresp(spc, &rw, omask, false);
359 if (rv == ENOTCONN)
360 rv = EAGAIN;
361 } while (rv == EAGAIN);
362
363 *resp = rw.rw_data;
364 return rv;
365 }
366
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 int cancel, bool haslock)
370 {
371 struct handshake_fork rf;
372 const char *myprogname = NULL; /* XXXgcc */
373 struct rsp_hdr rhdr;
374 struct respwait rw;
375 sigset_t omask;
376 size_t bonus;
377 struct iovec iov[2];
378 int rv;
379
380 if (type == HANDSHAKE_FORK) {
381 bonus = sizeof(rf);
382 } else {
383 #ifdef __NetBSD__
384 /* would procfs work on NetBSD too? */
385 myprogname = getprogname();
386 #else
387 int fd = open("/proc/self/comm", O_RDONLY);
388 if (fd == -1) {
389 myprogname = "???";
390 } else {
391 static char commname[128];
392
393 memset(commname, 0, sizeof(commname));
394 if (read(fd, commname, sizeof(commname)) > 0) {
395 char *n;
396
397 n = strrchr(commname, '\n');
398 if (n)
399 *n = '\0';
400 myprogname = commname;
401 } else {
402 myprogname = "???";
403 }
404 close(fd);
405 }
406 #endif
407 bonus = strlen(myprogname)+1;
408 }
409
410 /* performs server handshake */
411 rhdr.rsp_len = sizeof(rhdr) + bonus;
412 rhdr.rsp_class = RUMPSP_REQ;
413 rhdr.rsp_type = RUMPSP_HANDSHAKE;
414 rhdr.rsp_handshake = type;
415
416 IOVPUT(iov[0], rhdr);
417
418 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
419 if (haslock)
420 putwait_locked(spc, &rw, &rhdr);
421 else
422 putwait(spc, &rw, &rhdr);
423 if (type == HANDSHAKE_FORK) {
424 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
425 rf.rf_cancel = cancel;
426 IOVPUT(iov[1], rf);
427 } else {
428 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
429 }
430 rv = send_with_recon(spc, iov, __arraycount(iov));
431 if (rv || cancel) {
432 if (haslock)
433 unputwait_locked(spc, &rw);
434 else
435 unputwait(spc, &rw);
436 if (cancel) {
437 goto out;
438 }
439 } else {
440 rv = cliwaitresp(spc, &rw, &omask, haslock);
441 }
442 if (rv)
443 goto out;
444
445 rv = *(int *)rw.rw_data;
446 free(rw.rw_data);
447
448 out:
449 pthread_sigmask(SIG_SETMASK, &omask, NULL);
450 return rv;
451 }
452
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 struct rsp_hdr rhdr;
457 struct respwait rw;
458 struct iovec iov[1];
459 int rv;
460
461 rhdr.rsp_len = sizeof(rhdr);
462 rhdr.rsp_class = RUMPSP_REQ;
463 rhdr.rsp_type = RUMPSP_PREFORK;
464 rhdr.rsp_error = 0;
465
466 IOVPUT(iov[0], rhdr);
467
468 do {
469 putwait(spc, &rw, &rhdr);
470 rv = send_with_recon(spc, iov, __arraycount(iov));
471 if (rv != 0) {
472 unputwait(spc, &rw);
473 continue;
474 }
475
476 rv = cliwaitresp(spc, &rw, omask, false);
477 if (rv == ENOTCONN)
478 rv = EAGAIN;
479 } while (rv == EAGAIN);
480
481 *resp = rw.rw_data;
482 return rv;
483 }
484
485 /*
486 * prevent response code from deadlocking with reconnect code
487 */
488 static int
489 resp_sendlock(struct spclient *spc)
490 {
491 int rv = 0;
492
493 pthread_mutex_lock(&spc->spc_mtx);
494 while (spc->spc_ostatus != SPCSTATUS_FREE) {
495 if (__predict_false(spc->spc_reconnecting)) {
496 rv = EBUSY;
497 goto out;
498 }
499 spc->spc_ostatus = SPCSTATUS_WANTED;
500 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
501 }
502 spc->spc_ostatus = SPCSTATUS_BUSY;
503
504 out:
505 pthread_mutex_unlock(&spc->spc_mtx);
506 return rv;
507 }
508
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 int wantstr)
512 {
513 struct rsp_hdr rhdr;
514 struct iovec iov[2];
515
516 if (wantstr)
517 dlen = MIN(dlen, strlen(data)+1);
518
519 rhdr.rsp_len = sizeof(rhdr) + dlen;
520 rhdr.rsp_reqno = reqno;
521 rhdr.rsp_class = RUMPSP_RESP;
522 rhdr.rsp_type = RUMPSP_COPYIN;
523 rhdr.rsp_sysnum = 0;
524
525 IOVPUT(iov[0], rhdr);
526 IOVPUT_WITHSIZE(iov[1], data, dlen);
527
528 if (resp_sendlock(spc) != 0)
529 return;
530 (void)SENDIOV(spc, iov);
531 sendunlock(spc);
532 }
533
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 struct rsp_hdr rhdr;
538 struct iovec iov[2];
539
540 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 rhdr.rsp_reqno = reqno;
542 rhdr.rsp_class = RUMPSP_RESP;
543 rhdr.rsp_type = RUMPSP_ANONMMAP;
544 rhdr.rsp_sysnum = 0;
545
546 IOVPUT(iov[0], rhdr);
547 IOVPUT(iov[1], addr);
548
549 if (resp_sendlock(spc) != 0)
550 return;
551 (void)SENDIOV(spc, iov);
552 sendunlock(spc);
553 }
554
555 int
556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
557 register_t *retval)
558 {
559 struct rsp_sysresp *resp;
560 sigset_t omask;
561 void *rdata;
562 int rv;
563
564 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
565
566 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
567 sysnum, data, dlen));
568
569 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
570 if (rv)
571 goto out;
572
573 resp = rdata;
574 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
575 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
576
577 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
578 rv = resp->rsys_error;
579 free(rdata);
580
581 out:
582 pthread_sigmask(SIG_SETMASK, &omask, NULL);
583 return rv;
584 }
585
586 static void
587 handlereq(struct spclient *spc)
588 {
589 struct rsp_copydata *copydata;
590 struct rsp_hdr *rhdr = &spc->spc_hdr;
591 void *mapaddr;
592 size_t maplen;
593 int reqtype = spc->spc_hdr.rsp_type;
594
595 switch (reqtype) {
596 case RUMPSP_COPYIN:
597 case RUMPSP_COPYINSTR:
598 /*LINTED*/
599 copydata = (struct rsp_copydata *)spc->spc_buf;
600 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
601 copydata->rcp_addr, copydata->rcp_len));
602 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
603 copydata->rcp_addr, copydata->rcp_len,
604 reqtype == RUMPSP_COPYINSTR);
605 break;
606 case RUMPSP_COPYOUT:
607 case RUMPSP_COPYOUTSTR:
608 /*LINTED*/
609 copydata = (struct rsp_copydata *)spc->spc_buf;
610 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
611 copydata->rcp_addr, copydata->rcp_len));
612 /*LINTED*/
613 memcpy(copydata->rcp_addr, copydata->rcp_data,
614 copydata->rcp_len);
615 break;
616 case RUMPSP_ANONMMAP:
617 /*LINTED*/
618 maplen = *(size_t *)spc->spc_buf;
619 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
620 MAP_ANON, -1, 0);
621 if (mapaddr == MAP_FAILED)
622 mapaddr = NULL;
623 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
624 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
625 break;
626 case RUMPSP_RAISE:
627 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
628 raise((int)rhdr->rsp_signo);
629 /*
630 * We most likely have signals blocked, but the signal
631 * will be handled soon enough when we return.
632 */
633 break;
634 default:
635 printf("PANIC: INVALID TYPE %d\n", reqtype);
636 abort();
637 break;
638 }
639
640 spcfreebuf(spc);
641 }
642
/*
 * Server address and the index of its entry in parsetab[].  Both are
 * filled in by parseurl() in rumpclient_init() and used by doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
645
/*
 * Duplicate myfd until we get a "good" descriptor, i.e. one that does
 * not collide with stdio (fd > 2).  If mustchange is set, the
 * descriptor is duplicated at least once and the descriptor passed in
 * is left open for the caller; every other intermediate descriptor is
 * closed.
 *
 * Returns the new descriptor, or -1 if myfd was -1 or host_dup()
 * failed (errno is that of the failed dup).
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int sverrno = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: do not count the slot */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	/* closing below may clobber errno, so stash it */
	if (myfd == -1 && nstale > 0)
		sverrno = errno;

	while (nstale > 0) {
		nstale--;
		host_close(stale[nstale]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
677
678 static int
679 doconnect(void)
680 {
681 struct respwait rw;
682 struct rsp_hdr rhdr;
683 char banner[MAXBANNER];
684 int s, error, flags;
685 ssize_t n;
686
687 if (kq != -1)
688 host_close(kq);
689 kq = -1;
690 s = -1;
691
692 if (clispc.spc_fd != -1)
693 host_close(clispc.spc_fd);
694 clispc.spc_fd = -1;
695
696 /*
697 * for reconnect, gate everyone out of the receiver code
698 */
699 putwait_locked(&clispc, &rw, &rhdr);
700
701 pthread_mutex_lock(&clispc.spc_mtx);
702 clispc.spc_reconnecting = 1;
703 pthread_cond_broadcast(&clispc.spc_cv);
704 clispc.spc_generation++;
705 while (clispc.spc_istatus != SPCSTATUS_FREE) {
706 clispc.spc_istatus = SPCSTATUS_WANTED;
707 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
708 }
709 kickall(&clispc);
710
711 /*
712 * we can release it already since we hold the
713 * send lock during reconnect
714 * XXX: assert it
715 */
716 clispc.spc_istatus = SPCSTATUS_FREE;
717 pthread_mutex_unlock(&clispc.spc_mtx);
718 unputwait_locked(&clispc, &rw);
719
720 free(clispc.spc_buf);
721 clispc.spc_off = 0;
722
723 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
724 if (s == -1)
725 return -1;
726
727 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
728 if (errno == EINTR)
729 continue;
730 ERRLOG(("rump_sp: client connect failed: %s\n",
731 strerror(errno)));
732 return -1;
733 }
734
735 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
736 ERRLOG(("rump_sp: connect hook failed\n"));
737 return -1;
738 }
739
740 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
741 ERRLOG(("rump_sp: failed to read banner\n"));
742 return -1;
743 }
744
745 if (banner[n-1] != '\n') {
746 ERRLOG(("rump_sp: invalid banner\n"));
747 return -1;
748 }
749 banner[n] = '\0';
750 /* XXX parse the banner some day */
751
752 flags = host_fcntl(s, F_GETFL, 0);
753 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
754 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
755 return -1;
756 }
757 clispc.spc_fd = s;
758 clispc.spc_state = SPCSTATE_RUNNING;
759 clispc.spc_reconnecting = 0;
760
761 #ifdef USE_KQUEUE
762 {
763 struct kevent kev[NSIG+1];
764 int i;
765
766 /* setup kqueue, we want all signals and the fd */
767 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
768 ERRLOG(("rump_sp: cannot setup kqueue"));
769 return -1;
770 }
771
772 for (i = 0; i < NSIG; i++) {
773 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
774 }
775 EV_SET(&kev[NSIG], clispc.spc_fd,
776 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
777 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
778 ERRLOG(("rump_sp: kevent() failed"));
779 return -1;
780 }
781 }
782 #endif /* USE_KQUEUE */
783
784 return 0;
785 }
786
787 static int
788 doinit(void)
789 {
790
791 TAILQ_INIT(&clispc.spc_respwait);
792 pthread_mutex_init(&clispc.spc_mtx, NULL);
793 pthread_cond_init(&clispc.spc_cv, NULL);
794
795 return 0;
796 }
797
/*
 * Plain dlsym() wrapper.  It is also reachable through the weak alias
 * rumphijack_dlsym declared below; rumpclient_init() compares the two
 * addresses to tell whether the alias still refers to this default
 * implementation.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
	__attribute__((__weak__, alias("rumpclient__dlsym")));
807
808 static pid_t init_done = 0;
809
810 int
811 rumpclient_init(void)
812 {
813 char *p;
814 int error;
815 int rv = -1;
816 int hstype;
817 pid_t mypid;
818
819 /*
820 * Make sure we're not riding the context of a previous
821 * host fork. Note: it's *possible* that after n>1 forks
822 * we have the same pid as one of our exited parents, but
823 * I'm pretty sure there are 0 practical implications, since
824 * it means generations would have to skip rumpclient init.
825 */
826 if (init_done == (mypid = getpid()))
827 return 0;
828
829 /* kq does not traverse fork() */
830 if (init_done != 0)
831 kq = -1;
832 init_done = mypid;
833
834 sigfillset(&fullset);
835
836 /*
837 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
838 * wann wird man je verstehen? wann wird man je verstehen?
839 */
840 #define FINDSYM2(_name_,_syscall_) \
841 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
842 #_syscall_)) == NULL) { \
843 if (rumphijack_dlsym == rumpclient__dlsym) \
844 host_##_name_ = _name_; /* static fallback */ \
845 if (host_##_name_ == NULL) \
846 errx(1, "cannot find %s: %s", #_syscall_, \
847 dlerror()); \
848 }
849 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
850 #ifdef __NetBSD__
851 FINDSYM2(socket,__socket30)
852 #else
853 FINDSYM(socket)
854 #endif
855
856 FINDSYM(close)
857 FINDSYM(connect)
858 FINDSYM(fcntl)
859 FINDSYM(poll)
860 FINDSYM(read)
861 FINDSYM(sendmsg)
862 FINDSYM(setsockopt)
863 FINDSYM(dup)
864 FINDSYM(execve)
865
866 #ifdef USE_KQUEUE
867 FINDSYM(kqueue)
868 #if !__NetBSD_Prereq__(5,99,7)
869 FINDSYM(kevent)
870 #else
871 FINDSYM2(kevent,_sys___kevent50)
872 #endif
873 #endif /* USE_KQUEUE */
874
875 #undef FINDSYM
876 #undef FINDSY2
877
878 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
879 if ((p = getenv("RUMP_SERVER")) == NULL) {
880 errno = ENOENT;
881 goto out;
882 }
883 }
884
885 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
886 errno = error;
887 goto out;
888 }
889
890 if (doinit() == -1)
891 goto out;
892
893 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
894 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
895 unsetenv("RUMPCLIENT__EXECFD");
896 hstype = HANDSHAKE_EXEC;
897 } else {
898 if (doconnect() == -1)
899 goto out;
900 hstype = HANDSHAKE_GUEST;
901 }
902
903 error = handshake_req(&clispc, hstype, NULL, 0, false);
904 if (error) {
905 pthread_mutex_destroy(&clispc.spc_mtx);
906 pthread_cond_destroy(&clispc.spc_cv);
907 if (clispc.spc_fd != -1)
908 host_close(clispc.spc_fd);
909 errno = error;
910 goto out;
911 }
912 rv = 0;
913
914 out:
915 if (rv == -1)
916 init_done = 0;
917 return rv;
918 }
919
/*
 * State captured by rumpclient_prefork() and consumed by
 * rumpclient_fork_init() / rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* cookie from the prefork response */
	struct spclient fork_spc;	/* copy of clispc at prefork time */
	int fork_kq;			/* value of kq at prefork time */
};
925
926 struct rumpclient_fork *
927 rumpclient_prefork(void)
928 {
929 struct rumpclient_fork *rpf;
930 sigset_t omask;
931 void *resp;
932 int rv;
933
934 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
935 rpf = malloc(sizeof(*rpf));
936 if (rpf == NULL)
937 goto out;
938
939 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
940 free(rpf);
941 errno = rv;
942 rpf = NULL;
943 goto out;
944 }
945
946 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
947 free(resp);
948
949 rpf->fork_spc = clispc;
950 rpf->fork_kq = kq;
951
952 out:
953 pthread_sigmask(SIG_SETMASK, &omask, NULL);
954 return rpf;
955 }
956
957 int
958 rumpclient_fork_init(struct rumpclient_fork *rpf)
959 {
960 int error;
961 int osock;
962
963 osock = clispc.spc_fd;
964 memset(&clispc, 0, sizeof(clispc));
965 clispc.spc_fd = osock;
966
967 kq = -1; /* kqueue descriptor is not copied over fork() */
968
969 if (doinit() == -1)
970 return -1;
971 if (doconnect() == -1)
972 return -1;
973
974 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
975 0, false);
976 if (error) {
977 pthread_mutex_destroy(&clispc.spc_mtx);
978 pthread_cond_destroy(&clispc.spc_cv);
979 errno = error;
980 return -1;
981 }
982
983 return 0;
984 }
985
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * the function does nothing with rpf (in particular, it does not
 * release it).
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
993
994 void
995 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
996 {
997
998 clispc = rpf->fork_spc;
999 kq = rpf->fork_kq;
1000 }
1001
1002 void
1003 rumpclient_setconnretry(time_t timeout)
1004 {
1005
1006 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1007 return; /* gigo */
1008
1009 retrytimo = timeout;
1010 }
1011
1012 int
1013 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1014 {
1015 int fd = *fdp;
1016 int untilfd, rv;
1017 int newfd;
1018
1019 switch (variant) {
1020 case RUMPCLIENT_CLOSE_FCLOSEM:
1021 untilfd = MAX(clispc.spc_fd, kq);
1022 for (; fd <= untilfd; fd++) {
1023 if (fd == clispc.spc_fd || fd == kq)
1024 continue;
1025 rv = host_close(fd);
1026 if (rv == -1)
1027 return -1;
1028 }
1029 *fdp = fd;
1030 break;
1031
1032 case RUMPCLIENT_CLOSE_CLOSE:
1033 case RUMPCLIENT_CLOSE_DUP2:
1034 if (fd == clispc.spc_fd) {
1035 newfd = dupgood(clispc.spc_fd, 1);
1036 if (newfd == -1)
1037 return -1;
1038
1039 #ifdef USE_KQUEUE
1040 {
1041 struct kevent kev[2];
1042
1043 /*
1044 * now, we have a new socket number, so change
1045 * the file descriptor that kqueue is
1046 * monitoring. remove old and add new.
1047 */
1048 EV_SET(&kev[0], clispc.spc_fd,
1049 EVFILT_READ, EV_DELETE, 0, 0, 0);
1050 EV_SET(&kev[1], newfd,
1051 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1052 if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
1053 int sverrno = errno;
1054 host_close(newfd);
1055 errno = sverrno;
1056 return -1;
1057 }
1058 clispc.spc_fd = newfd;
1059 }
1060 }
1061 if (fd == kq) {
1062 newfd = dupgood(kq, 1);
1063 if (newfd == -1)
1064 return -1;
1065 kq = newfd;
1066 #else /* USE_KQUEUE */
1067 clispc.spc_fd = newfd;
1068 #endif /* !USE_KQUEUE */
1069 }
1070 break;
1071 }
1072
1073 return 0;
1074 }
1075
/*
 * fork() for rump kernel clients: passes the host's fork() to
 * rumpclient__dofork() and returns its result.
 *
 * NOTE(review): rumpclient__dofork() is not defined in this file;
 * presumably it brackets the fork with rumpclient_prefork() and
 * rumpclient_fork_init() -- confirm in <rump/rumpclient.h>.
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
1082
1083 /*
1084 * Process is about to exec. Save info about our existing connection
1085 * in the env. rumpclient will check for this info in init().
1086 * This is mostly for the benefit of rumphijack, but regular applications
1087 * may use it as well.
1088 */
1089 int
1090 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1091 {
1092 char buf[4096];
1093 char **newenv;
1094 char *envstr, *envstr2;
1095 size_t nelem;
1096 int rv, sverrno;
1097
1098 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1099 clispc.spc_fd, kq);
1100 envstr = malloc(strlen(buf)+1);
1101 if (envstr == NULL) {
1102 return ENOMEM;
1103 }
1104 strcpy(envstr, buf);
1105
1106 /* do we have a fully parsed url we want to forward in the env? */
1107 if (*parsedurl != '\0') {
1108 snprintf(buf, sizeof(buf),
1109 "RUMP__PARSEDSERVER=%s", parsedurl);
1110 envstr2 = malloc(strlen(buf)+1);
1111 if (envstr2 == NULL) {
1112 free(envstr);
1113 return ENOMEM;
1114 }
1115 strcpy(envstr2, buf);
1116 } else {
1117 envstr2 = NULL;
1118 }
1119
1120 for (nelem = 0; envp && envp[nelem]; nelem++)
1121 continue;
1122
1123 newenv = malloc(sizeof(*newenv) * (nelem+3));
1124 if (newenv == NULL) {
1125 free(envstr2);
1126 free(envstr);
1127 return ENOMEM;
1128 }
1129 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1130
1131 newenv[nelem] = envstr;
1132 newenv[nelem+1] = envstr2;
1133 newenv[nelem+2] = NULL;
1134
1135 rv = host_execve(path, argv, newenv);
1136
1137 _DIAGASSERT(rv != 0);
1138 sverrno = errno;
1139 free(envstr2);
1140 free(envstr);
1141 free(newenv);
1142 errno = sverrno;
1143 return rv;
1144 }
1145
/*
 * daemon(3) for rump kernel clients: prepare for the fork, let the
 * host daemon() detach the process, and then set up the connection
 * again in the resulting process.
 *
 * Returns 0 on success and -1 on failure with errno describing the
 * step that failed.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/*
		 * NOTE(review): rf is handed to the (currently empty)
		 * cancel routine and not freed here; who owns it after
		 * a cancel is not settled by this file.
		 */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * The fork context came from malloc() in rumpclient_prefork()
	 * and rumpclient_fork_init() keeps no reference to it, so
	 * release it here, preserving errno for the failure case.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;
}
1167