rumpclient.c revision 1.49 1 /* $NetBSD: rumpclient.c,v 1.49 2012/08/03 11:31:34 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include "rumpuser_port.h"
33
34 /*
35 * We use kqueue on NetBSD, poll elsewhere. Theoretically we could
36 * use kqueue on other BSD's too, but I haven't tested those. We
37 * want to use kqueue because it will give us the ability to get signal
38 * notifications but defer their handling to a stage where we do not
39 * hold the communication lock. Taking a signal while holding on to
40 * that lock may cause a deadlock. Therefore, block signals throughout
41 * the RPC when using poll. This unfortunately means that the normal
42 * SIGINT way of stopping a process while it is undergoing rump kernel
43 * RPC will not work. If anyone knows which Linux system call handles
44 * the above scenario correctly, I'm all ears.
45 */
46
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.49 2012/08/03 11:31:34 pooka Exp $");
53
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82
83 #include <rump/rumpclient.h>
84
/*
 * Pointers to the host's implementations of the calls this file issues
 * against real host descriptors.  They are filled in by the FINDSYM
 * lookups in rumpclient_init() and must not be called before that.
 */
85 #define HOSTOPS
86 int (*host_socket)(int, int, int);
87 int (*host_close)(int);
88 int (*host_connect)(int, const struct sockaddr *, socklen_t);
89 int (*host_fcntl)(int, int, ...);
90 int (*host_poll)(struct pollfd *, nfds_t, int);
91 ssize_t (*host_read)(int, void *, size_t);
92 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
93 int (*host_setsockopt)(int, int, int, const void *, socklen_t);
94 int (*host_dup)(int);
95
/* kqueue entry points only exist in USE_KQUEUE builds */
96 #ifdef USE_KQUEUE
97 int (*host_kqueue)(void);
98 int (*host_kevent)(int, const struct kevent *, size_t,
99 struct kevent *, size_t, const struct timespec *);
100 #endif
101
/* used by rumpclient_exec() to perform the actual exec */
102 int (*host_execve)(const char *, char *const[], char *const[]);
103
104 #include "sp_common.c"
105
/*
 * The one client connection this file manages.  spc_fd is -1 whenever
 * no socket is open (doconnect() resets it before reconnecting).
 */
106 static struct spclient clispc = {
107 .spc_fd = -1,
108 };
109
/* kqueue descriptor watching the socket and signals; -1 when not set up */
110 static int kq = -1;
/* every signal; filled by sigfillset() in rumpclient_init(), used to block signals during RPC */
111 static sigset_t fullset;
112
/* defined later in the file, needed by send_with_recon() */
113 static int doconnect(void);
114 static int handshake_req(struct spclient *, int, void *, int, bool);
115
116 /*
117 * Default: don't retry. Most clients can't handle it
118 * (consider e.g. fds suddenly going missing).
119 */
/* changed only through rumpclient_setconnretry() */
120 static time_t retrytimo = 0;
121
122 /* always defined to nothingness for now */
123 #define ERRLOG(a)
124
125 static int
126 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
127 {
128 struct timeval starttime, curtime;
129 time_t prevreconmsg;
130 unsigned reconretries;
131 int rv;
132
133 for (prevreconmsg = 0, reconretries = 0;;) {
134 rv = dosend(spc, iov, iovlen);
135 if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
136 /* no persistent connections */
137 if (retrytimo == 0) {
138 rv = ENOTCONN;
139 break;
140 }
141 if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
142 _exit(1);
143
144 if (!prevreconmsg) {
145 prevreconmsg = time(NULL);
146 gettimeofday(&starttime, NULL);
147 }
148 if (reconretries == 1) {
149 if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
150 rv = ENOTCONN;
151 break;
152 }
153 fprintf(stderr, "rump_sp: connection to "
154 "kernel lost, trying to reconnect ...\n");
155 } else if (time(NULL) - prevreconmsg > 120) {
156 fprintf(stderr, "rump_sp: still trying to "
157 "reconnect ...\n");
158 prevreconmsg = time(NULL);
159 }
160
161 /* check that we aren't over the limit */
162 if (retrytimo > 0) {
163 struct timeval tmp;
164
165 gettimeofday(&curtime, NULL);
166 timersub(&curtime, &starttime, &tmp);
167 if (tmp.tv_sec >= retrytimo) {
168 fprintf(stderr, "rump_sp: reconnect "
169 "failed, %lld second timeout\n",
170 (long long)retrytimo);
171 return ENOTCONN;
172 }
173 }
174
175 /* adhoc backoff timer */
176 if (reconretries < 10) {
177 usleep(100000 * reconretries);
178 } else {
179 sleep(MIN(10, reconretries-9));
180 }
181 reconretries++;
182
183 if ((rv = doconnect()) != 0)
184 continue;
185 if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 NULL, 0, true)) != 0)
187 continue;
188
189 /*
190 * ok, reconnect succesful. we need to return to
191 * the upper layer to get the entire PDU resent.
192 */
193 if (reconretries != 1)
194 fprintf(stderr, "rump_sp: reconnected!\n");
195 rv = EAGAIN;
196 break;
197 } else {
198 _DIAGASSERT(errno != EAGAIN);
199 break;
200 }
201 }
202
203 return rv;
204 }
205
206 static int
207 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
208 bool keeplock)
209 {
210 uint64_t mygen;
211 bool imalive = true;
212
213 pthread_mutex_lock(&spc->spc_mtx);
214 if (!keeplock)
215 sendunlockl(spc);
216 mygen = spc->spc_generation;
217
218 rw->rw_error = 0;
219 while (!rw->rw_done && rw->rw_error == 0) {
220 if (__predict_false(spc->spc_generation != mygen || !imalive))
221 break;
222
223 /* are we free to receive? */
224 if (spc->spc_istatus == SPCSTATUS_FREE) {
225 int gotresp, dosig, rv;
226
227 spc->spc_istatus = SPCSTATUS_BUSY;
228 pthread_mutex_unlock(&spc->spc_mtx);
229
230 dosig = 0;
231 for (gotresp = 0; !gotresp; ) {
232 #ifdef USE_KQUEUE
233 struct kevent kev[8];
234 int i;
235
236 /*
237 * typically we don't have a frame waiting
238 * when we come in here, so call kevent now
239 */
240 rv = host_kevent(kq, NULL, 0,
241 kev, __arraycount(kev), NULL);
242
243 if (__predict_false(rv == -1)) {
244 goto activity;
245 }
246
247 /*
248 * XXX: don't know how this can happen
249 * (timeout cannot expire since there
250 * isn't one), but it does happen.
251 * treat it as an expectional condition
252 * and go through tryread to determine
253 * alive status.
254 */
255 if (__predict_false(rv == 0))
256 goto activity;
257
258 for (i = 0; i < rv; i++) {
259 if (kev[i].filter == EVFILT_SIGNAL)
260 dosig++;
261 }
262 if (dosig)
263 goto cleanup;
264
265 /*
266 * ok, activity. try to read a frame to
267 * determine what happens next.
268 */
269 activity:
270 #else /* USE_KQUEUE */
271 struct pollfd pfd;
272
273 pfd.fd = clispc.spc_fd;
274 pfd.events = POLLIN;
275
276 rv = host_poll(&pfd, 1, -1);
277 #endif /* !USE_KQUEUE */
278
279 switch (readframe(spc)) {
280 case 0:
281 continue;
282 case -1:
283 imalive = false;
284 goto cleanup;
285 default:
286 /* case 1 */
287 break;
288 }
289
290 switch (spc->spc_hdr.rsp_class) {
291 case RUMPSP_RESP:
292 case RUMPSP_ERROR:
293 kickwaiter(spc);
294 gotresp = spc->spc_hdr.rsp_reqno ==
295 rw->rw_reqno;
296 break;
297 case RUMPSP_REQ:
298 handlereq(spc);
299 break;
300 default:
301 /* panic */
302 break;
303 }
304 }
305
306 cleanup:
307 pthread_mutex_lock(&spc->spc_mtx);
308 if (spc->spc_istatus == SPCSTATUS_WANTED)
309 kickall(spc);
310 spc->spc_istatus = SPCSTATUS_FREE;
311
312 /* take one for the team */
313 if (dosig) {
314 pthread_mutex_unlock(&spc->spc_mtx);
315 pthread_sigmask(SIG_SETMASK, mask, NULL);
316 pthread_sigmask(SIG_SETMASK, &fullset, NULL);
317 pthread_mutex_lock(&spc->spc_mtx);
318 }
319 } else {
320 spc->spc_istatus = SPCSTATUS_WANTED;
321 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
322 }
323 }
324 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
325 pthread_mutex_unlock(&spc->spc_mtx);
326 pthread_cond_destroy(&rw->rw_cv);
327
328 if (spc->spc_generation != mygen || !imalive) {
329 return ENOTCONN;
330 }
331 return rw->rw_error;
332 }
333
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 const void *data, size_t dlen, void **resp)
337 {
338 struct rsp_hdr rhdr;
339 struct respwait rw;
340 struct iovec iov[2];
341 int rv;
342
343 rhdr.rsp_len = sizeof(rhdr) + dlen;
344 rhdr.rsp_class = RUMPSP_REQ;
345 rhdr.rsp_type = RUMPSP_SYSCALL;
346 rhdr.rsp_sysnum = sysnum;
347
348 IOVPUT(iov[0], rhdr);
349 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350
351 do {
352 putwait(spc, &rw, &rhdr);
353 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 unputwait(spc, &rw);
355 continue;
356 }
357
358 rv = cliwaitresp(spc, &rw, omask, false);
359 if (rv == ENOTCONN)
360 rv = EAGAIN;
361 } while (rv == EAGAIN);
362
363 *resp = rw.rw_data;
364 return rv;
365 }
366
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 int cancel, bool haslock)
370 {
371 struct handshake_fork rf;
372 const char *myprogname = NULL; /* XXXgcc */
373 struct rsp_hdr rhdr;
374 struct respwait rw;
375 sigset_t omask;
376 size_t bonus;
377 struct iovec iov[2];
378 int rv;
379
380 if (type == HANDSHAKE_FORK) {
381 bonus = sizeof(rf);
382 } else {
383 #ifdef __NetBSD__
384 /* would procfs work on NetBSD too? */
385 myprogname = getprogname();
386 #else
387 int fd = open("/proc/self/comm", O_RDONLY);
388 if (fd == -1) {
389 myprogname = "???";
390 } else {
391 static char commname[128];
392
393 if (read(fd, commname, sizeof(commname)) > 0) {
394 char *n;
395
396 n = strrchr(commname, '\n');
397 if (n)
398 *n = '\0';
399 myprogname = commname;
400 } else {
401 myprogname = "???";
402 }
403 close(fd);
404 }
405 #endif
406 bonus = strlen(myprogname)+1;
407 }
408
409 /* performs server handshake */
410 rhdr.rsp_len = sizeof(rhdr) + bonus;
411 rhdr.rsp_class = RUMPSP_REQ;
412 rhdr.rsp_type = RUMPSP_HANDSHAKE;
413 rhdr.rsp_handshake = type;
414
415 IOVPUT(iov[0], rhdr);
416
417 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
418 if (haslock)
419 putwait_locked(spc, &rw, &rhdr);
420 else
421 putwait(spc, &rw, &rhdr);
422 if (type == HANDSHAKE_FORK) {
423 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
424 rf.rf_cancel = cancel;
425 IOVPUT(iov[1], rf);
426 } else {
427 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
428 }
429 rv = send_with_recon(spc, iov, __arraycount(iov));
430 if (rv || cancel) {
431 if (haslock)
432 unputwait_locked(spc, &rw);
433 else
434 unputwait(spc, &rw);
435 if (cancel) {
436 goto out;
437 }
438 } else {
439 rv = cliwaitresp(spc, &rw, &omask, haslock);
440 }
441 if (rv)
442 goto out;
443
444 rv = *(int *)rw.rw_data;
445 free(rw.rw_data);
446
447 out:
448 pthread_sigmask(SIG_SETMASK, &omask, NULL);
449 return rv;
450 }
451
452 static int
453 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
454 {
455 struct rsp_hdr rhdr;
456 struct respwait rw;
457 struct iovec iov[1];
458 int rv;
459
460 rhdr.rsp_len = sizeof(rhdr);
461 rhdr.rsp_class = RUMPSP_REQ;
462 rhdr.rsp_type = RUMPSP_PREFORK;
463 rhdr.rsp_error = 0;
464
465 IOVPUT(iov[0], rhdr);
466
467 do {
468 putwait(spc, &rw, &rhdr);
469 rv = send_with_recon(spc, iov, __arraycount(iov));
470 if (rv != 0) {
471 unputwait(spc, &rw);
472 continue;
473 }
474
475 rv = cliwaitresp(spc, &rw, omask, false);
476 if (rv == ENOTCONN)
477 rv = EAGAIN;
478 } while (rv == EAGAIN);
479
480 *resp = rw.rw_data;
481 return rv;
482 }
483
484 /*
485 * prevent response code from deadlocking with reconnect code
486 */
487 static int
488 resp_sendlock(struct spclient *spc)
489 {
490 int rv = 0;
491
492 pthread_mutex_lock(&spc->spc_mtx);
493 while (spc->spc_ostatus != SPCSTATUS_FREE) {
494 if (__predict_false(spc->spc_reconnecting)) {
495 rv = EBUSY;
496 goto out;
497 }
498 spc->spc_ostatus = SPCSTATUS_WANTED;
499 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
500 }
501 spc->spc_ostatus = SPCSTATUS_BUSY;
502
503 out:
504 pthread_mutex_unlock(&spc->spc_mtx);
505 return rv;
506 }
507
508 static void
509 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
510 int wantstr)
511 {
512 struct rsp_hdr rhdr;
513 struct iovec iov[2];
514
515 if (wantstr)
516 dlen = MIN(dlen, strlen(data)+1);
517
518 rhdr.rsp_len = sizeof(rhdr) + dlen;
519 rhdr.rsp_reqno = reqno;
520 rhdr.rsp_class = RUMPSP_RESP;
521 rhdr.rsp_type = RUMPSP_COPYIN;
522 rhdr.rsp_sysnum = 0;
523
524 IOVPUT(iov[0], rhdr);
525 IOVPUT_WITHSIZE(iov[1], data, dlen);
526
527 if (resp_sendlock(spc) != 0)
528 return;
529 (void)SENDIOV(spc, iov);
530 sendunlock(spc);
531 }
532
533 static void
534 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
535 {
536 struct rsp_hdr rhdr;
537 struct iovec iov[2];
538
539 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
540 rhdr.rsp_reqno = reqno;
541 rhdr.rsp_class = RUMPSP_RESP;
542 rhdr.rsp_type = RUMPSP_ANONMMAP;
543 rhdr.rsp_sysnum = 0;
544
545 IOVPUT(iov[0], rhdr);
546 IOVPUT(iov[1], addr);
547
548 if (resp_sendlock(spc) != 0)
549 return;
550 (void)SENDIOV(spc, iov);
551 sendunlock(spc);
552 }
553
554 int
555 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
556 register_t *retval)
557 {
558 struct rsp_sysresp *resp;
559 sigset_t omask;
560 void *rdata;
561 int rv;
562
563 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
564
565 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
566 sysnum, data, dlen));
567
568 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
569 if (rv)
570 goto out;
571
572 resp = rdata;
573 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
574 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
575
576 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
577 rv = resp->rsys_error;
578 free(rdata);
579
580 out:
581 pthread_sigmask(SIG_SETMASK, &omask, NULL);
582 return rv;
583 }
584
585 static void
586 handlereq(struct spclient *spc)
587 {
588 struct rsp_copydata *copydata;
589 struct rsp_hdr *rhdr = &spc->spc_hdr;
590 void *mapaddr;
591 size_t maplen;
592 int reqtype = spc->spc_hdr.rsp_type;
593
594 switch (reqtype) {
595 case RUMPSP_COPYIN:
596 case RUMPSP_COPYINSTR:
597 /*LINTED*/
598 copydata = (struct rsp_copydata *)spc->spc_buf;
599 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
600 copydata->rcp_addr, copydata->rcp_len));
601 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
602 copydata->rcp_addr, copydata->rcp_len,
603 reqtype == RUMPSP_COPYINSTR);
604 break;
605 case RUMPSP_COPYOUT:
606 case RUMPSP_COPYOUTSTR:
607 /*LINTED*/
608 copydata = (struct rsp_copydata *)spc->spc_buf;
609 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
610 copydata->rcp_addr, copydata->rcp_len));
611 /*LINTED*/
612 memcpy(copydata->rcp_addr, copydata->rcp_data,
613 copydata->rcp_len);
614 break;
615 case RUMPSP_ANONMMAP:
616 /*LINTED*/
617 maplen = *(size_t *)spc->spc_buf;
618 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
619 MAP_ANON, -1, 0);
620 if (mapaddr == MAP_FAILED)
621 mapaddr = NULL;
622 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
623 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
624 break;
625 case RUMPSP_RAISE:
626 DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
627 raise((int)rhdr->rsp_signo);
628 /*
629 * We most likely have signals blocked, but the signal
630 * will be handled soon enough when we return.
631 */
632 break;
633 default:
634 printf("PANIC: INVALID TYPE %d\n", reqtype);
635 abort();
636 break;
637 }
638
639 spcfreebuf(spc);
640 }
641
/*
 * Result of parseurl() in rumpclient_init(): index of the matching
 * parsetab[] entry and the server address doconnect() connects to.
 */
642 static unsigned ptab_idx;
643 static struct sockaddr *serv_sa;
644
/*
 * dup until we get a "good" fd which does not collide with stdio.
 *
 * myfd       - descriptor to move; -1 is passed straight through
 * mustchange - if non-zero, dup at least once even when myfd is
 *              already above 2; in that case the original descriptor
 *              is left open for the caller
 *
 * Every other intermediate descriptor is closed.  Returns the new
 * descriptor, or -1 with errno from the failed dup.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int nsaved = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nsaved < __arraycount(ofds));
		ofds[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* forced move: do not record the old fd for closing */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	/* the closes below may clobber errno from a failed dup */
	sverrno = (myfd == -1 && nsaved > 0) ? errno : 0;

	while (nsaved > 0)
		host_close(ofds[--nsaved]);

	if (sverrno)
		errno = sverrno;

	return myfd;
}
676
677 static int
678 doconnect(void)
679 {
680 struct respwait rw;
681 struct rsp_hdr rhdr;
682 char banner[MAXBANNER];
683 struct pollfd pfd;
684 int s, error, flags;
685 ssize_t n;
686
687 if (kq != -1)
688 host_close(kq);
689 kq = -1;
690 s = -1;
691
692 if (clispc.spc_fd != -1)
693 host_close(clispc.spc_fd);
694 clispc.spc_fd = -1;
695
696 /*
697 * for reconnect, gate everyone out of the receiver code
698 */
699 putwait_locked(&clispc, &rw, &rhdr);
700
701 pthread_mutex_lock(&clispc.spc_mtx);
702 clispc.spc_reconnecting = 1;
703 pthread_cond_broadcast(&clispc.spc_cv);
704 clispc.spc_generation++;
705 while (clispc.spc_istatus != SPCSTATUS_FREE) {
706 clispc.spc_istatus = SPCSTATUS_WANTED;
707 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
708 }
709 kickall(&clispc);
710
711 /*
712 * we can release it already since we hold the
713 * send lock during reconnect
714 * XXX: assert it
715 */
716 clispc.spc_istatus = SPCSTATUS_FREE;
717 pthread_mutex_unlock(&clispc.spc_mtx);
718 unputwait_locked(&clispc, &rw);
719
720 free(clispc.spc_buf);
721 clispc.spc_off = 0;
722
723 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
724 if (s == -1)
725 return -1;
726
727 pfd.fd = s;
728 pfd.events = POLLIN;
729 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
730 if (errno == EINTR)
731 continue;
732 ERRLOG(("rump_sp: client connect failed: %s\n",
733 strerror(errno)));
734 return -1;
735 }
736
737 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
738 ERRLOG(("rump_sp: connect hook failed\n"));
739 return -1;
740 }
741
742 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
743 ERRLOG(("rump_sp: failed to read banner\n"));
744 return -1;
745 }
746
747 if (banner[n-1] != '\n') {
748 ERRLOG(("rump_sp: invalid banner\n"));
749 return -1;
750 }
751 banner[n] = '\0';
752 /* XXX parse the banner some day */
753
754 flags = host_fcntl(s, F_GETFL, 0);
755 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
756 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
757 return -1;
758 }
759 clispc.spc_fd = s;
760 clispc.spc_state = SPCSTATE_RUNNING;
761 clispc.spc_reconnecting = 0;
762
763 #ifdef USE_KQUEUE
764 {
765 struct kevent kev[NSIG+1];
766 int i;
767
768 /* setup kqueue, we want all signals and the fd */
769 if ((kq = dupgood(host_kqueue(), 0)) == -1) {
770 ERRLOG(("rump_sp: cannot setup kqueue"));
771 return -1;
772 }
773
774 for (i = 0; i < NSIG; i++) {
775 EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
776 }
777 EV_SET(&kev[NSIG], clispc.spc_fd,
778 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
779 if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
780 ERRLOG(("rump_sp: kevent() failed"));
781 return -1;
782 }
783 }
784 #endif /* USE_KQUEUE */
785
786 return 0;
787 }
788
789 static int
790 doinit(void)
791 {
792
793 TAILQ_INIT(&clispc.spc_respwait);
794 pthread_mutex_init(&clispc.spc_mtx, NULL);
795 pthread_cond_init(&clispc.spc_cv, NULL);
796
797 return 0;
798 }
799
/*
 * Default symbol lookup: a plain dlsym().  rumphijack_dlsym is a weak
 * alias of this function, so another object may supply a strong
 * rumphijack_dlsym to take over the lookups done in rumpclient_init().
 */
void *rumpclient__dlsym(void *, const char *);

void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}

void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
809
810 static pid_t init_done = 0;
811
812 int
813 rumpclient_init(void)
814 {
815 char *p;
816 int error;
817 int rv = -1;
818 int hstype;
819 pid_t mypid;
820
821 /*
822 * Make sure we're not riding the context of a previous
823 * host fork. Note: it's *possible* that after n>1 forks
824 * we have the same pid as one of our exited parents, but
825 * I'm pretty sure there are 0 practical implications, since
826 * it means generations would have to skip rumpclient init.
827 */
828 if (init_done == (mypid = getpid()))
829 return 0;
830
831 /* kq does not traverse fork() */
832 if (init_done != 0)
833 kq = -1;
834 init_done = mypid;
835
836 sigfillset(&fullset);
837
838 /*
839 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
840 * wann wird man je verstehen? wann wird man je verstehen?
841 */
842 #define FINDSYM2(_name_,_syscall_) \
843 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
844 #_syscall_)) == NULL) { \
845 if (rumphijack_dlsym == rumpclient__dlsym) \
846 host_##_name_ = _name_; /* static fallback */ \
847 if (host_##_name_ == NULL) \
848 errx(1, "cannot find %s: %s", #_syscall_, \
849 dlerror()); \
850 }
851 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
852 #ifdef __NetBSD__
853 FINDSYM2(socket,__socket30)
854 #else
855 FINDSYM(socket)
856 #endif
857
858 FINDSYM(close)
859 FINDSYM(connect)
860 FINDSYM(fcntl)
861 FINDSYM(poll)
862 FINDSYM(read)
863 FINDSYM(sendmsg)
864 FINDSYM(setsockopt)
865 FINDSYM(dup)
866 FINDSYM(execve)
867
868 #ifdef USE_KQUEUE
869 FINDSYM(kqueue)
870 #if !__NetBSD_Prereq__(5,99,7)
871 FINDSYM(kevent)
872 #else
873 FINDSYM2(kevent,_sys___kevent50)
874 #endif
875 #endif /* USE_KQUEUE */
876
877 #undef FINDSYM
878 #undef FINDSY2
879
880 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
881 if ((p = getenv("RUMP_SERVER")) == NULL) {
882 errno = ENOENT;
883 goto out;
884 }
885 }
886
887 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
888 errno = error;
889 goto out;
890 }
891
892 if (doinit() == -1)
893 goto out;
894
895 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
896 sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
897 unsetenv("RUMPCLIENT__EXECFD");
898 hstype = HANDSHAKE_EXEC;
899 } else {
900 if (doconnect() == -1)
901 goto out;
902 hstype = HANDSHAKE_GUEST;
903 }
904
905 error = handshake_req(&clispc, hstype, NULL, 0, false);
906 if (error) {
907 pthread_mutex_destroy(&clispc.spc_mtx);
908 pthread_cond_destroy(&clispc.spc_cv);
909 if (clispc.spc_fd != -1)
910 host_close(clispc.spc_fd);
911 errno = error;
912 goto out;
913 }
914 rv = 0;
915
916 out:
917 if (rv == -1)
918 init_done = 0;
919 return rv;
920 }
921
/*
 * State captured by rumpclient_prefork() for use across a fork.
 */
922 struct rumpclient_fork {
/* copied from the server's prefork response; sent back in the HANDSHAKE_FORK handshake */
923 uint32_t fork_auth[AUTHLEN];
/* copy of clispc at prefork time; restored by rumpclient_fork_vparent() */
924 struct spclient fork_spc;
/* value of kq at prefork time; restored by rumpclient_fork_vparent() */
925 int fork_kq;
926 };
927
928 struct rumpclient_fork *
929 rumpclient_prefork(void)
930 {
931 struct rumpclient_fork *rpf;
932 sigset_t omask;
933 void *resp;
934 int rv;
935
936 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
937 rpf = malloc(sizeof(*rpf));
938 if (rpf == NULL)
939 goto out;
940
941 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
942 free(rpf);
943 errno = rv;
944 rpf = NULL;
945 goto out;
946 }
947
948 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
949 free(resp);
950
951 rpf->fork_spc = clispc;
952 rpf->fork_kq = kq;
953
954 out:
955 pthread_sigmask(SIG_SETMASK, &omask, NULL);
956 return rpf;
957 }
958
959 int
960 rumpclient_fork_init(struct rumpclient_fork *rpf)
961 {
962 int error;
963 int osock;
964
965 osock = clispc.spc_fd;
966 memset(&clispc, 0, sizeof(clispc));
967 clispc.spc_fd = osock;
968
969 kq = -1; /* kqueue descriptor is not copied over fork() */
970
971 if (doinit() == -1)
972 return -1;
973 if (doconnect() == -1)
974 return -1;
975
976 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
977 0, false);
978 if (error) {
979 pthread_mutex_destroy(&clispc.spc_mtx);
980 pthread_cond_destroy(&clispc.spc_cv);
981 errno = error;
982 return -1;
983 }
984
985 return 0;
986 }
987
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: currently does nothing with rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
995
996 void
997 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
998 {
999
1000 clispc = rpf->fork_spc;
1001 kq = rpf->fork_kq;
1002 }
1003
1004 void
1005 rumpclient_setconnretry(time_t timeout)
1006 {
1007
1008 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1009 return; /* gigo */
1010
1011 retrytimo = timeout;
1012 }
1013
1014 int
1015 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1016 {
1017 int fd = *fdp;
1018 int untilfd, rv;
1019 int newfd;
1020
1021 switch (variant) {
1022 case RUMPCLIENT_CLOSE_FCLOSEM:
1023 untilfd = MAX(clispc.spc_fd, kq);
1024 for (; fd <= untilfd; fd++) {
1025 if (fd == clispc.spc_fd || fd == kq)
1026 continue;
1027 rv = host_close(fd);
1028 if (rv == -1)
1029 return -1;
1030 }
1031 *fdp = fd;
1032 break;
1033
1034 case RUMPCLIENT_CLOSE_CLOSE:
1035 case RUMPCLIENT_CLOSE_DUP2:
1036 if (fd == clispc.spc_fd) {
1037 newfd = dupgood(clispc.spc_fd, 1);
1038 if (newfd == -1)
1039 return -1;
1040
1041 #ifdef USE_KQUEUE
1042 {
1043 struct kevent kev[2];
1044
1045 /*
1046 * now, we have a new socket number, so change
1047 * the file descriptor that kqueue is
1048 * monitoring. remove old and add new.
1049 */
1050 EV_SET(&kev[0], clispc.spc_fd,
1051 EVFILT_READ, EV_DELETE, 0, 0, 0);
1052 EV_SET(&kev[1], newfd,
1053 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1054 if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
1055 int sverrno = errno;
1056 host_close(newfd);
1057 errno = sverrno;
1058 return -1;
1059 }
1060 clispc.spc_fd = newfd;
1061 }
1062 }
1063 if (fd == kq) {
1064 newfd = dupgood(kq, 1);
1065 if (newfd == -1)
1066 return -1;
1067 kq = newfd;
1068 #else /* USE_KQUEUE */
1069 clispc.spc_fd = newfd;
1070 #endif /* !USE_KQUEUE */
1071 }
1072 break;
1073 }
1074
1075 return 0;
1076 }
1077
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1084
1085 /*
1086 * Process is about to exec. Save info about our existing connection
1087 * in the env. rumpclient will check for this info in init().
1088 * This is mostly for the benefit of rumphijack, but regular applications
1089 * may use it as well.
1090 */
1091 int
1092 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1093 {
1094 char buf[4096];
1095 char **newenv;
1096 char *envstr, *envstr2;
1097 size_t nelem;
1098 int rv, sverrno;
1099
1100 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1101 clispc.spc_fd, kq);
1102 envstr = malloc(strlen(buf)+1);
1103 if (envstr == NULL) {
1104 return ENOMEM;
1105 }
1106 strcpy(envstr, buf);
1107
1108 /* do we have a fully parsed url we want to forward in the env? */
1109 if (*parsedurl != '\0') {
1110 snprintf(buf, sizeof(buf),
1111 "RUMP__PARSEDSERVER=%s", parsedurl);
1112 envstr2 = malloc(strlen(buf)+1);
1113 if (envstr2 == NULL) {
1114 free(envstr);
1115 return ENOMEM;
1116 }
1117 strcpy(envstr2, buf);
1118 } else {
1119 envstr2 = NULL;
1120 }
1121
1122 for (nelem = 0; envp && envp[nelem]; nelem++)
1123 continue;
1124
1125 newenv = malloc(sizeof(*newenv) * (nelem+3));
1126 if (newenv == NULL) {
1127 free(envstr2);
1128 free(envstr);
1129 return ENOMEM;
1130 }
1131 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1132
1133 newenv[nelem] = envstr;
1134 newenv[nelem+1] = envstr2;
1135 newenv[nelem+2] = NULL;
1136
1137 rv = host_execve(path, argv, newenv);
1138
1139 _DIAGASSERT(rv != 0);
1140 sverrno = errno;
1141 free(envstr2);
1142 free(envstr);
1143 free(newenv);
1144 errno = sverrno;
1145 return rv;
1146 }
1147
/*
 * daemon(3) for rump clients: obtain fork credentials, daemonize, then
 * re-establish the connection in the surviving process.
 *
 * nochdir / noclose are passed straight to daemon().
 * Returns 0 on success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/*
		 * NOTE(review): rf is handed to fork_cancel, which is
		 * currently unimplemented; confirm who should release
		 * rf on this path.
		 */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	if (rumpclient_fork_init(rf) == -1) {
		sverrno = errno;
		free(rf);
		errno = sverrno;
		return -1;
	}

	/* fork_init does not keep a reference to rf; release it */
	free(rf);
	return 0;
}
1169