/*	$NetBSD: rumpclient.c,v 1.45.4.3 2013/01/23 00:05:26 yamt Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Client side routines for rump syscall proxy.
 */

#include "rumpuser_port.h"

/*
 * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
 * use kqueue on other BSDs too, but I haven't tested those.  We
 * want to use kqueue because it will give us the ability to get signal
 * notifications but defer their handling to a stage where we do not
 * hold the communication lock.  Taking a signal while holding on to
 * that lock may cause a deadlock.  Therefore, block signals throughout
 * the RPC when using poll.  This unfortunately means that the normal
 * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
 * the above scenario correctly, I'm all ears.
 */
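
/*
 * The poll path relies on the masking pattern used by the public entry
 * points below: block all signals for the duration of the RPC and
 * restore the caller's mask afterwards, roughly
 *
 *	sigset_t omask;
 *
 *	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
 *	... perform RPC ...
 *	pthread_sigmask(SIG_SETMASK, &omask, NULL);
 *
 * On the kqueue path the same mask is installed, but EVFILT_SIGNAL
 * events let the receiving thread notice pending signals and briefly
 * reinstall the caller's mask once it is safe to take them
 * (see cliwaitresp()).
 */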

#ifdef __NetBSD__
#define USE_KQUEUE
#endif

__RCSID("$NetBSD: rumpclient.c,v 1.45.4.3 2013/01/23 00:05:26 yamt Exp $");

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/time.h>

#ifdef USE_KQUEUE
#include <sys/event.h>
#endif

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rump/rumpclient.h>

#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t	(*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);

#include "sp_common.c"

static struct spclient clispc = {
	.spc_fd = -1,
};

static int kq = -1;
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)

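/*
 * Send a request, transparently reconnecting to the server if the
 * connection has gone away and the application has opted into
 * reconnection with rumpclient_setconnretry().  Returns 0 on success,
 * EAGAIN if the caller must resend the entire PDU after a successful
 * reconnect, and an error number otherwise.
 */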
static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				time_t tdiff;

				gettimeofday(&curtime, NULL);
				tdiff = curtime.tv_sec - starttime.tv_sec;
				if (starttime.tv_usec > curtime.tv_usec)
					tdiff--;
				if (tdiff >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/* ad hoc backoff timer */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}

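/*
 * Wait for the response to a previously registered request (rw).
 * Only one thread at a time runs the receiver loop (spc_istatus);
 * the others wait on their respwait condition variable and are woken
 * either by the receiving thread (kickwaiter()/kickall(), see
 * sp_common.c) or by a reconnect bumping spc_generation.  Returns 0
 * with the response data in rw, ENOTCONN if the connection was lost
 * while waiting, or the error recorded in rw_error.
 */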
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}

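/*
 * Perform one syscall request/response cycle.  The request is retried
 * for as long as send_with_recon() manages to reconnect; *resp is set
 * to the malloc()ed response payload, which the caller must free.
 */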
static int
syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
	const void *data, size_t dlen, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + dlen;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = sysnum;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);

	do {
		putwait(spc, &rw, &rhdr);
		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
			unputwait(spc, &rw);
			continue;
		}

		rv = cliwaitresp(spc, &rw, omask, false);
		if (rv == ENOTCONN)
			rv = EAGAIN;
	} while (rv == EAGAIN);

	*resp = rw.rw_data;
	return rv;
}

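/*
 * Perform the handshake with the server.  "type" is one of
 * HANDSHAKE_GUEST (fresh connection), HANDSHAKE_EXEC (connection
 * inherited over exec) or HANDSHAKE_FORK (child reattaching with an
 * authentication cookie obtained via prefork_req()).  For the non-fork
 * cases the client sends its program name as the handshake payload.
 * Returns the status code delivered by the server.
 */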
static int
handshake_req(struct spclient *spc, int type, void *data,
	int cancel, bool haslock)
{
	struct handshake_fork rf;
	const char *myprogname = NULL; /* XXXgcc */
	struct rsp_hdr rhdr;
	struct respwait rw;
	sigset_t omask;
	size_t bonus;
	struct iovec iov[2];
	int rv;

	if (type == HANDSHAKE_FORK) {
		bonus = sizeof(rf);
	} else {
#ifdef __NetBSD__
		/* would procfs work on NetBSD too? */
		myprogname = getprogname();
#else
		int fd = open("/proc/self/comm", O_RDONLY);
		if (fd == -1) {
			myprogname = "???";
		} else {
			static char commname[128];

			memset(commname, 0, sizeof(commname));
			if (read(fd, commname, sizeof(commname)) > 0) {
				char *n;

				n = strrchr(commname, '\n');
				if (n)
					*n = '\0';
				myprogname = commname;
			} else {
				myprogname = "???";
			}
			close(fd);
		}
#endif
		bonus = strlen(myprogname)+1;
	}

	/* performs server handshake */
	rhdr.rsp_len = sizeof(rhdr) + bonus;
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_HANDSHAKE;
	rhdr.rsp_handshake = type;

	IOVPUT(iov[0], rhdr);

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	if (haslock)
		putwait_locked(spc, &rw, &rhdr);
	else
		putwait(spc, &rw, &rhdr);
	if (type == HANDSHAKE_FORK) {
		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
		rf.rf_cancel = cancel;
		IOVPUT(iov[1], rf);
	} else {
		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
	}
	rv = send_with_recon(spc, iov, __arraycount(iov));
	if (rv || cancel) {
		if (haslock)
			unputwait_locked(spc, &rw);
		else
			unputwait(spc, &rw);
		if (cancel) {
			goto out;
		}
	} else {
		rv = cliwaitresp(spc, &rw, &omask, haslock);
	}
	if (rv)
		goto out;

	rv = *(int *)rw.rw_data;
	free(rw.rw_data);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}

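/*
 * Ask the server for a fork authentication cookie.  The cookie is
 * returned in *resp and is later presented back to the server in the
 * HANDSHAKE_FORK handshake by the forked child (or used to cancel
 * the fork).
 */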
static int
prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[1];
	int rv;

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_PREFORK;
	rhdr.rsp_error = 0;

	IOVPUT(iov[0], rhdr);

	do {
		putwait(spc, &rw, &rhdr);
		rv = send_with_recon(spc, iov, __arraycount(iov));
		if (rv != 0) {
			unputwait(spc, &rw);
			continue;
		}

		rv = cliwaitresp(spc, &rw, omask, false);
		if (rv == ENOTCONN)
			rv = EAGAIN;
	} while (rv == EAGAIN);

	*resp = rw.rw_data;
	return rv;
}

/*
 * prevent response code from deadlocking with reconnect code
 */
static int
resp_sendlock(struct spclient *spc)
{
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	while (spc->spc_ostatus != SPCSTATUS_FREE) {
		if (__predict_false(spc->spc_reconnecting)) {
			rv = EBUSY;
			goto out;
		}
		spc->spc_ostatus = SPCSTATUS_WANTED;
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	}
	spc->spc_ostatus = SPCSTATUS_BUSY;

 out:
	pthread_mutex_unlock(&spc->spc_mtx);
	return rv;
}

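/*
 * Responses to copyin/anonmmap requests made by the rump kernel.
 * These run from the request handler below, i.e. potentially from a
 * thread which is already waiting for its own syscall response, so
 * they take resp_sendlock() to avoid deadlocking against a reconnect.
 */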
static void
send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
	int wantstr)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];

	if (wantstr)
		dlen = MIN(dlen, strlen(data)+1);

	rhdr.rsp_len = sizeof(rhdr) + dlen;
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], data, dlen);

	if (resp_sendlock(spc) != 0)
		return;
	(void)SENDIOV(spc, iov);
	sendunlock(spc);
}

static void
send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];

	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], addr);

	if (resp_sendlock(spc) != 0)
		return;
	(void)SENDIOV(spc, iov);
	sendunlock(spc);
}

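/*
 * Execute a system call in the rump kernel.  "data" is the marshalled
 * syscall argument structure, retval receives the two register return
 * values, and the function's return value is the syscall's error
 * number (0 on success).  Callers normally go through the generated
 * rump_sys_*() wrappers; an illustrative sketch of a direct call for
 * an argumentless syscall (hypothetical usage, not taken from this
 * file) would look like:
 *
 *	register_t retval[2];
 *	int error;
 *
 *	error = rumpclient_syscall(SYS_getpid, NULL, 0, retval);
 *	if (error == 0)
 *		printf("pid in rump kernel: %d\n", (int)retval[0]);
 */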
int
rumpclient_syscall(int sysnum, const void *data, size_t dlen,
	register_t *retval)
{
	struct rsp_sysresp *resp;
	sigset_t omask;
	void *rdata;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);

	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
	    sysnum, data, dlen));

	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
	if (rv)
		goto out;

	resp = rdata;
	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));

	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
	rv = resp->rsys_error;
	free(rdata);

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rv;
}

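/*
 * Handle a request from the rump kernel arriving in the middle of our
 * own RPC: copyin/copyinstr (send local memory to the kernel),
 * copyout/copyoutstr (kernel writes to local memory), anonmmap (map
 * anonymous memory on the kernel's behalf) and raise (deliver a signal
 * to this process).
 */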
static void
handlereq(struct spclient *spc)
{
	struct rsp_copydata *copydata;
	struct rsp_hdr *rhdr = &spc->spc_hdr;
	void *mapaddr;
	size_t maplen;
	int reqtype = spc->spc_hdr.rsp_type;

	switch (reqtype) {
	case RUMPSP_COPYIN:
	case RUMPSP_COPYINSTR:
		/*LINTED*/
		copydata = (struct rsp_copydata *)spc->spc_buf;
		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
		    copydata->rcp_addr, copydata->rcp_len));
		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
		    copydata->rcp_addr, copydata->rcp_len,
		    reqtype == RUMPSP_COPYINSTR);
		break;
	case RUMPSP_COPYOUT:
	case RUMPSP_COPYOUTSTR:
		/*LINTED*/
		copydata = (struct rsp_copydata *)spc->spc_buf;
		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
		    copydata->rcp_addr, copydata->rcp_len));
		/*LINTED*/
		memcpy(copydata->rcp_addr, copydata->rcp_data,
		    copydata->rcp_len);
		break;
	case RUMPSP_ANONMMAP:
		/*LINTED*/
		maplen = *(size_t *)spc->spc_buf;
		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
		    MAP_ANON, -1, 0);
		if (mapaddr == MAP_FAILED)
			mapaddr = NULL;
		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
		break;
	case RUMPSP_RAISE:
		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
		raise((int)rhdr->rsp_signo);
		/*
		 * We most likely have signals blocked, but the signal
		 * will be handled soon enough when we return.
		 */
		break;
	default:
		printf("PANIC: INVALID TYPE %d\n", reqtype);
		abort();
		break;
	}

	spcfreebuf(spc);
}

static unsigned ptab_idx;
static struct sockaddr *serv_sa;

/* dup until we get a "good" fd which does not collide with stdio */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int i;

	for (i = 0; (myfd <= 2 || mustchange) && myfd != -1; i++) {
		assert(i < __arraycount(ofds));
		ofds[i] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			i--; /* prevent closing old fd */
			mustchange = 0;
		}
	}

	sverrno = 0;
	if (myfd == -1 && i > 0)
		sverrno = errno;

	while (i-- > 0) {
		host_close(ofds[i]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}

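/*
 * (Re)establish the connection to the rump kernel: flush out anybody
 * waiting in the receiver, open and connect a new socket based on the
 * previously parsed RUMP_SERVER URL, read the server banner, make the
 * socket non-blocking and (on NetBSD) set up kqueue to watch the
 * socket plus all signals.
 */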
static int
doconnect(void)
{
	struct respwait rw;
	struct rsp_hdr rhdr;
	char banner[MAXBANNER];
	int s, error, flags;
	ssize_t n;

	if (kq != -1)
		host_close(kq);
	kq = -1;
	s = -1;

	if (clispc.spc_fd != -1)
		host_close(clispc.spc_fd);
	clispc.spc_fd = -1;

	/*
	 * for reconnect, gate everyone out of the receiver code
	 */
	putwait_locked(&clispc, &rw, &rhdr);

	pthread_mutex_lock(&clispc.spc_mtx);
	clispc.spc_reconnecting = 1;
	pthread_cond_broadcast(&clispc.spc_cv);
	clispc.spc_generation++;
	while (clispc.spc_istatus != SPCSTATUS_FREE) {
		clispc.spc_istatus = SPCSTATUS_WANTED;
		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
	}
	kickall(&clispc);

	/*
	 * we can release it already since we hold the
	 * send lock during reconnect
	 * XXX: assert it
	 */
	clispc.spc_istatus = SPCSTATUS_FREE;
	pthread_mutex_unlock(&clispc.spc_mtx);
	unputwait_locked(&clispc, &rw);

	free(clispc.spc_buf);
	clispc.spc_off = 0;

	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
	if (s == -1)
		return -1;

	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
		if (errno == EINTR)
			continue;
		ERRLOG(("rump_sp: client connect failed: %s\n",
		    strerror(errno)));
		return -1;
	}

	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
		ERRLOG(("rump_sp: connect hook failed\n"));
		return -1;
	}

	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
		ERRLOG(("rump_sp: failed to read banner\n"));
		return -1;
	}

	if (banner[n-1] != '\n') {
		ERRLOG(("rump_sp: invalid banner\n"));
		return -1;
	}
	banner[n] = '\0';
	/* XXX parse the banner some day */

	flags = host_fcntl(s, F_GETFL, 0);
	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
		return -1;
	}
	clispc.spc_fd = s;
	clispc.spc_state = SPCSTATE_RUNNING;
	clispc.spc_reconnecting = 0;

#ifdef USE_KQUEUE
	{
		struct kevent kev[NSIG+1];
		int i;

		/* setup kqueue, we want all signals and the fd */
		if ((kq = dupgood(host_kqueue(), 0)) == -1) {
			ERRLOG(("rump_sp: cannot setup kqueue"));
			return -1;
		}

		for (i = 0; i < NSIG; i++) {
			EV_SET(&kev[i], i+1,
			    EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
		}
		EV_SET(&kev[NSIG], clispc.spc_fd,
		    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
		if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
			ERRLOG(("rump_sp: kevent() failed"));
			return -1;
		}
	}
#endif /* USE_KQUEUE */

	return 0;
}

static int
doinit(void)
{

	TAILQ_INIT(&clispc.spc_respwait);
	pthread_mutex_init(&clispc.spc_mtx, NULL);
	pthread_cond_init(&clispc.spc_cv, NULL);

	return 0;
}

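/*
 * Symbol lookup hook.  rumphijack_dlsym() is a weak alias for our own
 * dlsym() wrapper; a preloaded rumphijack can override it with a
 * strong definition so that the lookups in rumpclient_init() resolve
 * the genuine host functions instead of the hijacked wrappers.
 */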
#ifdef RTLD_NEXT
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif

static pid_t init_done = 0;

int
rumpclient_init(void)
{
	char *p;
	int error;
	int rv = -1;
	int hstype;
	pid_t mypid;

	/*
	 * Make sure we're not riding the context of a previous
	 * host fork.  Note: it's *possible* that after n>1 forks
	 * we have the same pid as one of our exited parents, but
	 * I'm pretty sure there are 0 practical implications, since
	 * it means generations would have to skip rumpclient init.
	 */
	if (init_done == (mypid = getpid()))
		return 0;

	/* kq does not traverse fork() */
	if (init_done != 0)
		kq = -1;
	init_done = mypid;

	sigfillset(&fullset);

	/*
	 * tell me where the symbols are.  gone away, the war begins.
	 * when will we ever learn?  when will we ever learn?
	 */
#ifdef RTLD_NEXT
#define FINDSYM2(_name_,_syscall_)					\
	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
	    #_syscall_)) == NULL) {					\
		if (rumphijack_dlsym == rumpclient__dlsym)		\
			host_##_name_ = _name_; /* static fallback */	\
		if (host_##_name_ == NULL) {				\
			fprintf(stderr,"cannot find %s: %s", #_syscall_,\
			    dlerror());					\
			exit(1);					\
		}							\
	}
#else
#define FINDSYM2(_name_,_syscall)					\
	host_##_name_ = _name_;
#endif
#define FINDSYM(_name_) FINDSYM2(_name_,_name_)
#ifdef __NetBSD__
	FINDSYM2(socket,__socket30)
#else
	FINDSYM(socket)
#endif

	FINDSYM(close)
	FINDSYM(connect)
	FINDSYM(fcntl)
	FINDSYM(poll)
	FINDSYM(read)
	FINDSYM(sendmsg)
	FINDSYM(setsockopt)
	FINDSYM(dup)
	FINDSYM(execve)

#ifdef USE_KQUEUE
	FINDSYM(kqueue)
#if !__NetBSD_Prereq__(5,99,7)
	FINDSYM(kevent)
#else
	FINDSYM2(kevent,_sys___kevent50)
#endif
#endif /* USE_KQUEUE */

#undef FINDSYM
#undef FINDSYM2

	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
		if ((p = getenv("RUMP_SERVER")) == NULL) {
			fprintf(stderr, "error: RUMP_SERVER not set\n");
			errno = ENOENT;
			goto out;
		}
	}

	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
		errno = error;
		goto out;
	}

	if (doinit() == -1)
		goto out;

	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
		unsetenv("RUMPCLIENT__EXECFD");
		hstype = HANDSHAKE_EXEC;
	} else {
		if (doconnect() == -1)
			goto out;
		hstype = HANDSHAKE_GUEST;
	}

	error = handshake_req(&clispc, hstype, NULL, 0, false);
	if (error) {
		pthread_mutex_destroy(&clispc.spc_mtx);
		pthread_cond_destroy(&clispc.spc_cv);
		if (clispc.spc_fd != -1)
			host_close(clispc.spc_fd);
		errno = error;
		goto out;
	}
	rv = 0;

 out:
	if (rv == -1)
		init_done = 0;
	return rv;
}

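/*
 * Fork support.  The parent first asks the server for an
 * authentication cookie with rumpclient_prefork(), forks, and the
 * child reattaches with rumpclient_fork_init(), which presents the
 * cookie in a HANDSHAKE_FORK handshake.  rumpclient_fork_vparent()
 * restores the parent's connection state from the saved copy (needed
 * when the address space was shared, i.e. after vfork).
 */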
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];
	struct spclient fork_spc;
	int fork_kq;
};

struct rumpclient_fork *
rumpclient_prefork(void)
{
	struct rumpclient_fork *rpf;
	sigset_t omask;
	void *resp;
	int rv;

	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
	rpf = malloc(sizeof(*rpf));
	if (rpf == NULL)
		goto out;

	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
		free(rpf);
		errno = rv;
		rpf = NULL;
		goto out;
	}

	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
	free(resp);

	rpf->fork_spc = clispc;
	rpf->fork_kq = kq;

 out:
	pthread_sigmask(SIG_SETMASK, &omask, NULL);
	return rpf;
}

int
rumpclient_fork_init(struct rumpclient_fork *rpf)
{
	int error;
	int osock;

	osock = clispc.spc_fd;
	memset(&clispc, 0, sizeof(clispc));
	clispc.spc_fd = osock;

	kq = -1; /* kqueue descriptor is not copied over fork() */

	if (doinit() == -1)
		return -1;
	if (doconnect() == -1)
		return -1;

	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
	    0, false);
	if (error) {
		pthread_mutex_destroy(&clispc.spc_mtx);
		pthread_cond_destroy(&clispc.spc_cv);
		errno = error;
		return -1;
	}

	return 0;
}

/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}

void
rumpclient_fork_vparent(struct rumpclient_fork *rpf)
{

	clispc = rpf->fork_spc;
	kq = rpf->fork_kq;
}

void
rumpclient_setconnretry(time_t timeout)
{

	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
		return; /* gigo */

	retrytimo = timeout;
}

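/*
 * Intended for rumphijack: called when the application is about to
 * close file descriptors (close(), dup2() or closem-style loops).
 * If the victim descriptor is our communication socket or the kqueue
 * fd, dup it out of the way first so that the connection survives;
 * for the fclosem variant we close the low host descriptors ourselves
 * (skipping our own) and hand back in *fdp the point where the caller
 * should continue.
 */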
int
rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
{
	int fd = *fdp;
	int untilfd, rv;
	int newfd;

	switch (variant) {
	case RUMPCLIENT_CLOSE_FCLOSEM:
		untilfd = MAX(clispc.spc_fd, kq);
		for (; fd <= untilfd; fd++) {
			if (fd == clispc.spc_fd || fd == kq)
				continue;
			rv = host_close(fd);
			if (rv == -1)
				return -1;
		}
		*fdp = fd;
		break;

	case RUMPCLIENT_CLOSE_CLOSE:
	case RUMPCLIENT_CLOSE_DUP2:
		if (fd == clispc.spc_fd) {
			newfd = dupgood(clispc.spc_fd, 1);
			if (newfd == -1)
				return -1;

#ifdef USE_KQUEUE
			{
				struct kevent kev[2];

				/*
				 * now, we have a new socket number, so change
				 * the file descriptor that kqueue is
				 * monitoring.  remove old and add new.
				 */
				EV_SET(&kev[0], clispc.spc_fd,
				    EVFILT_READ, EV_DELETE, 0, 0, 0);
				EV_SET(&kev[1], newfd,
				    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
				if (host_kevent(kq, kev, 2,
				    NULL, 0, NULL) == -1) {
					int sverrno = errno;
					host_close(newfd);
					errno = sverrno;
					return -1;
				}
				clispc.spc_fd = newfd;
			}
		}
		if (fd == kq) {
			newfd = dupgood(kq, 1);
			if (newfd == -1)
				return -1;
			kq = newfd;
#else /* USE_KQUEUE */
			clispc.spc_fd = newfd;
#endif /* !USE_KQUEUE */
		}
		break;
	}

	return 0;
}

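/*
 * Fork the process and reattach the child to the rump kernel.
 * rumpclient__dofork() (see <rump/rumpclient.h>) runs the
 * prefork / fork / reattach sequence around the supplied fork function.
 */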
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}

/*
 * Process is about to exec.  Save info about our existing connection
 * in the env.  rumpclient will check for this info in init().
 * This is mostly for the benefit of rumphijack, but regular applications
 * may use it as well.
 */
int
rumpclient_exec(const char *path, char *const argv[], char *const envp[])
{
	char buf[4096];
	char **newenv;
	char *envstr, *envstr2;
	size_t nelem;
	int rv, sverrno;

	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
	    clispc.spc_fd, kq);
	envstr = malloc(strlen(buf)+1);
	if (envstr == NULL) {
		return ENOMEM;
	}
	strcpy(envstr, buf);

	/* do we have a fully parsed url we want to forward in the env? */
	if (*parsedurl != '\0') {
		snprintf(buf, sizeof(buf),
		    "RUMP__PARSEDSERVER=%s", parsedurl);
		envstr2 = malloc(strlen(buf)+1);
		if (envstr2 == NULL) {
			free(envstr);
			return ENOMEM;
		}
		strcpy(envstr2, buf);
	} else {
		envstr2 = NULL;
	}

	for (nelem = 0; envp && envp[nelem]; nelem++)
		continue;

	newenv = malloc(sizeof(*newenv) * (nelem+3));
	if (newenv == NULL) {
		free(envstr2);
		free(envstr);
		return ENOMEM;
	}
	memcpy(&newenv[0], envp, nelem*sizeof(*envp));

	newenv[nelem] = envstr;
	newenv[nelem+1] = envstr2;
	newenv[nelem+2] = NULL;

	rv = host_execve(path, argv, newenv);

	_DIAGASSERT(rv != 0);
	sverrno = errno;
	free(envstr2);
	free(envstr);
	free(newenv);
	errno = sverrno;
	return rv;
}

/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);
		dup2(fd, 0);
		dup2(fd, 1);
		dup2(fd, 2);
		if (fd > 2)
			close(fd);
	}

	/* note: fork is either completed or cancelled by the call */
	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}