/*	$NetBSD: rumpuser_sp.c,v 1.16 2010/11/26 10:59:14 pooka Exp $	*/
2
3 /*
4 * Copyright (c) 2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
/*
 * Sysproxy routines.  This provides system RPC support over host sockets.
 * The most notable limitation is that the client and server must share
 * the same ABI.  This does not mean that they have to be the same
 * machine or that they need to run the same version of the host OS,
 * just that they must agree on the data structures.  This even *might*
 * work correctly from one hardware architecture to another.
 *
 * Not finished yet, i.e. don't use in production.  Lacks locking plus
 * handling of multiple clients and unexpected connection closes.
 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: rumpuser_sp.c,v 1.16 2010/11/26 10:59:14 pooka Exp $");
42
43 #include <sys/types.h>
44 #include <sys/atomic.h>
45 #include <sys/mman.h>
46 #include <sys/socket.h>
47
48 #include <arpa/inet.h>
49 #include <netinet/in.h>
50 #include <netinet/tcp.h>
51
52 #include <assert.h>
53 #include <errno.h>
54 #include <fcntl.h>
55 #include <poll.h>
56 #include <pthread.h>
57 #include <stdarg.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62
63 #include <rump/rumpuser.h>
64 #include "rumpuser_int.h"
65
66 #include "sp_common.c"
67
68 #define MAXCLI 4
69
70 static struct pollfd pfdlist[MAXCLI];
71 static struct spclient spclist[MAXCLI];
72 static unsigned int disco;
73
74 static struct rumpuser_sp_ops spops;
75
/*
 * Manual wrappers, since librump does not have access to the
 * user namespace wrapped interfaces.
 */
80
81 static void
82 lwproc_switch(struct lwp *l)
83 {
84
85 spops.spop_schedule();
86 spops.spop_lwproc_switch(l);
87 spops.spop_unschedule();
88 }
89
90 static void
91 lwproc_release(void)
92 {
93
94 spops.spop_schedule();
95 spops.spop_lwproc_release();
96 spops.spop_unschedule();
97 }
98
99 static int
100 lwproc_newproc(struct spclient *spc)
101 {
102 int rv;
103
104 spops.spop_schedule();
105 rv = spops.spop_lwproc_newproc(spc);
106 spops.spop_unschedule();
107
108 return rv;
109 }
110
111 static int
112 lwproc_newlwp(pid_t pid)
113 {
114 int rv;
115
116 spops.spop_schedule();
117 rv = spops.spop_lwproc_newlwp(pid);
118 spops.spop_unschedule();
119
120 return rv;
121 }
122
123 static struct lwp *
124 lwproc_curlwp(void)
125 {
126 struct lwp *l;
127
128 spops.spop_schedule();
129 l = spops.spop_lwproc_curlwp();
130 spops.spop_unschedule();
131
132 return l;
133 }
134
135 static pid_t
136 lwproc_getpid(void)
137 {
138 pid_t p;
139
140 spops.spop_schedule();
141 p = spops.spop_getpid();
142 spops.spop_unschedule();
143
144 return p;
145 }
146
147 static int
148 rumpsyscall(int sysnum, void *data, register_t *retval)
149 {
150 int rv;
151
152 spops.spop_schedule();
153 rv = spops.spop_syscall(sysnum, data, retval);
154 spops.spop_unschedule();
155
156 return rv;
157 }
158
159 static uint64_t
160 nextreq(struct spclient *spc)
161 {
162 uint64_t nw;
163
164 pthread_mutex_lock(&spc->spc_mtx);
165 nw = spc->spc_nextreq++;
166 pthread_mutex_unlock(&spc->spc_mtx);
167
168 return nw;
169 }
170
171 static int
172 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
173 register_t *retval)
174 {
175 struct rsp_hdr rhdr;
176 struct rsp_sysresp sysresp;
177 int rv;
178
179 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
180 rhdr.rsp_reqno = reqno;
181 rhdr.rsp_class = RUMPSP_RESP;
182 rhdr.rsp_type = RUMPSP_SYSCALL;
183 rhdr.rsp_sysnum = 0;
184
185 sysresp.rsys_error = error;
186 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
187
188 sendlock(spc);
189 rv = dosend(spc, &rhdr, sizeof(rhdr));
190 rv = dosend(spc, &sysresp, sizeof(sysresp));
191 sendunlock(spc);
192
193 return rv;
194 }
195
196 static int
197 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
198 int wantstr, void **resp)
199 {
200 struct rsp_hdr rhdr;
201 struct rsp_copydata copydata;
202 struct respwait rw;
203 int rv;
204
205 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
206
207 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
208 rhdr.rsp_class = RUMPSP_REQ;
209 if (wantstr)
210 rhdr.rsp_type = RUMPSP_COPYINSTR;
211 else
212 rhdr.rsp_type = RUMPSP_COPYIN;
213 rhdr.rsp_sysnum = 0;
214
215 copydata.rcp_addr = __UNCONST(remaddr);
216 copydata.rcp_len = *dlen;
217
218 putwait(spc, &rw, &rhdr);
219 rv = dosend(spc, &rhdr, sizeof(rhdr));
220 rv = dosend(spc, ©data, sizeof(copydata));
221 if (rv) {
222 unputwait(spc, &rw);
223 return rv;
224 }
225
226 rv = waitresp(spc, &rw);
227
228 DPRINTF(("copyin: response %d\n", rv));
229
230 *resp = rw.rw_data;
231 if (wantstr)
232 *dlen = rw.rw_dlen;
233
234 return rv;
235
236 }
237
238 static int
239 send_copyout_req(struct spclient *spc, const void *remaddr,
240 const void *data, size_t dlen)
241 {
242 struct rsp_hdr rhdr;
243 struct rsp_copydata copydata;
244 int rv;
245
246 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
247
248 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
249 rhdr.rsp_reqno = nextreq(spc);
250 rhdr.rsp_class = RUMPSP_REQ;
251 rhdr.rsp_type = RUMPSP_COPYOUT;
252 rhdr.rsp_sysnum = 0;
253
254 copydata.rcp_addr = __UNCONST(remaddr);
255 copydata.rcp_len = dlen;
256
257 sendlock(spc);
258 rv = dosend(spc, &rhdr, sizeof(rhdr));
259 rv = dosend(spc, ©data, sizeof(copydata));
260 rv = dosend(spc, data, dlen);
261 sendunlock(spc);
262
263 return rv;
264 }
265
266 static int
267 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
268 {
269 struct rsp_hdr rhdr;
270 struct respwait rw;
271 int rv;
272
273 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
274
275 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
276 rhdr.rsp_class = RUMPSP_REQ;
277 rhdr.rsp_type = RUMPSP_ANONMMAP;
278 rhdr.rsp_sysnum = 0;
279
280 putwait(spc, &rw, &rhdr);
281 rv = dosend(spc, &rhdr, sizeof(rhdr));
282 rv = dosend(spc, &howmuch, sizeof(howmuch));
283 if (rv) {
284 unputwait(spc, &rw);
285 return rv;
286 }
287
288 rv = waitresp(spc, &rw);
289
290 *resp = rw.rw_data;
291
292 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
293
294 return rv;
295 }
296
297 static void
298 spcref(struct spclient *spc)
299 {
300
301 pthread_mutex_lock(&spc->spc_mtx);
302 spc->spc_refcnt++;
303 pthread_mutex_unlock(&spc->spc_mtx);
304 }
305
/*
 * Drop one reference on the client.  When the count reaches zero the
 * client's main lwp is switched to and released, the socket is
 * closed, the slot's fd and dying flag are reset, and `disco' is
 * incremented for the server loop.
 */
static void
spcrelease(struct spclient *spc)
{
	int remaining;

	pthread_mutex_lock(&spc->spc_mtx);
	remaining = --spc->spc_refcnt;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (remaining <= 0) {
		DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

		_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
		_DIAGASSERT(spc->spc_buf == NULL);

		/* release the lwp created for this client at connect time */
		lwproc_switch(spc->spc_mainlwp);
		lwproc_release();
		spc->spc_mainlwp = NULL;

		close(spc->spc_fd);
		spc->spc_fd = -1;
		spc->spc_dying = 0;

		atomic_inc_uint(&disco);
	}
}
334
335 static void
336 serv_handledisco(unsigned int idx)
337 {
338 struct spclient *spc = &spclist[idx];
339
340 DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
341
342 pfdlist[idx].fd = -1;
343 pfdlist[idx].revents = 0;
344 pthread_mutex_lock(&spc->spc_mtx);
345 spc->spc_dying = 1;
346 kickall(spc);
347 pthread_mutex_unlock(&spc->spc_mtx);
348 spcrelease(spc);
349 }
350
351 static unsigned
352 serv_handleconn(int fd, connecthook_fn connhook, int busy)
353 {
354 struct sockaddr_storage ss;
355 socklen_t sl = sizeof(ss);
356 int newfd, flags;
357 unsigned i;
358
359 /*LINTED: cast ok */
360 newfd = accept(fd, (struct sockaddr *)&ss, &sl);
361 if (newfd == -1)
362 return 0;
363
364 if (busy) {
365 close(newfd); /* EBUSY */
366 return 0;
367 }
368
369 /* XXX: should do some sort of handshake too */
370
371 flags = fcntl(newfd, F_GETFL, 0);
372 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
373 close(newfd);
374 return 0;
375 }
376
377 if (connhook(newfd) != 0) {
378 close(newfd);
379 return 0;
380 }
381
382 /* find empty slot the simple way */
383 for (i = 0; i < MAXCLI; i++) {
384 if (pfdlist[i].fd == -1 && spclist[i].spc_dying == 0)
385 break;
386 }
387
388 if (lwproc_newproc(&spclist[i]) != 0) {
389 close(newfd);
390 return 0;
391 }
392
393 assert(i < MAXCLI);
394
395 pfdlist[i].fd = newfd;
396 spclist[i].spc_fd = newfd;
397 spclist[i].spc_mainlwp = lwproc_curlwp();
398 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
399 spclist[i].spc_pid = lwproc_getpid();
400 spclist[i].spc_refcnt = 1;
401
402 TAILQ_INIT(&spclist[i].spc_respwait);
403
404 DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
405 newfd, i, lwproc_getpid()));
406
407 lwproc_switch(NULL);
408
409 return i;
410 }
411
412 static void
413 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
414 {
415 register_t retval[2] = {0, 0};
416 int rv, sysnum;
417
418 sysnum = (int)rhdr->rsp_sysnum;
419 DPRINTF(("rump_sp: handling syscall %d from client %d\n",
420 sysnum, 0));
421
422 lwproc_newlwp(spc->spc_pid);
423 rv = rumpsyscall(sysnum, data, retval);
424 lwproc_release();
425
426 DPRINTF(("rump_sp: got return value %d & %d/%d\n",
427 rv, retval[0], retval[1]));
428
429 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
430 }
431
432 struct sysbouncearg {
433 struct spclient *sba_spc;
434 struct rsp_hdr sba_hdr;
435 uint8_t *sba_data;
436 };
437 static void *
438 serv_syscallbouncer(void *arg)
439 {
440 struct sysbouncearg *barg = arg;
441
442 serv_handlesyscall(barg->sba_spc, &barg->sba_hdr, barg->sba_data);
443 spcrelease(barg->sba_spc);
444 free(barg->sba_data);
445 free(barg);
446 return NULL;
447 }
448
/*
 * Copy data from client address `raddr' into local buffer `laddr'.
 *
 * arg:     the struct spclient the request is made to
 * len:     in: size of `laddr' / number of bytes wanted;
 *          out (wantstr only): length reported by copyin_req()
 * wantstr: nonzero to request a string instead of a fixed-size copy
 *
 * rumpuser__kunlock()/rumpuser__klock() bracket the request.
 * Returns 0 on success and EFAULT on any failure.
 *
 * For string requests the length comes back from the remote side, so
 * it is checked against the size the caller passed in before being
 * used for memcpy(); an oversized reply is rejected rather than
 * written past the end of `laddr'.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	if (*len > maxlen || (rdata == NULL && *len != 0)) {
		/* reply does not fit the caller's buffer or carries no data */
		rv = EFAULT;
	} else if (*len != 0) {
		memcpy(laddr, rdata, *len);
	}
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
471
/*
 * Fixed-size copyin: fetch `len' bytes from client address `raddr'
 * into `laddr' via sp_copyin() with string mode off.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	size_t nbytes = len;

	return sp_copyin(arg, raddr, laddr, &nbytes, 0);
}
478
/*
 * String copyin: forwards to sp_copyin() with string mode on.  `*len'
 * is passed by reference and may be updated by sp_copyin().
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	const int wantstr = 1;

	return sp_copyin(arg, raddr, laddr, len, wantstr);
}
485
/*
 * Send `dlen' bytes from local buffer `laddr' to client address
 * `raddr' with send_copyout_req(), bracketed by
 * rumpuser__kunlock()/rumpuser__klock().
 * Returns 0 on success and EFAULT if the send reported an error.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);
	error = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return error ? EFAULT : 0;
}
500
/* Public copyout entry point; a plain forward to sp_copyout(). */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
507
/*
 * String copyout entry point: forwards to sp_copyout() using the
 * length stored in `*dlen'.  `*dlen' itself is not modified.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
514
/*
 * Ask the client to map `howmuch' bytes of anonymous memory and
 * store the address it reports in `*addr'.
 *
 * Returns 0 on success, EFAULT if the request failed (`*addr' is
 * left untouched in that case), or ENOMEM if the client reported a
 * NULL address (`*addr' is then set to NULL).
 * rumpuser__kunlock()/rumpuser__klock() bracket the request.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *payload, *mapped;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &payload) != 0) {
		error = EFAULT;
	} else {
		/* the response data holds the mapped address */
		mapped = *(void **)payload;
		free(payload);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
546
/*
 *
 * Startup routines and mainloop for server.
 *
 */
552
553 struct spservarg {
554 int sps_sock;
555 connecthook_fn sps_connhook;
556 };
557
558 static pthread_attr_t pattr_detached;
559 static void
560 handlereq(struct spclient *spc)
561 {
562 struct sysbouncearg *sba;
563 pthread_t pt;
564 int rv;
565
566 /* XXX: check that it's a syscall */
567
568 sba = malloc(sizeof(*sba));
569 if (sba == NULL) {
570 /* panic */
571 abort();
572 }
573
574 sba->sba_spc = spc;
575 sba->sba_hdr = spc->spc_hdr;
576 sba->sba_data = spc->spc_buf;
577
578 spc->spc_buf = NULL;
579 spc->spc_off = 0;
580
581 spcref(spc);
582 if ((rv = pthread_create(&pt, &pattr_detached,
583 serv_syscallbouncer, sba)) != 0) {
584 /* panic */
585 abort();
586 }
587 }
588
/*
 * Server main loop, run as a thread started by rumpuser_sp_init().
 * `arg' is a struct spservarg carrying the listening socket and the
 * connect hook.
 *
 * Initializes the poll and client tables, places the listening socket
 * in slot 0 and then polls forever:
 *  - activity on slot 0 accepts a connection via serv_handleconn();
 *  - activity on any other slot reads a frame; a complete frame is
 *    dispatched by class (response -> kickwaiter(), request ->
 *    handlereq()), and a read result of -1 disconnects the client.
 * Returns NULL only if poll() fails with something other than EINTR.
 *
 * The slot index returned by serv_handleconn() is kept in its own
 * variable.  Storing it into the scan index made the scan resume
 * after the newly assigned slot, skipping ready clients in lower
 * slots for that pass.
 */
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx, newidx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
	}
	pfdlist[0].fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	nfds = 1;
	maxidx = 0;

	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		/* g/c hangarounds (eventually) */
		if (disco) {
			int discoed;

			discoed = atomic_swap_uint(&disco, 0);
			while (discoed--) {
				nfds--;
				/*
				 * Lower maxidx to the highest slot above 0
				 * still in the poll set.  If there is none,
				 * maxidx is left as it was.
				 */
				idx = maxidx;
				while (idx) {
					if (pfdlist[idx].fd != -1) {
						maxidx = idx;
						break;
					}
					idx--;
				}
				DPRINTF(("rump_sp: set maxidx to [%u]\n",
				    maxidx));
				assert(maxidx+1 >= nfds);
			}
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				switch (readframe(spc)) {
				case 0:
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						printf("PANIC\n");
						abort();
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				/* do not clobber the scan index with the result */
				newidx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (newidx)
					nfds++;
				if (newidx > maxidx)
					maxidx = newidx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

	return NULL;
}
700
701 int
702 rumpuser_sp_init(const struct rumpuser_sp_ops *spopsp, const char *url)
703 {
704 pthread_t pt;
705 struct spservarg *sarg;
706 struct sockaddr *sap;
707 char *p;
708 unsigned idx;
709 int error, s;
710
711 p = strdup(url);
712 if (p == NULL)
713 return ENOMEM;
714 error = parseurl(p, &sap, &idx, 1);
715 free(p);
716 if (error)
717 return error;
718
719 s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
720 if (s == -1)
721 return errno;
722
723 spops = *spopsp;
724 sarg = malloc(sizeof(*sarg));
725 if (sarg == NULL) {
726 close(s);
727 return ENOMEM;
728 }
729
730 sarg->sps_sock = s;
731 sarg->sps_connhook = parsetab[idx].connhook;
732
733 /* sloppy error recovery */
734
735 /*LINTED*/
736 if (bind(s, sap, sap->sa_len) == -1) {
737 fprintf(stderr, "rump_sp: server bind failed\n");
738 return errno;
739 }
740 if (listen(s, 20) == -1) {
741 fprintf(stderr, "rump_sp: server listen failed\n");
742 return errno;
743 }
744
745 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
746 fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
747 return errno;
748 }
749 pthread_detach(pt);
750
751 return 0;
752 }
753