/*	$NetBSD: rumpuser_sp.c,v 1.40 2011/02/08 11:21:22 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Sysproxy routines. This provides system RPC support over host sockets.
 * The most notable limitation is that the client and server must share
 * the same ABI. This does not mean that they have to be the same
 * machine or that they need to run the same version of the host OS,
 * just that they must agree on the data structures. This even *might*
 * work correctly from one hardware architecture to another.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: rumpuser_sp.c,v 1.40 2011/02/08 11:21:22 pooka Exp $");

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/mman.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rump/rump.h> /* XXX: for rfork flags */
#include <rump/rumpuser.h>
#include "rumpuser_int.h"

#include "sp_common.c"

#ifndef MAXCLI
#define MAXCLI 256
#endif
#ifndef MAXWORKER
#define MAXWORKER 128
#endif
#ifndef IDLEWORKER
#define IDLEWORKER 16
#endif
int rumpsp_maxworker = MAXWORKER;
int rumpsp_idleworker = IDLEWORKER;

static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
static unsigned int disco;
static volatile int spfini;

static struct rumpuser_sp_ops spops;

static char banner[MAXBANNER];

#define PROTOMAJOR 0
#define PROTOMINOR 2

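/*
 * A prefork is a process created in advance on behalf of a client that
 * intends to fork: the server rforks right away to preserve the file
 * descriptors, and the forked client later claims the new process by
 * presenting the matching authentication cookie in its handshake.
 */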
struct prefork {
	uint32_t pf_auth[AUTHLEN];
	struct lwp *pf_lwp;

	LIST_ENTRY(prefork) pf_entries;		/* global list */
	LIST_ENTRY(prefork) pf_spcentries;	/* linked from forking spc */
};
static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
static pthread_mutex_t pfmtx;

/*
 * This version is for the server. It's optimized for multiple threads
 * and is *NOT* reentrant wrt signals.
 */
static int
waitresp(struct spclient *spc, struct respwait *rw)
{
	int spcstate;
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	sendunlockl(spc);
	while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
		pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	spcstate = spc->spc_state;
	pthread_mutex_unlock(&spc->spc_mtx);

	pthread_cond_destroy(&rw->rw_cv);

	if (rv)
		return rv;
	if (spcstate == SPCSTATE_DYING)
		return ENOTCONN;
	return rw->rw_error;
}

/*
 * Manual wrappers, since librump does not have access to the
 * user namespace wrapped interfaces.
 */

static void
lwproc_switch(struct lwp *l)
{

	spops.spop_schedule();
	spops.spop_lwproc_switch(l);
	spops.spop_unschedule();
}

static void
lwproc_release(void)
{

	spops.spop_schedule();
	spops.spop_lwproc_release();
	spops.spop_unschedule();
}

static int
lwproc_rfork(struct spclient *spc, int flags, const char *comm)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_lwproc_rfork(spc, flags, comm);
	spops.spop_unschedule();

	return rv;
}

static int
lwproc_newlwp(pid_t pid)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_lwproc_newlwp(pid);
	spops.spop_unschedule();

	return rv;
}

static struct lwp *
lwproc_curlwp(void)
{
	struct lwp *l;

	spops.spop_schedule();
	l = spops.spop_lwproc_curlwp();
	spops.spop_unschedule();

	return l;
}

static pid_t
lwproc_getpid(void)
{
	pid_t p;

	spops.spop_schedule();
	p = spops.spop_getpid();
	spops.spop_unschedule();

	return p;
}

static void
lwproc_procexit(void)
{

	spops.spop_schedule();
	spops.spop_procexit();
	spops.spop_unschedule();
}

static int
rumpsyscall(int sysnum, void *data, register_t *retval)
{
	int rv;

	spops.spop_schedule();
	rv = spops.spop_syscall(sysnum, data, retval);
	spops.spop_unschedule();

	return rv;
}

static uint64_t
nextreq(struct spclient *spc)
{
	uint64_t nw;

	pthread_mutex_lock(&spc->spc_mtx);
	nw = spc->spc_nextreq++;
	pthread_mutex_unlock(&spc->spc_mtx);

	return nw;
}

static void
send_error_resp(struct spclient *spc, uint64_t reqno, int error)
{
	struct rsp_hdr rhdr;

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_ERROR;
	rhdr.rsp_type = 0;
	rhdr.rsp_error = error;

	sendlock(spc);
	(void)dosend(spc, &rhdr, sizeof(rhdr));
	sendunlock(spc);
}

static int
send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
{
	struct rsp_hdr rhdr;
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_HANDSHAKE;
	rhdr.rsp_error = 0;

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, &error, sizeof(error));
	sendunlock(spc);

	return rv;
}

static int
send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
	register_t *retval)
{
	struct rsp_hdr rhdr;
	struct rsp_sysresp sysresp;
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = 0;

	sysresp.rsys_error = error;
	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, &sysresp, sizeof(sysresp));
	sendunlock(spc);

	return rv;
}

static int
send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
{
	struct rsp_hdr rhdr;
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_PREFORK;
	rhdr.rsp_sysnum = 0;

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, auth, AUTHLEN*sizeof(*auth));
	sendunlock(spc);

	return rv;
}

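/*
 * Request a copyin from the client: ask it to ship us data (or a string)
 * from its address space and block until the response arrives.
 */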
static int
copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
	int wantstr, void **resp)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct respwait rw;
	int rv;

	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
	rhdr.rsp_class = RUMPSP_REQ;
	if (wantstr)
		rhdr.rsp_type = RUMPSP_COPYINSTR;
	else
		rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = *dlen;

	putwait(spc, &rw, &rhdr);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, &copydata, sizeof(copydata));
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	DPRINTF(("copyin: response %d\n", rv));

	*resp = rw.rw_data;
	if (wantstr)
		*dlen = rw.rw_dlen;

	return rv;

}

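/*
 * Push data out to the client's address space. This is asynchronous:
 * the request is sent but no response is waited for.
 */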
static int
send_copyout_req(struct spclient *spc, const void *remaddr,
	const void *data, size_t dlen)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	int rv;

	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
	rhdr.rsp_reqno = nextreq(spc);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_COPYOUT;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = dlen;

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, &copydata, sizeof(copydata));
	rv = dosend(spc, data, dlen);
	sendunlock(spc);

	return rv;
}

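/*
 * Ask the client to mmap anonymous memory in its own address space and
 * hand back the resulting pointer. Blocks until the response arrives.
 */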
static int
anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	int rv;

	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	putwait(spc, &rw, &rhdr);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	rv = dosend(spc, &howmuch, sizeof(howmuch));
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	*resp = rw.rw_data;

	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));

	return rv;
}

static int
send_raise_req(struct spclient *spc, int signo)
{
	struct rsp_hdr rhdr;
	int rv;

	rhdr.rsp_len = sizeof(rhdr);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_RAISE;
	rhdr.rsp_signo = signo;

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	sendunlock(spc);

	return rv;
}

static void
spcref(struct spclient *spc)
{

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_refcnt++;
	pthread_mutex_unlock(&spc->spc_mtx);
}

static void
spcrelease(struct spclient *spc)
{
	int ref;

	pthread_mutex_lock(&spc->spc_mtx);
	ref = --spc->spc_refcnt;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (ref > 0)
		return;

	DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
	_DIAGASSERT(spc->spc_buf == NULL);

	if (spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_release();
	}
	spc->spc_mainlwp = NULL;

	close(spc->spc_fd);
	spc->spc_fd = -1;
	spc->spc_state = SPCSTATE_NEW;

	atomic_inc_uint(&disco);
}

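/*
 * Disconnect a client: mark it dying, wake up everyone waiting on it,
 * run process exit for its rump process and reset the slot for reuse.
 */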
static void
serv_handledisco(unsigned int idx)
{
	struct spclient *spc = &spclist[idx];

	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));

	pfdlist[idx].fd = -1;
	pfdlist[idx].revents = 0;
	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_state = SPCSTATE_DYING;
	kickall(spc);
	sendunlockl(spc);
	pthread_mutex_unlock(&spc->spc_mtx);

	if (spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_procexit();
		lwproc_switch(NULL);
	}

	/*
	 * Nobody's going to attempt to send/receive anymore,
	 * so reinit info relevant to that.
	 */
	/*LINTED:pointer casts may be ok*/
	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);

	spcrelease(spc);
}

static void
serv_shutdown(void)
{
	struct spclient *spc;
	unsigned int i;

	for (i = 1; i < MAXCLI; i++) {
		spc = &spclist[i];
		if (spc->spc_fd == -1)
			continue;

		shutdown(spc->spc_fd, SHUT_RDWR);
		serv_handledisco(i);

		spcrelease(spc);
	}
}

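/*
 * Accept a new connection: make the socket non-blocking, run the
 * transport-specific connect hook, send the protocol banner and register
 * the client in the first free slot. Returns the slot index, or 0 on failure.
 */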
static unsigned
serv_handleconn(int fd, connecthook_fn connhook, int busy)
{
	struct sockaddr_storage ss;
	socklen_t sl = sizeof(ss);
	int newfd, flags;
	unsigned i;

	/*LINTED: cast ok */
	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
	if (newfd == -1)
		return 0;

	if (busy) {
		close(newfd); /* EBUSY */
		return 0;
	}

	flags = fcntl(newfd, F_GETFL, 0);
	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
		close(newfd);
		return 0;
	}

	if (connhook(newfd) != 0) {
		close(newfd);
		return 0;
	}

	/* write out a banner for the client */
	if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
	    != (ssize_t)strlen(banner)) {
		close(newfd);
		return 0;
	}

	/* find empty slot the simple way */
	for (i = 0; i < MAXCLI; i++) {
		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
			break;
	}

	assert(i < MAXCLI);

	pfdlist[i].fd = newfd;
	spclist[i].spc_fd = newfd;
	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
	spclist[i].spc_refcnt = 1;

	TAILQ_INIT(&spclist[i].spc_respwait);

	DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));

	return i;
}

static void
serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
{
	register_t retval[2] = {0, 0};
	int rv, sysnum;

	sysnum = (int)rhdr->rsp_sysnum;
	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
	    sysnum, spc->spc_pid));

	lwproc_newlwp(spc->spc_pid);
	spc->spc_syscallreq = rhdr->rsp_reqno;
	rv = rumpsyscall(sysnum, data, retval);
	spc->spc_syscallreq = 0;
	lwproc_release();

	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
	    rv, retval[0], retval[1]));

	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
}

struct sysbouncearg {
	struct spclient *sba_spc;
	struct rsp_hdr sba_hdr;
	uint8_t *sba_data;

	TAILQ_ENTRY(sysbouncearg) sba_entries;
};
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
static int nworker, idleworker, nwork;
static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);

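/*
 * Syscall worker thread: pick queued requests off syslist and execute
 * them. A worker exits once there are already enough idle workers.
 */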
/*ARGSUSED*/
static void *
serv_syscallbouncer(void *arg)
{
	struct sysbouncearg *sba;

	for (;;) {
		pthread_mutex_lock(&sbamtx);
		if (__predict_false(idleworker >= rumpsp_idleworker)) {
			nworker--;
			pthread_mutex_unlock(&sbamtx);
			break;
		}
		idleworker++;
		while (TAILQ_EMPTY(&syslist)) {
			_DIAGASSERT(nwork == 0);
			pthread_cond_wait(&sbacv, &sbamtx);
		}
		idleworker--;

		sba = TAILQ_FIRST(&syslist);
		TAILQ_REMOVE(&syslist, sba, sba_entries);
		nwork--;
		pthread_mutex_unlock(&sbamtx);

		serv_handlesyscall(sba->sba_spc,
		    &sba->sba_hdr, sba->sba_data);
		spcrelease(sba->sba_spc);
		free(sba->sba_data);
		free(sba);
	}

	return NULL;
}

static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}

int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{

	return sp_copyin(arg, raddr, laddr, &len, 0);
}

int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{

	return sp_copyin(arg, raddr, laddr, len, 1);
}

static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return EFAULT;
	return 0;
}

int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{

	return sp_copyout(arg, laddr, raddr, dlen);
}

int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{

	return sp_copyout(arg, laddr, raddr, *dlen);
}

int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *resp, *rdata;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv) {
		rv = EFAULT;
		goto out;
	}

	resp = *(void **)rdata;
	free(rdata);

	if (resp == NULL) {
		rv = ENOMEM;
	}

	*addr = resp;

 out:
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return rv;
	return 0;
}

int
rumpuser_sp_raise(void *arg, int signo)
{
	struct spclient *spc = arg;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_raise_req(spc, signo);
	rumpuser__klock(nlocks, NULL);

	return rv;
}

/*
 *
 * Startup routines and mainloop for server.
 *
 */

struct spservarg {
	int sps_sock;
	connecthook_fn sps_connhook;
};

static pthread_attr_t pattr_detached;
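
/*
 * Handle a request from a client: complete the handshake for new
 * connections, service prefork requests, and hand syscall requests
 * over to the worker threads.
 */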
static void
handlereq(struct spclient *spc)
{
	struct sysbouncearg *sba;
	pthread_t pt;
	int retries, error, i;

	if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
		if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH);
			shutdown(spc->spc_fd, SHUT_RDWR);
			spcfreebuf(spc);
			return;
		}

		if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) {
			char *comm = (char *)spc->spc_buf;
			size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;

			/* ensure it's 0-terminated */
			/* XXX make sure it contains sensible chars? */
			comm[commlen] = '\0';

			if ((error = lwproc_rfork(spc,
			    RUMP_RFCFDG, comm)) != 0) {
				shutdown(spc->spc_fd, SHUT_RDWR);
			}

			spcfreebuf(spc);
			if (error)
				return;

			spc->spc_mainlwp = lwproc_curlwp();

			send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0);
		} else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) {
			struct lwp *tmpmain;
			struct prefork *pf;
			struct handshake_fork *rfp;
			uint64_t reqno;
			int cancel;

			reqno = spc->spc_hdr.rsp_reqno;
			if (spc->spc_off-HDRSZ != sizeof(*rfp)) {
				send_error_resp(spc, reqno, EINVAL);
				shutdown(spc->spc_fd, SHUT_RDWR);
				spcfreebuf(spc);
				return;
			}

			/*LINTED*/
			rfp = (void *)spc->spc_buf;
			cancel = rfp->rf_cancel;

			pthread_mutex_lock(&pfmtx);
			LIST_FOREACH(pf, &preforks, pf_entries) {
				if (memcmp(rfp->rf_auth, pf->pf_auth,
				    sizeof(rfp->rf_auth)) == 0) {
					LIST_REMOVE(pf, pf_entries);
					LIST_REMOVE(pf, pf_spcentries);
					break;
				}
			}
			pthread_mutex_unlock(&pfmtx);
			spcfreebuf(spc);

			if (!pf) {
				send_error_resp(spc, reqno, ESRCH);
				shutdown(spc->spc_fd, SHUT_RDWR);
				return;
			}

			tmpmain = pf->pf_lwp;
			free(pf);
			lwproc_switch(tmpmain);
			if (cancel) {
				lwproc_release();
				shutdown(spc->spc_fd, SHUT_RDWR);
				return;
			}

			/*
			 * So, we forked already during "prefork" to save
			 * the file descriptors from a parent exit
			 * race condition. But now we need to fork
			 * a second time since the initial fork has
			 * the wrong spc pointer. (yea, optimize
			 * interfaces some day if anyone cares)
			 */
			if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
				send_error_resp(spc, reqno, error);
				shutdown(spc->spc_fd, SHUT_RDWR);
				lwproc_release();
				return;
			}
			spc->spc_mainlwp = lwproc_curlwp();
			lwproc_switch(tmpmain);
			lwproc_release();
			lwproc_switch(spc->spc_mainlwp);

			send_handshake_resp(spc, reqno, 0);
		}

		spc->spc_pid = lwproc_getpid();

		DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
		    spc, spc->spc_pid));

		lwproc_switch(NULL);
		spc->spc_state = SPCSTATE_RUNNING;
		return;
	}

	if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
		struct prefork *pf;
		uint64_t reqno;
		uint32_t auth[AUTHLEN];

		DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
		reqno = spc->spc_hdr.rsp_reqno;
		spcfreebuf(spc);

		pf = malloc(sizeof(*pf));
		if (pf == NULL) {
			send_error_resp(spc, reqno, ENOMEM);
			return;
		}

		/*
		 * Use the client's main lwp to fork. It is never used by
		 * worker threads (except if the spc refcount goes to 0),
		 * so we can safely use it here.
		 */
		lwproc_switch(spc->spc_mainlwp);
		if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
			DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
			send_error_resp(spc, reqno, error);
			lwproc_switch(NULL);
			free(pf);
			return;
		}

		/* Ok, we have a new process context and a new curlwp */
		for (i = 0; i < AUTHLEN; i++) {
			pf->pf_auth[i] = auth[i] = arc4random();
		}
		pf->pf_lwp = lwproc_curlwp();
		lwproc_switch(NULL);

		pthread_mutex_lock(&pfmtx);
		LIST_INSERT_HEAD(&preforks, pf, pf_entries);
		LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
		pthread_mutex_unlock(&pfmtx);

		DPRINTF(("rump_sp: prefork handler success %p\n", spc));

		send_prefork_resp(spc, reqno, auth);
		return;
	}

	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
		send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL);
		spcfreebuf(spc);
		return;
	}

	retries = 0;
	while ((sba = malloc(sizeof(*sba))) == NULL) {
		if (nworker == 0 || retries++ > 10) {
			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN);
			spcfreebuf(spc);
			return;
		}
		/* slim chance of more memory? */
		usleep(10000);
	}

	sba->sba_spc = spc;
	sba->sba_hdr = spc->spc_hdr;
	sba->sba_data = spc->spc_buf;
	spcresetbuf(spc);

	spcref(spc);

	pthread_mutex_lock(&sbamtx);
	TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
	nwork++;
	if (nwork <= idleworker) {
		/* do we have a daemon's tool (i.e. idle threads)? */
		pthread_cond_signal(&sbacv);
	} else if (nworker < rumpsp_maxworker) {
		/*
		 * Else, need to create one
		 * (if we can, otherwise just expect another
		 * worker to pick up the syscall)
		 */
		if (pthread_create(&pt, &pattr_detached,
		    serv_syscallbouncer, NULL) == 0) {
			nworker++;
		}
	}
	pthread_mutex_unlock(&sbamtx);
}

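/*
 * Server main loop: poll the listen socket plus all client descriptors,
 * accept new connections on slot 0 and read and dispatch frames from
 * the clients.
 */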
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
		spc->spc_fd = -1;
	}
	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	nfds = 1;
	maxidx = 0;

	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
	/* XXX: setting the stack size doesn't currently work on NetBSD */
	pthread_attr_setstacksize(&pattr_detached, 32*1024);

	pthread_mutex_init(&sbamtx, NULL);
	pthread_cond_init(&sbacv, NULL);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		int discoed;

		/* g/c hangarounds (eventually) */
		discoed = atomic_swap_uint(&disco, 0);
		while (discoed--) {
			nfds--;
			idx = maxidx;
			while (idx) {
				if (pfdlist[idx].fd != -1) {
					maxidx = idx;
					break;
				}
				idx--;
			}
			DPRINTF(("rump_sp: set maxidx to [%u]\n",
			    maxidx));
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				switch (readframe(spc)) {
				case 0:
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						send_error_resp(spc,
						    spc->spc_hdr.rsp_reqno,
						    ENOENT);
						spcfreebuf(spc);
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				if (__predict_false(spfini)) {
					close(spclist[0].spc_fd);
					serv_shutdown();
					goto out;
				}

				idx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (idx)
					nfds++;
				if (idx > maxidx)
					maxidx = idx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

 out:
	return NULL;
}

static unsigned cleanupidx;
static struct sockaddr *cleanupsa;
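
/*
 * Initialize the sysproxy server: parse the server URL, create and bind
 * the listening socket and fire off the server thread.
 */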
int
rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
	const char *ostype, const char *osrelease, const char *machine)
{
	pthread_t pt;
	struct spservarg *sarg;
	struct sockaddr *sap;
	char *p;
	unsigned idx;
	int error, s;

	p = strdup(url);
	if (p == NULL)
		return ENOMEM;
	error = parseurl(p, &sap, &idx, 1);
	free(p);
	if (error)
		return error;

	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);

	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
	if (s == -1)
		return errno;

	spops = *spopsp;
	sarg = malloc(sizeof(*sarg));
	if (sarg == NULL) {
		close(s);
		return ENOMEM;
	}

	sarg->sps_sock = s;
	sarg->sps_connhook = parsetab[idx].connhook;

	cleanupidx = idx;
	cleanupsa = sap;

	/* sloppy error recovery */

	/*LINTED*/
	if (bind(s, sap, sap->sa_len) == -1) {
		fprintf(stderr, "rump_sp: server bind failed\n");
		return errno;
	}

	if (listen(s, MAXCLI) == -1) {
		fprintf(stderr, "rump_sp: server listen failed\n");
		return errno;
	}

	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
		return error;
	}
	pthread_detach(pt);

	return 0;
}

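/*
 * Shut down sysproxy: answer a syscall this process may still be
 * executing on behalf of a client, clean up the transport endpoint and
 * tell the server loop to stop accepting connections.
 */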
void
rumpuser_sp_fini(void *arg)
{
	struct spclient *spc = arg;
	register_t retval[2] = {0, 0};

	/*
	 * stuff response into the socket, since this process is just
	 * about to exit
	 */
	if (spc && spc->spc_syscallreq)
		send_syscall_resp(spc, spc->spc_syscallreq, 0, retval);

	if (spclist[0].spc_fd) {
		parsetab[cleanupidx].cleanup(cleanupsa);
		shutdown(spclist[0].spc_fd, SHUT_RDWR);
		spfini = 1;
	}
}