rumpuser_sp.c revision 1.43 1 /* $NetBSD: rumpuser_sp.c,v 1.43 2011/03/07 21:57:15 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Sysproxy routines. This provides system RPC support over host sockets.
30 * The most notable limitation is that the client and server must share
31 * the same ABI. This does not mean that they have to be the same
32 * machine or that they need to run the same version of the host OS,
33 * just that they must agree on the data structures. This even *might*
34 * work correctly from one hardware architecture to another.
35 */
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.43 2011/03/07 21:57:15 pooka Exp $");
39
40 #include <sys/types.h>
41 #include <sys/atomic.h>
42 #include <sys/mman.h>
43 #include <sys/socket.h>
44
45 #include <arpa/inet.h>
46 #include <netinet/in.h>
47 #include <netinet/tcp.h>
48
49 #include <assert.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <poll.h>
53 #include <pthread.h>
54 #include <stdarg.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59
60 #include <rump/rump.h> /* XXX: for rfork flags */
61 #include <rump/rumpuser.h>
62 #include "rumpuser_int.h"
63
64 #include "sp_common.c"
65
66 #ifndef MAXCLI
67 #define MAXCLI 256
68 #endif
69 #ifndef MAXWORKER
70 #define MAXWORKER 128
71 #endif
72 #ifndef IDLEWORKER
73 #define IDLEWORKER 16
74 #endif
75 int rumpsp_maxworker = MAXWORKER;
76 int rumpsp_idleworker = IDLEWORKER;
77
78 static struct pollfd pfdlist[MAXCLI];
79 static struct spclient spclist[MAXCLI];
80 static unsigned int disco;
81 static volatile int spfini;
82
83 static struct rumpuser_sp_ops spops;
84
85 static char banner[MAXBANNER];
86
87 #define PROTOMAJOR 0
88 #define PROTOMINOR 3
89
90 struct prefork {
91 uint32_t pf_auth[AUTHLEN];
92 struct lwp *pf_lwp;
93
94 LIST_ENTRY(prefork) pf_entries; /* global list */
95 LIST_ENTRY(prefork) pf_spcentries; /* linked from forking spc */
96 };
97 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
98 static pthread_mutex_t pfmtx;
99
100 /*
101 * This version is for the server. It's optimized for multiple threads
102 * and is *NOT* reentrant wrt to signals.
103 */
104 static int
105 waitresp(struct spclient *spc, struct respwait *rw)
106 {
107 int spcstate;
108 int rv = 0;
109
110 pthread_mutex_lock(&spc->spc_mtx);
111 sendunlockl(spc);
112 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
113 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
114 }
115 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
116 spcstate = spc->spc_state;
117 pthread_mutex_unlock(&spc->spc_mtx);
118
119 pthread_cond_destroy(&rw->rw_cv);
120
121 if (rv)
122 return rv;
123 if (spcstate == SPCSTATE_DYING)
124 return ENOTCONN;
125 return rw->rw_error;
126 }
127
128 /*
129 * Manual wrappers, since librump does not have access to the
130 * user namespace wrapped interfaces.
131 */
132
133 static void
134 lwproc_switch(struct lwp *l)
135 {
136
137 spops.spop_schedule();
138 spops.spop_lwproc_switch(l);
139 spops.spop_unschedule();
140 }
141
142 static void
143 lwproc_release(void)
144 {
145
146 spops.spop_schedule();
147 spops.spop_lwproc_release();
148 spops.spop_unschedule();
149 }
150
151 static int
152 lwproc_rfork(struct spclient *spc, int flags, const char *comm)
153 {
154 int rv;
155
156 spops.spop_schedule();
157 rv = spops.spop_lwproc_rfork(spc, flags, comm);
158 spops.spop_unschedule();
159
160 return rv;
161 }
162
163 static int
164 lwproc_newlwp(pid_t pid)
165 {
166 int rv;
167
168 spops.spop_schedule();
169 rv = spops.spop_lwproc_newlwp(pid);
170 spops.spop_unschedule();
171
172 return rv;
173 }
174
175 static struct lwp *
176 lwproc_curlwp(void)
177 {
178 struct lwp *l;
179
180 spops.spop_schedule();
181 l = spops.spop_lwproc_curlwp();
182 spops.spop_unschedule();
183
184 return l;
185 }
186
187 static pid_t
188 lwproc_getpid(void)
189 {
190 pid_t p;
191
192 spops.spop_schedule();
193 p = spops.spop_getpid();
194 spops.spop_unschedule();
195
196 return p;
197 }
198 static void
199 lwproc_execnotify(const char *comm)
200 {
201
202 spops.spop_schedule();
203 spops.spop_execnotify(comm);
204 spops.spop_unschedule();
205 }
206
207 static void
208 lwproc_procexit(void)
209 {
210
211 spops.spop_schedule();
212 spops.spop_procexit();
213 spops.spop_unschedule();
214 }
215
216 static int
217 rumpsyscall(int sysnum, void *data, register_t *retval)
218 {
219 int rv;
220
221 spops.spop_schedule();
222 rv = spops.spop_syscall(sysnum, data, retval);
223 spops.spop_unschedule();
224
225 return rv;
226 }
227
228 static uint64_t
229 nextreq(struct spclient *spc)
230 {
231 uint64_t nw;
232
233 pthread_mutex_lock(&spc->spc_mtx);
234 nw = spc->spc_nextreq++;
235 pthread_mutex_unlock(&spc->spc_mtx);
236
237 return nw;
238 }
239
240 static void
241 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
242 {
243 struct rsp_hdr rhdr;
244
245 rhdr.rsp_len = sizeof(rhdr);
246 rhdr.rsp_reqno = reqno;
247 rhdr.rsp_class = RUMPSP_ERROR;
248 rhdr.rsp_type = 0;
249 rhdr.rsp_error = error;
250
251 sendlock(spc);
252 (void)dosend(spc, &rhdr, sizeof(rhdr));
253 sendunlock(spc);
254 }
255
256 static int
257 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
258 {
259 struct rsp_hdr rhdr;
260 int rv;
261
262 rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
263 rhdr.rsp_reqno = reqno;
264 rhdr.rsp_class = RUMPSP_RESP;
265 rhdr.rsp_type = RUMPSP_HANDSHAKE;
266 rhdr.rsp_error = 0;
267
268 sendlock(spc);
269 rv = dosend(spc, &rhdr, sizeof(rhdr));
270 rv = dosend(spc, &error, sizeof(error));
271 sendunlock(spc);
272
273 return rv;
274 }
275
276 static int
277 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
278 register_t *retval)
279 {
280 struct rsp_hdr rhdr;
281 struct rsp_sysresp sysresp;
282 int rv;
283
284 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
285 rhdr.rsp_reqno = reqno;
286 rhdr.rsp_class = RUMPSP_RESP;
287 rhdr.rsp_type = RUMPSP_SYSCALL;
288 rhdr.rsp_sysnum = 0;
289
290 sysresp.rsys_error = error;
291 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
292
293 sendlock(spc);
294 rv = dosend(spc, &rhdr, sizeof(rhdr));
295 rv = dosend(spc, &sysresp, sizeof(sysresp));
296 sendunlock(spc);
297
298 return rv;
299 }
300
301 static int
302 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
303 {
304 struct rsp_hdr rhdr;
305 int rv;
306
307 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
308 rhdr.rsp_reqno = reqno;
309 rhdr.rsp_class = RUMPSP_RESP;
310 rhdr.rsp_type = RUMPSP_PREFORK;
311 rhdr.rsp_sysnum = 0;
312
313 sendlock(spc);
314 rv = dosend(spc, &rhdr, sizeof(rhdr));
315 rv = dosend(spc, auth, AUTHLEN*sizeof(*auth));
316 sendunlock(spc);
317
318 return rv;
319 }
320
321 static int
322 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
323 int wantstr, void **resp)
324 {
325 struct rsp_hdr rhdr;
326 struct rsp_copydata copydata;
327 struct respwait rw;
328 int rv;
329
330 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
331
332 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
333 rhdr.rsp_class = RUMPSP_REQ;
334 if (wantstr)
335 rhdr.rsp_type = RUMPSP_COPYINSTR;
336 else
337 rhdr.rsp_type = RUMPSP_COPYIN;
338 rhdr.rsp_sysnum = 0;
339
340 copydata.rcp_addr = __UNCONST(remaddr);
341 copydata.rcp_len = *dlen;
342
343 putwait(spc, &rw, &rhdr);
344 rv = dosend(spc, &rhdr, sizeof(rhdr));
345 rv = dosend(spc, ©data, sizeof(copydata));
346 if (rv) {
347 unputwait(spc, &rw);
348 return rv;
349 }
350
351 rv = waitresp(spc, &rw);
352
353 DPRINTF(("copyin: response %d\n", rv));
354
355 *resp = rw.rw_data;
356 if (wantstr)
357 *dlen = rw.rw_dlen;
358
359 return rv;
360
361 }
362
363 static int
364 send_copyout_req(struct spclient *spc, const void *remaddr,
365 const void *data, size_t dlen)
366 {
367 struct rsp_hdr rhdr;
368 struct rsp_copydata copydata;
369 int rv;
370
371 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
372
373 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
374 rhdr.rsp_reqno = nextreq(spc);
375 rhdr.rsp_class = RUMPSP_REQ;
376 rhdr.rsp_type = RUMPSP_COPYOUT;
377 rhdr.rsp_sysnum = 0;
378
379 copydata.rcp_addr = __UNCONST(remaddr);
380 copydata.rcp_len = dlen;
381
382 sendlock(spc);
383 rv = dosend(spc, &rhdr, sizeof(rhdr));
384 rv = dosend(spc, ©data, sizeof(copydata));
385 rv = dosend(spc, data, dlen);
386 sendunlock(spc);
387
388 return rv;
389 }
390
391 static int
392 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
393 {
394 struct rsp_hdr rhdr;
395 struct respwait rw;
396 int rv;
397
398 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
399
400 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
401 rhdr.rsp_class = RUMPSP_REQ;
402 rhdr.rsp_type = RUMPSP_ANONMMAP;
403 rhdr.rsp_sysnum = 0;
404
405 putwait(spc, &rw, &rhdr);
406 rv = dosend(spc, &rhdr, sizeof(rhdr));
407 rv = dosend(spc, &howmuch, sizeof(howmuch));
408 if (rv) {
409 unputwait(spc, &rw);
410 return rv;
411 }
412
413 rv = waitresp(spc, &rw);
414
415 *resp = rw.rw_data;
416
417 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
418
419 return rv;
420 }
421
422 static int
423 send_raise_req(struct spclient *spc, int signo)
424 {
425 struct rsp_hdr rhdr;
426 int rv;
427
428 rhdr.rsp_len = sizeof(rhdr);
429 rhdr.rsp_class = RUMPSP_REQ;
430 rhdr.rsp_type = RUMPSP_RAISE;
431 rhdr.rsp_signo = signo;
432
433 sendlock(spc);
434 rv = dosend(spc, &rhdr, sizeof(rhdr));
435 sendunlock(spc);
436
437 return rv;
438 }
439
440 static void
441 spcref(struct spclient *spc)
442 {
443
444 pthread_mutex_lock(&spc->spc_mtx);
445 spc->spc_refcnt++;
446 pthread_mutex_unlock(&spc->spc_mtx);
447 }
448
449 static void
450 spcrelease(struct spclient *spc)
451 {
452 int ref;
453
454 pthread_mutex_lock(&spc->spc_mtx);
455 ref = --spc->spc_refcnt;
456 pthread_mutex_unlock(&spc->spc_mtx);
457
458 if (ref > 0)
459 return;
460
461 DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
462
463 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
464 _DIAGASSERT(spc->spc_buf == NULL);
465
466 if (spc->spc_mainlwp) {
467 lwproc_switch(spc->spc_mainlwp);
468 lwproc_release();
469 }
470 spc->spc_mainlwp = NULL;
471
472 close(spc->spc_fd);
473 spc->spc_fd = -1;
474 spc->spc_state = SPCSTATE_NEW;
475
476 atomic_inc_uint(&disco);
477 }
478
479 static void
480 serv_handledisco(unsigned int idx)
481 {
482 struct spclient *spc = &spclist[idx];
483
484 DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
485
486 pfdlist[idx].fd = -1;
487 pfdlist[idx].revents = 0;
488 pthread_mutex_lock(&spc->spc_mtx);
489 spc->spc_state = SPCSTATE_DYING;
490 kickall(spc);
491 sendunlockl(spc);
492 pthread_mutex_unlock(&spc->spc_mtx);
493
494 if (spc->spc_mainlwp) {
495 lwproc_switch(spc->spc_mainlwp);
496 lwproc_procexit();
497 lwproc_switch(NULL);
498 }
499
500 /*
501 * Nobody's going to attempt to send/receive anymore,
502 * so reinit info relevant to that.
503 */
504 /*LINTED:pointer casts may be ok*/
505 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
506
507 spcrelease(spc);
508 }
509
510 static void
511 serv_shutdown(void)
512 {
513 struct spclient *spc;
514 unsigned int i;
515
516 for (i = 1; i < MAXCLI; i++) {
517 spc = &spclist[i];
518 if (spc->spc_fd == -1)
519 continue;
520
521 shutdown(spc->spc_fd, SHUT_RDWR);
522 serv_handledisco(i);
523
524 spcrelease(spc);
525 }
526 }
527
528 static unsigned
529 serv_handleconn(int fd, connecthook_fn connhook, int busy)
530 {
531 struct sockaddr_storage ss;
532 socklen_t sl = sizeof(ss);
533 int newfd, flags;
534 unsigned i;
535
536 /*LINTED: cast ok */
537 newfd = accept(fd, (struct sockaddr *)&ss, &sl);
538 if (newfd == -1)
539 return 0;
540
541 if (busy) {
542 close(newfd); /* EBUSY */
543 return 0;
544 }
545
546 flags = fcntl(newfd, F_GETFL, 0);
547 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
548 close(newfd);
549 return 0;
550 }
551
552 if (connhook(newfd) != 0) {
553 close(newfd);
554 return 0;
555 }
556
557 /* write out a banner for the client */
558 if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
559 != (ssize_t)strlen(banner)) {
560 close(newfd);
561 return 0;
562 }
563
564 /* find empty slot the simple way */
565 for (i = 0; i < MAXCLI; i++) {
566 if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
567 break;
568 }
569
570 assert(i < MAXCLI);
571
572 pfdlist[i].fd = newfd;
573 spclist[i].spc_fd = newfd;
574 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
575 spclist[i].spc_refcnt = 1;
576
577 TAILQ_INIT(&spclist[i].spc_respwait);
578
579 DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));
580
581 return i;
582 }
583
584 static void
585 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
586 {
587 register_t retval[2] = {0, 0};
588 int rv, sysnum;
589
590 sysnum = (int)rhdr->rsp_sysnum;
591 DPRINTF(("rump_sp: handling syscall %d from client %d\n",
592 sysnum, spc->spc_pid));
593
594 lwproc_newlwp(spc->spc_pid);
595 spc->spc_syscallreq = rhdr->rsp_reqno;
596 rv = rumpsyscall(sysnum, data, retval);
597 spc->spc_syscallreq = 0;
598 lwproc_release();
599
600 DPRINTF(("rump_sp: got return value %d & %d/%d\n",
601 rv, retval[0], retval[1]));
602
603 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
604 }
605
606 struct sysbouncearg {
607 struct spclient *sba_spc;
608 struct rsp_hdr sba_hdr;
609 uint8_t *sba_data;
610
611 TAILQ_ENTRY(sysbouncearg) sba_entries;
612 };
613 static pthread_mutex_t sbamtx;
614 static pthread_cond_t sbacv;
615 static int nworker, idleworker, nwork;
616 static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);
617
618 /*ARGSUSED*/
619 static void *
620 serv_syscallbouncer(void *arg)
621 {
622 struct sysbouncearg *sba;
623
624 for (;;) {
625 pthread_mutex_lock(&sbamtx);
626 if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) {
627 nworker--;
628 pthread_mutex_unlock(&sbamtx);
629 break;
630 }
631 idleworker++;
632 while (TAILQ_EMPTY(&syslist)) {
633 _DIAGASSERT(nwork == 0);
634 pthread_cond_wait(&sbacv, &sbamtx);
635 }
636 idleworker--;
637
638 sba = TAILQ_FIRST(&syslist);
639 TAILQ_REMOVE(&syslist, sba, sba_entries);
640 nwork--;
641 pthread_mutex_unlock(&sbamtx);
642
643 serv_handlesyscall(sba->sba_spc,
644 &sba->sba_hdr, sba->sba_data);
645 spcrelease(sba->sba_spc);
646 free(sba->sba_data);
647 free(sba);
648 }
649
650 return NULL;
651 }
652
/*
 * Copy data in from the client's address space.
 *
 * arg is the client (struct spclient *), raddr the client address and
 * laddr the local destination.  On entry *len is the size of the
 * local buffer.  For string copies (wantstr != 0) *len is replaced by
 * the length of the data the client returned.
 *
 * The length reported for a string copy comes from the peer.  It is
 * checked against the caller's buffer size before copying, so a
 * misbehaving client cannot overflow laddr.
 *
 * Returns 0 on success and EFAULT on any failure.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	/* reject oversized replies and replies without a payload */
	if (*len > maxlen || (rdata == NULL && *len != 0)) {
		free(rdata);
		rv = EFAULT;
		goto out;
	}

	if (*len != 0)
		memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
675
/*
 * Copy exactly len bytes from client address raddr to laddr.
 * Thin front end for sp_copyin() with string mode off.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, &len, 0);
	return error;
}
682
/*
 * Copy a string from client address raddr to laddr.  *len is passed
 * through to sp_copyin() with string mode on, which may update it.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, len, 1);
	return error;
}
689
/*
 * Copy dlen bytes from local address laddr out to client address
 * raddr by sending an asynchronous copyout request.  arg is the
 * client (struct spclient *).
 *
 * Returns 0 if the request was sent, EFAULT otherwise.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, sendrv;

	rumpuser__kunlock(0, &nlocks, NULL);
	sendrv = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return sendrv ? EFAULT : 0;
}
704
/* Public front end for sp_copyout(); arguments are passed unchanged. */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
711
/*
 * String flavour of copyout.  Sends *dlen bytes via sp_copyout();
 * *dlen itself is not modified.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
718
/*
 * Ask the client (arg, a struct spclient *) to create an anonymous
 * mapping of howmuch bytes.
 *
 * If the request succeeds, *addr is set to the pointer value read
 * from the response payload, even when that value is NULL.
 *
 * Returns 0 on success, EFAULT if the request failed, or ENOMEM if
 * the client reported a NULL address.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *mapped, *payload;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &payload) != 0) {
		error = EFAULT;
	} else {
		/* the payload holds the address of the client's mapping */
		mapped = *(void **)payload;
		free(payload);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
750
751 int
752 rumpuser_sp_raise(void *arg, int signo)
753 {
754 struct spclient *spc = arg;
755 int rv, nlocks;
756
757 rumpuser__kunlock(0, &nlocks, NULL);
758 rv = send_raise_req(spc, signo);
759 rumpuser__klock(nlocks, NULL);
760
761 return rv;
762 }
763
764 /*
765 *
766 * Startup routines and mainloop for server.
767 *
768 */
769
770 struct spservarg {
771 int sps_sock;
772 connecthook_fn sps_connhook;
773 };
774
775 static pthread_attr_t pattr_detached;
776 static void
777 handlereq(struct spclient *spc)
778 {
779 struct sysbouncearg *sba;
780 pthread_t pt;
781 uint64_t reqno;
782 int retries, error, i;
783
784 reqno = spc->spc_hdr.rsp_reqno;
785 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
786 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
787 send_error_resp(spc, reqno, EAUTH);
788 shutdown(spc->spc_fd, SHUT_RDWR);
789 spcfreebuf(spc);
790 return;
791 }
792
793 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) {
794 char *comm = (char *)spc->spc_buf;
795 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;
796
797 /* ensure it's 0-terminated */
798 /* XXX make sure it contains sensible chars? */
799 comm[commlen] = '\0';
800
801 if ((error = lwproc_rfork(spc,
802 RUMP_RFCFDG, comm)) != 0) {
803 shutdown(spc->spc_fd, SHUT_RDWR);
804 }
805
806 spcfreebuf(spc);
807 if (error)
808 return;
809
810 spc->spc_mainlwp = lwproc_curlwp();
811
812 send_handshake_resp(spc, reqno, 0);
813 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) {
814 struct lwp *tmpmain;
815 struct prefork *pf;
816 struct handshake_fork *rfp;
817 int cancel;
818
819 if (spc->spc_off-HDRSZ != sizeof(*rfp)) {
820 send_error_resp(spc, reqno, EINVAL);
821 shutdown(spc->spc_fd, SHUT_RDWR);
822 spcfreebuf(spc);
823 return;
824 }
825
826 /*LINTED*/
827 rfp = (void *)spc->spc_buf;
828 cancel = rfp->rf_cancel;
829
830 pthread_mutex_lock(&pfmtx);
831 LIST_FOREACH(pf, &preforks, pf_entries) {
832 if (memcmp(rfp->rf_auth, pf->pf_auth,
833 sizeof(rfp->rf_auth)) == 0) {
834 LIST_REMOVE(pf, pf_entries);
835 LIST_REMOVE(pf, pf_spcentries);
836 break;
837 }
838 }
839 pthread_mutex_lock(&pfmtx);
840 spcfreebuf(spc);
841
842 if (!pf) {
843 send_error_resp(spc, reqno, ESRCH);
844 shutdown(spc->spc_fd, SHUT_RDWR);
845 return;
846 }
847
848 tmpmain = pf->pf_lwp;
849 free(pf);
850 lwproc_switch(tmpmain);
851 if (cancel) {
852 lwproc_release();
853 shutdown(spc->spc_fd, SHUT_RDWR);
854 return;
855 }
856
857 /*
858 * So, we forked already during "prefork" to save
859 * the file descriptors from a parent exit
860 * race condition. But now we need to fork
861 * a second time since the initial fork has
862 * the wrong spc pointer. (yea, optimize
863 * interfaces some day if anyone cares)
864 */
865 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
866 send_error_resp(spc, reqno, error);
867 shutdown(spc->spc_fd, SHUT_RDWR);
868 lwproc_release();
869 return;
870 }
871 spc->spc_mainlwp = lwproc_curlwp();
872 lwproc_switch(tmpmain);
873 lwproc_release();
874 lwproc_switch(spc->spc_mainlwp);
875
876 send_handshake_resp(spc, reqno, 0);
877 }
878
879 spc->spc_pid = lwproc_getpid();
880
881 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
882 spc, spc->spc_pid));
883
884 lwproc_switch(NULL);
885 spc->spc_state = SPCSTATE_RUNNING;
886 return;
887 }
888
889 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
890 struct prefork *pf;
891 uint32_t auth[AUTHLEN];
892
893 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
894 spcfreebuf(spc);
895
896 pf = malloc(sizeof(*pf));
897 if (pf == NULL) {
898 send_error_resp(spc, reqno, ENOMEM);
899 return;
900 }
901
902 /*
903 * Use client main lwp to fork. this is never used by
904 * worker threads (except if spc refcount goes to 0),
905 * so we can safely use it here.
906 */
907 lwproc_switch(spc->spc_mainlwp);
908 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
909 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
910 send_error_resp(spc, reqno, error);
911 lwproc_switch(NULL);
912 free(pf);
913 return;
914 }
915
916 /* Ok, we have a new process context and a new curlwp */
917 for (i = 0; i < AUTHLEN; i++) {
918 pf->pf_auth[i] = auth[i] = arc4random();
919 }
920 pf->pf_lwp = lwproc_curlwp();
921 lwproc_switch(NULL);
922
923 pthread_mutex_lock(&pfmtx);
924 LIST_INSERT_HEAD(&preforks, pf, pf_entries);
925 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
926 pthread_mutex_unlock(&pfmtx);
927
928 DPRINTF(("rump_sp: prefork handler success %p\n", spc));
929
930 send_prefork_resp(spc, reqno, auth);
931 return;
932 }
933
934 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) {
935 char *comm = (char *)spc->spc_buf;
936 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;
937
938 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) {
939 send_error_resp(spc, reqno, EINVAL);
940 spcfreebuf(spc);
941 return;
942 }
943
944 /* ensure it's 0-terminated */
945 /* XXX make sure it contains sensible chars? */
946 comm[commlen] = '\0';
947
948 lwproc_switch(spc->spc_mainlwp);
949 lwproc_execnotify(comm);
950 lwproc_switch(NULL);
951
952 send_handshake_resp(spc, reqno, 0);
953 spcfreebuf(spc);
954 return;
955 }
956
957 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
958 send_error_resp(spc, reqno, EINVAL);
959 spcfreebuf(spc);
960 return;
961 }
962
963 retries = 0;
964 while ((sba = malloc(sizeof(*sba))) == NULL) {
965 if (nworker == 0 || retries > 10) {
966 send_error_resp(spc, reqno, EAGAIN);
967 spcfreebuf(spc);
968 return;
969 }
970 /* slim chance of more memory? */
971 usleep(10000);
972 }
973
974 sba->sba_spc = spc;
975 sba->sba_hdr = spc->spc_hdr;
976 sba->sba_data = spc->spc_buf;
977 spcresetbuf(spc);
978
979 spcref(spc);
980
981 pthread_mutex_lock(&sbamtx);
982 TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
983 nwork++;
984 if (nwork <= idleworker) {
985 /* do we have a daemon's tool (i.e. idle threads)? */
986 pthread_cond_signal(&sbacv);
987 } else if (nworker < rumpsp_maxworker) {
988 /*
989 * Else, need to create one
990 * (if we can, otherwise just expect another
991 * worker to pick up the syscall)
992 */
993 if (pthread_create(&pt, &pattr_detached,
994 serv_syscallbouncer, NULL) == 0) {
995 nworker++;
996 }
997 }
998 pthread_mutex_unlock(&sbamtx);
999 }
1000
1001 static void *
1002 spserver(void *arg)
1003 {
1004 struct spservarg *sarg = arg;
1005 struct spclient *spc;
1006 unsigned idx;
1007 int seen;
1008 int rv;
1009 unsigned int nfds, maxidx;
1010
1011 for (idx = 0; idx < MAXCLI; idx++) {
1012 pfdlist[idx].fd = -1;
1013 pfdlist[idx].events = POLLIN;
1014
1015 spc = &spclist[idx];
1016 pthread_mutex_init(&spc->spc_mtx, NULL);
1017 pthread_cond_init(&spc->spc_cv, NULL);
1018 spc->spc_fd = -1;
1019 }
1020 pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
1021 pfdlist[0].events = POLLIN;
1022 nfds = 1;
1023 maxidx = 0;
1024
1025 pthread_attr_init(&pattr_detached);
1026 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
1027 /* XXX: doesn't stacksize currently work on NetBSD */
1028 pthread_attr_setstacksize(&pattr_detached, 32*1024);
1029
1030 pthread_mutex_init(&sbamtx, NULL);
1031 pthread_cond_init(&sbacv, NULL);
1032
1033 DPRINTF(("rump_sp: server mainloop\n"));
1034
1035 for (;;) {
1036 int discoed;
1037
1038 /* g/c hangarounds (eventually) */
1039 discoed = atomic_swap_uint(&disco, 0);
1040 while (discoed--) {
1041 nfds--;
1042 idx = maxidx;
1043 while (idx) {
1044 if (pfdlist[idx].fd != -1) {
1045 maxidx = idx;
1046 break;
1047 }
1048 idx--;
1049 }
1050 DPRINTF(("rump_sp: set maxidx to [%u]\n",
1051 maxidx));
1052 }
1053
1054 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
1055 seen = 0;
1056 rv = poll(pfdlist, maxidx+1, INFTIM);
1057 assert(maxidx+1 <= MAXCLI);
1058 assert(rv != 0);
1059 if (rv == -1) {
1060 if (errno == EINTR)
1061 continue;
1062 fprintf(stderr, "rump_spserver: poll returned %d\n",
1063 errno);
1064 break;
1065 }
1066
1067 for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
1068 if ((pfdlist[idx].revents & POLLIN) == 0)
1069 continue;
1070
1071 seen++;
1072 DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
1073 idx, seen, rv));
1074 if (idx > 0) {
1075 spc = &spclist[idx];
1076 DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
1077 switch (readframe(spc)) {
1078 case 0:
1079 break;
1080 case -1:
1081 serv_handledisco(idx);
1082 break;
1083 default:
1084 switch (spc->spc_hdr.rsp_class) {
1085 case RUMPSP_RESP:
1086 kickwaiter(spc);
1087 break;
1088 case RUMPSP_REQ:
1089 handlereq(spc);
1090 break;
1091 default:
1092 send_error_resp(spc,
1093 spc->spc_hdr.rsp_reqno,
1094 ENOENT);
1095 spcfreebuf(spc);
1096 break;
1097 }
1098 break;
1099 }
1100
1101 } else {
1102 DPRINTF(("rump_sp: mainloop new connection\n"));
1103
1104 if (__predict_false(spfini)) {
1105 close(spclist[0].spc_fd);
1106 serv_shutdown();
1107 goto out;
1108 }
1109
1110 idx = serv_handleconn(pfdlist[0].fd,
1111 sarg->sps_connhook, nfds == MAXCLI);
1112 if (idx)
1113 nfds++;
1114 if (idx > maxidx)
1115 maxidx = idx;
1116 DPRINTF(("rump_sp: maxid now %d\n", maxidx));
1117 }
1118 }
1119 }
1120
1121 out:
1122 return NULL;
1123 }
1124
1125 static unsigned cleanupidx;
1126 static struct sockaddr *cleanupsa;
1127 int
1128 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
1129 const char *ostype, const char *osrelease, const char *machine)
1130 {
1131 pthread_t pt;
1132 struct spservarg *sarg;
1133 struct sockaddr *sap;
1134 char *p;
1135 unsigned idx;
1136 int error, s;
1137
1138 p = strdup(url);
1139 if (p == NULL)
1140 return ENOMEM;
1141 error = parseurl(p, &sap, &idx, 1);
1142 free(p);
1143 if (error)
1144 return error;
1145
1146 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
1147 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
1148
1149 s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
1150 if (s == -1)
1151 return errno;
1152
1153 spops = *spopsp;
1154 sarg = malloc(sizeof(*sarg));
1155 if (sarg == NULL) {
1156 close(s);
1157 return ENOMEM;
1158 }
1159
1160 sarg->sps_sock = s;
1161 sarg->sps_connhook = parsetab[idx].connhook;
1162
1163 cleanupidx = idx;
1164 cleanupsa = sap;
1165
1166 /* sloppy error recovery */
1167
1168 /*LINTED*/
1169 if (bind(s, sap, sap->sa_len) == -1) {
1170 fprintf(stderr, "rump_sp: server bind failed\n");
1171 return errno;
1172 }
1173
1174 if (listen(s, MAXCLI) == -1) {
1175 fprintf(stderr, "rump_sp: server listen failed\n");
1176 return errno;
1177 }
1178
1179 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
1180 fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
1181 return errno;
1182 }
1183 pthread_detach(pt);
1184
1185 return 0;
1186 }
1187
1188 void
1189 rumpuser_sp_fini(void *arg)
1190 {
1191 struct spclient *spc = arg;
1192 register_t retval[2] = {0, 0};
1193
1194 if (spclist[0].spc_fd) {
1195 parsetab[cleanupidx].cleanup(cleanupsa);
1196 }
1197
1198 /*
1199 * stuff response into the socket, since this process is just
1200 * about to exit
1201 */
1202 if (spc && spc->spc_syscallreq)
1203 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval);
1204
1205 if (spclist[0].spc_fd) {
1206 shutdown(spclist[0].spc_fd, SHUT_RDWR);
1207 spfini = 1;
1208 }
1209 }
1210