rumpuser_sp.c revision 1.52 1 /* $NetBSD: rumpuser_sp.c,v 1.52 2013/04/27 14:59:08 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Sysproxy routines. This provides system RPC support over host sockets.
30 * The most notable limitation is that the client and server must share
31 * the same ABI. This does not mean that they have to be the same
32 * machine or that they need to run the same version of the host OS,
33 * just that they must agree on the data structures. This even *might*
34 * work correctly from one hardware architecture to another.
35 */
36
37 #include "rumpuser_port.h"
38
39 #if !defined(lint)
40 __RCSID("$NetBSD: rumpuser_sp.c,v 1.52 2013/04/27 14:59:08 pooka Exp $");
41 #endif /* !lint */
42
43 #include <sys/types.h>
44 #include <sys/mman.h>
45 #include <sys/socket.h>
46
47 #include <arpa/inet.h>
48 #include <netinet/in.h>
49 #include <netinet/tcp.h>
50
51 #include <assert.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <poll.h>
55 #include <pthread.h>
56 #include <stdarg.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
62 #include <rump/rump.h> /* XXX: for rfork flags */
63 #include <rump/rumpuser.h>
64
65 #include "rumpuser_int.h"
66
67 #include "sp_common.c"
68
69 #ifndef MAXCLI
70 #define MAXCLI 256
71 #endif
72 #ifndef MAXWORKER
73 #define MAXWORKER 128
74 #endif
75 #ifndef IDLEWORKER
76 #define IDLEWORKER 16
77 #endif
78 int rumpsp_maxworker = MAXWORKER;
79 int rumpsp_idleworker = IDLEWORKER;
80
81 static struct pollfd pfdlist[MAXCLI];
82 static struct spclient spclist[MAXCLI];
83 static unsigned int disco;
84 static volatile int spfini;
85
86 static struct rumpuser_sp_ops spops;
87
88 static char banner[MAXBANNER];
89
90 #define PROTOMAJOR 0
91 #define PROTOMINOR 4
92
93
94 /* how to use atomic ops on Linux? */
95 #if defined(__linux__) || defined(__CYGWIN__)
96 static pthread_mutex_t discomtx = PTHREAD_MUTEX_INITIALIZER;
97
98 static void
99 signaldisco(void)
100 {
101
102 pthread_mutex_lock(&discomtx);
103 disco++;
104 pthread_mutex_unlock(&discomtx);
105 }
106
107 static unsigned int
108 getdisco(void)
109 {
110 unsigned int discocnt;
111
112 pthread_mutex_lock(&discomtx);
113 discocnt = disco;
114 disco = 0;
115 pthread_mutex_unlock(&discomtx);
116
117 return discocnt;
118 }
119
120 #elif defined(__FreeBSD__) || defined(__DragonFly__)
121
122 #include <machine/atomic.h>
123 #define signaldisco() atomic_add_int(&disco, 1)
124 #define getdisco() atomic_readandclear_int(&disco)
125
126 #else /* NetBSD */
127
128 #include <sys/atomic.h>
129 #define signaldisco() atomic_inc_uint(&disco)
130 #define getdisco() atomic_swap_uint(&disco, 0)
131
132 #endif
133
134
135 struct prefork {
136 uint32_t pf_auth[AUTHLEN];
137 struct lwp *pf_lwp;
138
139 LIST_ENTRY(prefork) pf_entries; /* global list */
140 LIST_ENTRY(prefork) pf_spcentries; /* linked from forking spc */
141 };
142 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
143 static pthread_mutex_t pfmtx;
144
/*
 * This version is for the server.  It's optimized for multiple threads
 * and is *NOT* reentrant wrt to signals.
 */
static int
waitresp(struct spclient *spc, struct respwait *rw)
{
	int spcstate;
	int rv = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	/* drop the send lock only once spc_mtx is held */
	sendunlockl(spc);
	/* sleep until the response arrives or the connection dies */
	while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
		pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	spcstate = spc->spc_state;
	pthread_mutex_unlock(&spc->spc_mtx);

	pthread_cond_destroy(&rw->rw_cv);

	/* NOTE(review): rv is never modified after init -- dead check */
	if (rv)
		return rv;
	if (spcstate == SPCSTATE_DYING)
		return ENOTCONN;
	return rw->rw_error;
}
172
173 /*
174 * Manual wrappers, since librump does not have access to the
175 * user namespace wrapped interfaces.
176 */
177
178 static void
179 lwproc_switch(struct lwp *l)
180 {
181
182 spops.spop_schedule();
183 spops.spop_lwproc_switch(l);
184 spops.spop_unschedule();
185 }
186
187 static void
188 lwproc_release(void)
189 {
190
191 spops.spop_schedule();
192 spops.spop_lwproc_release();
193 spops.spop_unschedule();
194 }
195
196 static int
197 lwproc_rfork(struct spclient *spc, int flags, const char *comm)
198 {
199 int rv;
200
201 spops.spop_schedule();
202 rv = spops.spop_lwproc_rfork(spc, flags, comm);
203 spops.spop_unschedule();
204
205 return rv;
206 }
207
208 static int
209 lwproc_newlwp(pid_t pid)
210 {
211 int rv;
212
213 spops.spop_schedule();
214 rv = spops.spop_lwproc_newlwp(pid);
215 spops.spop_unschedule();
216
217 return rv;
218 }
219
220 static struct lwp *
221 lwproc_curlwp(void)
222 {
223 struct lwp *l;
224
225 spops.spop_schedule();
226 l = spops.spop_lwproc_curlwp();
227 spops.spop_unschedule();
228
229 return l;
230 }
231
232 static pid_t
233 lwproc_getpid(void)
234 {
235 pid_t p;
236
237 spops.spop_schedule();
238 p = spops.spop_getpid();
239 spops.spop_unschedule();
240
241 return p;
242 }
243
244 static void
245 lwproc_execnotify(const char *comm)
246 {
247
248 spops.spop_schedule();
249 spops.spop_execnotify(comm);
250 spops.spop_unschedule();
251 }
252
253 static void
254 lwproc_lwpexit(void)
255 {
256
257 spops.spop_schedule();
258 spops.spop_lwpexit();
259 spops.spop_unschedule();
260 }
261
262 static int
263 rumpsyscall(int sysnum, void *data, register_t *retval)
264 {
265 int rv;
266
267 spops.spop_schedule();
268 rv = spops.spop_syscall(sysnum, data, retval);
269 spops.spop_unschedule();
270
271 return rv;
272 }
273
274 static uint64_t
275 nextreq(struct spclient *spc)
276 {
277 uint64_t nw;
278
279 pthread_mutex_lock(&spc->spc_mtx);
280 nw = spc->spc_nextreq++;
281 pthread_mutex_unlock(&spc->spc_mtx);
282
283 return nw;
284 }
285
286 /*
287 * XXX: we send responses with "blocking" I/O. This is not
288 * ok for the main thread. XXXFIXME
289 */
290
291 static void
292 send_error_resp(struct spclient *spc, uint64_t reqno, enum rumpsp_err error)
293 {
294 struct rsp_hdr rhdr;
295 struct iovec iov[1];
296
297 rhdr.rsp_len = sizeof(rhdr);
298 rhdr.rsp_reqno = reqno;
299 rhdr.rsp_class = RUMPSP_ERROR;
300 rhdr.rsp_type = 0;
301 rhdr.rsp_error = error;
302
303 IOVPUT(iov[0], rhdr);
304
305 sendlock(spc);
306 (void)SENDIOV(spc, iov);
307 sendunlock(spc);
308 }
309
310 static int
311 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
312 {
313 struct rsp_hdr rhdr;
314 struct iovec iov[2];
315 int rv;
316
317 rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
318 rhdr.rsp_reqno = reqno;
319 rhdr.rsp_class = RUMPSP_RESP;
320 rhdr.rsp_type = RUMPSP_HANDSHAKE;
321 rhdr.rsp_error = 0;
322
323 IOVPUT(iov[0], rhdr);
324 IOVPUT(iov[1], error);
325
326 sendlock(spc);
327 rv = SENDIOV(spc, iov);
328 sendunlock(spc);
329
330 return rv;
331 }
332
333 static int
334 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
335 register_t *retval)
336 {
337 struct rsp_hdr rhdr;
338 struct rsp_sysresp sysresp;
339 struct iovec iov[2];
340 int rv;
341
342 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
343 rhdr.rsp_reqno = reqno;
344 rhdr.rsp_class = RUMPSP_RESP;
345 rhdr.rsp_type = RUMPSP_SYSCALL;
346 rhdr.rsp_sysnum = 0;
347
348 sysresp.rsys_error = error;
349 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
350
351 IOVPUT(iov[0], rhdr);
352 IOVPUT(iov[1], sysresp);
353
354 sendlock(spc);
355 rv = SENDIOV(spc, iov);
356 sendunlock(spc);
357
358 return rv;
359 }
360
361 static int
362 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
363 {
364 struct rsp_hdr rhdr;
365 struct iovec iov[2];
366 int rv;
367
368 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
369 rhdr.rsp_reqno = reqno;
370 rhdr.rsp_class = RUMPSP_RESP;
371 rhdr.rsp_type = RUMPSP_PREFORK;
372 rhdr.rsp_sysnum = 0;
373
374 IOVPUT(iov[0], rhdr);
375 IOVPUT_WITHSIZE(iov[1], auth, AUTHLEN*sizeof(*auth));
376
377 sendlock(spc);
378 rv = SENDIOV(spc, iov);
379 sendunlock(spc);
380
381 return rv;
382 }
383
/*
 * Request a copyin from the client: ask for *dlen bytes at remaddr.
 * Blocks until the response arrives.  On success *resp points to a
 * buffer owned by the caller; with wantstr set, *dlen is updated to
 * the actual (string) length returned.
 */
static int
copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
	int wantstr, void **resp)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));

	/* rsp_reqno is not assigned here; presumably putwait() sets it */
	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
	rhdr.rsp_class = RUMPSP_REQ;
	if (wantstr)
		rhdr.rsp_type = RUMPSP_COPYINSTR;
	else
		rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = *dlen;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], copydata);

	/* register the waiter before sending so the response can't race us */
	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	DPRINTF(("copyin: response %d\n", rv));

	*resp = rw.rw_data;
	if (wantstr)
		*dlen = rw.rw_dlen;

	return rv;

}
428
429 static int
430 send_copyout_req(struct spclient *spc, const void *remaddr,
431 const void *data, size_t dlen)
432 {
433 struct rsp_hdr rhdr;
434 struct rsp_copydata copydata;
435 struct iovec iov[3];
436 int rv;
437
438 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
439
440 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
441 rhdr.rsp_reqno = nextreq(spc);
442 rhdr.rsp_class = RUMPSP_REQ;
443 rhdr.rsp_type = RUMPSP_COPYOUT;
444 rhdr.rsp_sysnum = 0;
445
446 copydata.rcp_addr = __UNCONST(remaddr);
447 copydata.rcp_len = dlen;
448
449 IOVPUT(iov[0], rhdr);
450 IOVPUT(iov[1], copydata);
451 IOVPUT_WITHSIZE(iov[2], __UNCONST(data), dlen);
452
453 sendlock(spc);
454 rv = SENDIOV(spc, iov);
455 sendunlock(spc);
456
457 return rv;
458 }
459
/*
 * Ask the client to mmap howmuch bytes of anonymous memory in its
 * address space.  Blocks for the response; *resp gets a caller-owned
 * buffer holding the mapped address.
 */
static int
anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));

	/* rsp_reqno is not assigned here; presumably putwait() sets it */
	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], howmuch);

	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	*resp = rw.rw_data;

	/* NOTE(review): dereferences *resp even when rv != 0 -- verify
	 * rw_data is always valid here */
	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));

	return rv;
}
493
494 static int
495 send_raise_req(struct spclient *spc, int signo)
496 {
497 struct rsp_hdr rhdr;
498 struct iovec iov[1];
499 int rv;
500
501 rhdr.rsp_len = sizeof(rhdr);
502 rhdr.rsp_class = RUMPSP_REQ;
503 rhdr.rsp_type = RUMPSP_RAISE;
504 rhdr.rsp_signo = signo;
505
506 IOVPUT(iov[0], rhdr);
507
508 sendlock(spc);
509 rv = SENDIOV(spc, iov);
510 sendunlock(spc);
511
512 return rv;
513 }
514
515 static void
516 spcref(struct spclient *spc)
517 {
518
519 pthread_mutex_lock(&spc->spc_mtx);
520 spc->spc_refcnt++;
521 pthread_mutex_unlock(&spc->spc_mtx);
522 }
523
/*
 * Drop a reference to spc.  When the last reference goes away, release
 * the client's main lwp, close the socket and mark the slot reusable.
 */
static void
spcrelease(struct spclient *spc)
{
	int ref;

	pthread_mutex_lock(&spc->spc_mtx);
	ref = --spc->spc_refcnt;
	/* an exec waits for refs to drain (see serv_handleexec): wake it */
	if (__predict_false(spc->spc_inexec && ref <= 2))
		pthread_cond_broadcast(&spc->spc_cv);
	pthread_mutex_unlock(&spc->spc_mtx);

	if (ref > 0)
		return;

	DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
	_DIAGASSERT(spc->spc_buf == NULL);

	if (spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_release();
	}
	spc->spc_mainlwp = NULL;

	close(spc->spc_fd);
	spc->spc_fd = -1;
	/* SPCSTATE_NEW makes the slot eligible for reuse in serv_handleconn */
	spc->spc_state = SPCSTATE_NEW;

	signaldisco();
}
555
/*
 * Handle a disconnected client at slot idx: stop polling its fd, wake
 * all response waiters, kill the client's lwps and reset the slot.
 */
static void
serv_handledisco(unsigned int idx)
{
	struct spclient *spc = &spclist[idx];
	int dolwpexit;

	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));

	pfdlist[idx].fd = -1;
	pfdlist[idx].revents = 0;
	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_state = SPCSTATE_DYING;
	kickall(spc);
	sendunlockl(spc);
	/* exec uses mainlwp in another thread, but also nuked all lwps */
	dolwpexit = !spc->spc_inexec;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (dolwpexit && spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_lwpexit();
		lwproc_switch(NULL);
	}

	/*
	 * Nobody's going to attempt to send/receive anymore,
	 * so reinit info relevant to that.
	 */
	/*LINTED:pointer casts may be ok*/
	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);

	spcrelease(spc);
}
589
/*
 * Shut down all active client connections.  Called from the mainloop
 * once spfini has been set.
 */
static void
serv_shutdown(void)
{
	struct spclient *spc;
	unsigned int i;

	/* slot 0 is the listen socket; client slots start at 1 */
	for (i = 1; i < MAXCLI; i++) {
		spc = &spclist[i];
		if (spc->spc_fd == -1)
			continue;

		shutdown(spc->spc_fd, SHUT_RDWR);
		serv_handledisco(i);

		/*
		 * NOTE(review): serv_handledisco() already calls
		 * spcrelease(); this extra release looks like it can
		 * drive the refcount negative -- verify intent.
		 */
		spcrelease(spc);
	}
}
607
/*
 * Accept a new connection on the listen socket and install it into a
 * free client slot.  Returns the slot index, or 0 on any failure
 * (slot 0 holds the listen socket, so 0 is never a valid client slot).
 */
static unsigned
serv_handleconn(int fd, connecthook_fn connhook, int busy)
{
	struct sockaddr_storage ss;
	socklen_t sl = sizeof(ss);
	int newfd, flags;
	unsigned i;

	/*LINTED: cast ok */
	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
	if (newfd == -1)
		return 0;

	/* all client slots in use: refuse the connection */
	if (busy) {
		close(newfd); /* EBUSY */
		return 0;
	}

	/* the mainloop must never block on a client socket */
	flags = fcntl(newfd, F_GETFL, 0);
	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
		close(newfd);
		return 0;
	}

	/* per-transport setup (e.g. socket options) */
	if (connhook(newfd) != 0) {
		close(newfd);
		return 0;
	}

	/* write out a banner for the client */
	if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
	    != (ssize_t)strlen(banner)) {
		close(newfd);
		return 0;
	}

	/* find empty slot the simple way */
	for (i = 0; i < MAXCLI; i++) {
		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
			break;
	}

	/* caller passes busy == (nfds == MAXCLI), so a slot must exist */
	assert(i < MAXCLI);

	pfdlist[i].fd = newfd;
	spclist[i].spc_fd = newfd;
	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
	spclist[i].spc_refcnt = 1;

	TAILQ_INIT(&spclist[i].spc_respwait);

	DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));

	return i;
}
663
664 static void
665 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
666 {
667 register_t retval[2] = {0, 0};
668 int rv, sysnum;
669
670 sysnum = (int)rhdr->rsp_sysnum;
671 DPRINTF(("rump_sp: handling syscall %d from client %d\n",
672 sysnum, spc->spc_pid));
673
674 if (__predict_false((rv = lwproc_newlwp(spc->spc_pid)) != 0)) {
675 retval[0] = -1;
676 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
677 return;
678 }
679 spc->spc_syscallreq = rhdr->rsp_reqno;
680 rv = rumpsyscall(sysnum, data, retval);
681 spc->spc_syscallreq = 0;
682 lwproc_release();
683
684 DPRINTF(("rump_sp: got return value %d & %d/%d\n",
685 rv, retval[0], retval[1]));
686
687 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
688 }
689
/*
 * Finish an exec request: wait until the other worker threads for this
 * client are gone, then notify the rump kernel of the new command name
 * and respond.  Runs in a worker thread (queued as SBA_EXEC).
 */
static void
serv_handleexec(struct spclient *spc, struct rsp_hdr *rhdr, char *comm)
{
	size_t commlen = rhdr->rsp_len - HDRSZ;

	pthread_mutex_lock(&spc->spc_mtx);
	/* one for the connection and one for us */
	while (spc->spc_refcnt > 2)
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	pthread_mutex_unlock(&spc->spc_mtx);

	/*
	 * ok, all the threads are dead (or one is still alive and
	 * the connection is dead, in which case this doesn't matter
	 * very much).  proceed with exec.
	 */

	/* ensure comm is 0-terminated */
	/* TODO: make sure it contains sensible chars? */
	comm[commlen] = '\0';

	lwproc_switch(spc->spc_mainlwp);
	lwproc_execnotify(comm);
	lwproc_switch(NULL);

	/* exec done: clear the flag set by handlereq() and respond */
	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_inexec = 0;
	pthread_mutex_unlock(&spc->spc_mtx);
	send_handshake_resp(spc, rhdr->rsp_reqno, 0);
}
720
/* Work item types handled by the bouncer (worker) threads. */
enum sbatype { SBA_SYSCALL, SBA_EXEC };

/* One queued work item; header and payload ownership moves to the worker. */
struct servbouncearg {
	struct spclient *sba_spc;	/* referenced client */
	struct rsp_hdr sba_hdr;		/* copy of the request header */
	enum sbatype sba_type;		/* syscall or exec */
	uint8_t *sba_data;		/* request payload, freed by worker */

	TAILQ_ENTRY(servbouncearg) sba_entries;
};
/* worker pool state, protected by sbamtx (initialized in spserver()) */
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
static int nworker, idleworker, nwork;
static TAILQ_HEAD(, servbouncearg) wrklist = TAILQ_HEAD_INITIALIZER(wrklist);
735
/*
 * Worker thread main: pull work items off wrklist and execute them.
 * A worker retires itself when the idle pool exceeds rumpsp_idleworker.
 */
/*ARGSUSED*/
static void *
serv_workbouncer(void *arg)
{
	struct servbouncearg *sba;

	for (;;) {
		pthread_mutex_lock(&sbamtx);
		/* more idle threads than needed?  retire this one */
		if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) {
			nworker--;
			pthread_mutex_unlock(&sbamtx);
			break;
		}
		idleworker++;
		while (TAILQ_EMPTY(&wrklist)) {
			_DIAGASSERT(nwork == 0);
			pthread_cond_wait(&sbacv, &sbamtx);
		}
		idleworker--;

		sba = TAILQ_FIRST(&wrklist);
		TAILQ_REMOVE(&wrklist, sba, sba_entries);
		nwork--;
		pthread_mutex_unlock(&sbamtx);

		if (__predict_true(sba->sba_type == SBA_SYSCALL)) {
			serv_handlesyscall(sba->sba_spc,
			    &sba->sba_hdr, sba->sba_data);
		} else {
			_DIAGASSERT(sba->sba_type == SBA_EXEC);
			serv_handleexec(sba->sba_spc, &sba->sba_hdr,
			    (char *)sba->sba_data);
		}
		/* the work item owned one client reference and the buffer */
		spcrelease(sba->sba_spc);
		free(sba->sba_data);
		free(sba);
	}

	return NULL;
}
776
/*
 * Copy data in from the client into laddr.  Blocks for the response.
 * Returns 0 or EFAULT; with wantstr set, *len is updated to the
 * actual string length by copyin_req().
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	int rv, nlocks;

	/* give up the rump kernel CPU while we wait on the network */
	rumpuser__unschedule(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__reschedule(nlocks, NULL);
	/* all failure modes are reported to the caller as EFAULT */
	if (rv)
		return EFAULT;
	return 0;
}
799
/* Hypervisor entry: copy len bytes from client address raddr to laddr. */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	size_t n = len;

	return sp_copyin(arg, raddr, laddr, &n, 0);
}
806
/*
 * Hypervisor entry: copy in a NUL-terminated string from the client;
 * *len is updated to the actual length.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{

	return sp_copyin(arg, raddr, laddr, len, 1);
}
813
/*
 * Copy dlen bytes from laddr to client address raddr.  The request is
 * asynchronous; any send failure is folded into EFAULT.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, error;

	rumpuser__unschedule(0, &nlocks, NULL);
	error = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__reschedule(nlocks, NULL);

	return error ? EFAULT : 0;
}
828
/* Hypervisor entry: copy dlen bytes out to the client. */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{

	return sp_copyout(arg, laddr, raddr, dlen);
}
835
/* Hypervisor entry: copy out a string of *dlen bytes to the client. */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t n = *dlen;

	return sp_copyout(arg, laddr, raddr, n);
}
842
/*
 * Hypervisor entry: have the client map howmuch bytes of anonymous
 * memory in its address space; the mapped address is returned in *addr.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *resp, *rdata;
	int nlocks, rv;

	rumpuser__unschedule(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv) {
		rv = EFAULT;
		goto out;
	}

	/* the response buffer holds the client-side mapping address */
	resp = *(void **)rdata;
	free(rdata);

	/* client-side mmap failed */
	if (resp == NULL) {
		rv = ENOMEM;
	}

	*addr = resp;

 out:
	rumpuser__reschedule(nlocks, NULL);

	if (rv)
		return rv;
	return 0;
}
874
875 int
876 rumpuser_sp_raise(void *arg, int signo)
877 {
878 struct spclient *spc = arg;
879 int rv, nlocks;
880
881 rumpuser__unschedule(0, &nlocks, NULL);
882 rv = send_raise_req(spc, signo);
883 rumpuser__reschedule(nlocks, NULL);
884
885 return rv;
886 }
887
888 static pthread_attr_t pattr_detached;
889 static void
890 schedulework(struct spclient *spc, enum sbatype sba_type)
891 {
892 struct servbouncearg *sba;
893 pthread_t pt;
894 uint64_t reqno;
895 int retries = 0;
896
897 reqno = spc->spc_hdr.rsp_reqno;
898 while ((sba = malloc(sizeof(*sba))) == NULL) {
899 if (nworker == 0 || retries > 10) {
900 send_error_resp(spc, reqno, RUMPSP_ERR_TRYAGAIN);
901 spcfreebuf(spc);
902 return;
903 }
904 /* slim chance of more memory? */
905 usleep(10000);
906 }
907
908 sba->sba_spc = spc;
909 sba->sba_type = sba_type;
910 sba->sba_hdr = spc->spc_hdr;
911 sba->sba_data = spc->spc_buf;
912 spcresetbuf(spc);
913
914 spcref(spc);
915
916 pthread_mutex_lock(&sbamtx);
917 TAILQ_INSERT_TAIL(&wrklist, sba, sba_entries);
918 nwork++;
919 if (nwork <= idleworker) {
920 /* do we have a daemon's tool (i.e. idle threads)? */
921 pthread_cond_signal(&sbacv);
922 } else if (nworker < rumpsp_maxworker) {
923 /*
924 * Else, need to create one
925 * (if we can, otherwise just expect another
926 * worker to pick up the syscall)
927 */
928 if (pthread_create(&pt, &pattr_detached,
929 serv_workbouncer, NULL) == 0) {
930 nworker++;
931 }
932 }
933 pthread_mutex_unlock(&sbamtx);
934 }
935
936 /*
937 *
938 * Startup routines and mainloop for server.
939 *
940 */
941
/* Arguments handed to the spserver() mainloop thread. */
struct spservarg {
	int sps_sock;			/* bound + listening server socket */
	connecthook_fn sps_connhook;	/* per-transport connect hook */
};
946
947 static void
948 handlereq(struct spclient *spc)
949 {
950 uint64_t reqno;
951 int error, i;
952
953 reqno = spc->spc_hdr.rsp_reqno;
954 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
955 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
956 send_error_resp(spc, reqno, RUMPSP_ERR_AUTH);
957 shutdown(spc->spc_fd, SHUT_RDWR);
958 spcfreebuf(spc);
959 return;
960 }
961
962 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) {
963 char *comm = (char *)spc->spc_buf;
964 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;
965
966 /* ensure it's 0-terminated */
967 /* XXX make sure it contains sensible chars? */
968 comm[commlen] = '\0';
969
970 if ((error = lwproc_rfork(spc,
971 RUMP_RFCFDG, comm)) != 0) {
972 shutdown(spc->spc_fd, SHUT_RDWR);
973 }
974
975 spcfreebuf(spc);
976 if (error)
977 return;
978
979 spc->spc_mainlwp = lwproc_curlwp();
980
981 send_handshake_resp(spc, reqno, 0);
982 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) {
983 struct lwp *tmpmain;
984 struct prefork *pf;
985 struct handshake_fork *rfp;
986 int cancel;
987
988 if (spc->spc_off-HDRSZ != sizeof(*rfp)) {
989 send_error_resp(spc, reqno,
990 RUMPSP_ERR_MALFORMED_REQUEST);
991 shutdown(spc->spc_fd, SHUT_RDWR);
992 spcfreebuf(spc);
993 return;
994 }
995
996 /*LINTED*/
997 rfp = (void *)spc->spc_buf;
998 cancel = rfp->rf_cancel;
999
1000 pthread_mutex_lock(&pfmtx);
1001 LIST_FOREACH(pf, &preforks, pf_entries) {
1002 if (memcmp(rfp->rf_auth, pf->pf_auth,
1003 sizeof(rfp->rf_auth)) == 0) {
1004 LIST_REMOVE(pf, pf_entries);
1005 LIST_REMOVE(pf, pf_spcentries);
1006 break;
1007 }
1008 }
1009 pthread_mutex_lock(&pfmtx);
1010 spcfreebuf(spc);
1011
1012 if (!pf) {
1013 send_error_resp(spc, reqno,
1014 RUMPSP_ERR_INVALID_PREFORK);
1015 shutdown(spc->spc_fd, SHUT_RDWR);
1016 return;
1017 }
1018
1019 tmpmain = pf->pf_lwp;
1020 free(pf);
1021 lwproc_switch(tmpmain);
1022 if (cancel) {
1023 lwproc_release();
1024 shutdown(spc->spc_fd, SHUT_RDWR);
1025 return;
1026 }
1027
1028 /*
1029 * So, we forked already during "prefork" to save
1030 * the file descriptors from a parent exit
1031 * race condition. But now we need to fork
1032 * a second time since the initial fork has
1033 * the wrong spc pointer. (yea, optimize
1034 * interfaces some day if anyone cares)
1035 */
1036 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
1037 send_error_resp(spc, reqno,
1038 RUMPSP_ERR_RFORK_FAILED);
1039 shutdown(spc->spc_fd, SHUT_RDWR);
1040 lwproc_release();
1041 return;
1042 }
1043 spc->spc_mainlwp = lwproc_curlwp();
1044 lwproc_switch(tmpmain);
1045 lwproc_release();
1046 lwproc_switch(spc->spc_mainlwp);
1047
1048 send_handshake_resp(spc, reqno, 0);
1049 } else {
1050 send_error_resp(spc, reqno, RUMPSP_ERR_AUTH);
1051 shutdown(spc->spc_fd, SHUT_RDWR);
1052 spcfreebuf(spc);
1053 return;
1054 }
1055
1056 spc->spc_pid = lwproc_getpid();
1057
1058 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
1059 spc, spc->spc_pid));
1060
1061 lwproc_switch(NULL);
1062 spc->spc_state = SPCSTATE_RUNNING;
1063 return;
1064 }
1065
1066 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
1067 struct prefork *pf;
1068 uint32_t auth[AUTHLEN];
1069 int inexec;
1070
1071 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
1072 spcfreebuf(spc);
1073
1074 pthread_mutex_lock(&spc->spc_mtx);
1075 inexec = spc->spc_inexec;
1076 pthread_mutex_unlock(&spc->spc_mtx);
1077 if (inexec) {
1078 send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC);
1079 shutdown(spc->spc_fd, SHUT_RDWR);
1080 return;
1081 }
1082
1083 pf = malloc(sizeof(*pf));
1084 if (pf == NULL) {
1085 send_error_resp(spc, reqno, RUMPSP_ERR_NOMEM);
1086 return;
1087 }
1088
1089 /*
1090 * Use client main lwp to fork. this is never used by
1091 * worker threads (except in exec, but we checked for that
1092 * above) so we can safely use it here.
1093 */
1094 lwproc_switch(spc->spc_mainlwp);
1095 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
1096 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
1097 send_error_resp(spc, reqno, RUMPSP_ERR_RFORK_FAILED);
1098 lwproc_switch(NULL);
1099 free(pf);
1100 return;
1101 }
1102
1103 /* Ok, we have a new process context and a new curlwp */
1104 for (i = 0; i < AUTHLEN; i++) {
1105 pf->pf_auth[i] = auth[i] = arc4random();
1106 }
1107 pf->pf_lwp = lwproc_curlwp();
1108 lwproc_switch(NULL);
1109
1110 pthread_mutex_lock(&pfmtx);
1111 LIST_INSERT_HEAD(&preforks, pf, pf_entries);
1112 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
1113 pthread_mutex_unlock(&pfmtx);
1114
1115 DPRINTF(("rump_sp: prefork handler success %p\n", spc));
1116
1117 send_prefork_resp(spc, reqno, auth);
1118 return;
1119 }
1120
1121 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) {
1122 int inexec;
1123
1124 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) {
1125 send_error_resp(spc, reqno,
1126 RUMPSP_ERR_MALFORMED_REQUEST);
1127 shutdown(spc->spc_fd, SHUT_RDWR);
1128 spcfreebuf(spc);
1129 return;
1130 }
1131
1132 pthread_mutex_lock(&spc->spc_mtx);
1133 inexec = spc->spc_inexec;
1134 pthread_mutex_unlock(&spc->spc_mtx);
1135 if (inexec) {
1136 send_error_resp(spc, reqno, RUMPSP_ERR_INEXEC);
1137 shutdown(spc->spc_fd, SHUT_RDWR);
1138 spcfreebuf(spc);
1139 return;
1140 }
1141
1142 pthread_mutex_lock(&spc->spc_mtx);
1143 spc->spc_inexec = 1;
1144 pthread_mutex_unlock(&spc->spc_mtx);
1145
1146 /*
1147 * start to drain lwps. we will wait for it to finish
1148 * in another thread
1149 */
1150 lwproc_switch(spc->spc_mainlwp);
1151 lwproc_lwpexit();
1152 lwproc_switch(NULL);
1153
1154 /*
1155 * exec has to wait for lwps to drain, so finish it off
1156 * in another thread
1157 */
1158 schedulework(spc, SBA_EXEC);
1159 return;
1160 }
1161
1162 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
1163 send_error_resp(spc, reqno, RUMPSP_ERR_MALFORMED_REQUEST);
1164 spcfreebuf(spc);
1165 return;
1166 }
1167
1168 schedulework(spc, SBA_SYSCALL);
1169 }
1170
/*
 * Server mainloop: poll the listen socket plus all client sockets,
 * accept new connections, read request frames and dispatch them.
 * Runs in its own detached thread; returns only on fatal poll error
 * or after spfini-triggered shutdown.
 */
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
		spc->spc_fd = -1;
	}
	/* slot 0 is reserved for the listen socket */
	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	nfds = 1;
	maxidx = 0;

	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
#if NOTYET
	pthread_attr_setstacksize(&pattr_detached, 32*1024);
#endif

	pthread_mutex_init(&sbamtx, NULL);
	pthread_cond_init(&sbacv, NULL);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		int discoed;

		/* g/c hangarounds (eventually) */
		discoed = getdisco();
		while (discoed--) {
			nfds--;
			/* shrink maxidx down to the highest live slot */
			idx = maxidx;
			while (idx) {
				if (pfdlist[idx].fd != -1) {
					maxidx = idx;
					break;
				}
				idx--;
			}
			DPRINTF(("rump_sp: set maxidx to [%u]\n",
			    maxidx));
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		/* visit every fd that has pending input */
		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				switch (readframe(spc)) {
				case 0:
					/* partial frame; wait for more data */
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						send_error_resp(spc,
						    spc->spc_hdr.rsp_reqno,
						    RUMPSP_ERR_MALFORMED_REQUEST);
						spcfreebuf(spc);
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				/* fini requested: tear everything down */
				if (__predict_false(spfini)) {
					close(spclist[0].spc_fd);
					serv_shutdown();
					goto out;
				}

				idx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (idx)
					nfds++;
				if (idx > maxidx)
					maxidx = idx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

 out:
	return NULL;
}
1295
1296 static unsigned cleanupidx;
1297 static struct sockaddr *cleanupsa;
1298 int
1299 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
1300 const char *ostype, const char *osrelease, const char *machine)
1301 {
1302 pthread_t pt;
1303 struct spservarg *sarg;
1304 struct sockaddr *sap;
1305 char *p;
1306 unsigned idx;
1307 int error, s;
1308
1309 p = strdup(url);
1310 if (p == NULL)
1311 return ENOMEM;
1312 error = parseurl(p, &sap, &idx, 1);
1313 free(p);
1314 if (error)
1315 return error;
1316
1317 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
1318 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
1319
1320 s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
1321 if (s == -1)
1322 return errno;
1323
1324 spops = *spopsp;
1325 sarg = malloc(sizeof(*sarg));
1326 if (sarg == NULL) {
1327 close(s);
1328 return ENOMEM;
1329 }
1330
1331 sarg->sps_sock = s;
1332 sarg->sps_connhook = parsetab[idx].connhook;
1333
1334 cleanupidx = idx;
1335 cleanupsa = sap;
1336
1337 /* sloppy error recovery */
1338
1339 /*LINTED*/
1340 if (bind(s, sap, parsetab[idx].slen) == -1) {
1341 fprintf(stderr, "rump_sp: server bind failed\n");
1342 return errno;
1343 }
1344
1345 if (listen(s, MAXCLI) == -1) {
1346 fprintf(stderr, "rump_sp: server listen failed\n");
1347 return errno;
1348 }
1349
1350 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
1351 fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
1352 return errno;
1353 }
1354 pthread_detach(pt);
1355
1356 return 0;
1357 }
1358
/*
 * Tear down the sysproxy.  Called when the host process is about to
 * exit; arg is the spclient of the caller (may be NULL).
 */
void
rumpuser_sp_fini(void *arg)
{
	struct spclient *spc = arg;
	register_t retval[2] = {0, 0};

	/*
	 * NOTE(review): spc_fd starts at -1 (spserver init), so this
	 * nonzero test is also true for a never-opened socket --
	 * verify whether "!= -1" was intended.
	 */
	if (spclist[0].spc_fd) {
		parsetab[cleanupidx].cleanup(cleanupsa);
	}

	/*
	 * stuff response into the socket, since this process is just
	 * about to exit
	 */
	if (spc && spc->spc_syscallreq)
		send_syscall_resp(spc, spc->spc_syscallreq, 0, retval);

	if (spclist[0].spc_fd) {
		/* wake the mainloop; it notices spfini and shuts down */
		shutdown(spclist[0].spc_fd, SHUT_RDWR);
		spfini = 1;
	}
}
1381