rumpuser_sp.c revision 1.45 1 /* $NetBSD: rumpuser_sp.c,v 1.45 2011/03/08 15:34:37 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Sysproxy routines. This provides system RPC support over host sockets.
30 * The most notable limitation is that the client and server must share
31 * the same ABI. This does not mean that they have to be the same
32 * machine or that they need to run the same version of the host OS,
33 * just that they must agree on the data structures. This even *might*
34 * work correctly from one hardware architecture to another.
35 */
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.45 2011/03/08 15:34:37 pooka Exp $");
39
40 #include <sys/types.h>
41 #include <sys/atomic.h>
42 #include <sys/mman.h>
43 #include <sys/socket.h>
44
45 #include <arpa/inet.h>
46 #include <netinet/in.h>
47 #include <netinet/tcp.h>
48
49 #include <assert.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <poll.h>
53 #include <pthread.h>
54 #include <stdarg.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59
60 #include <rump/rump.h> /* XXX: for rfork flags */
61 #include <rump/rumpuser.h>
62 #include "rumpuser_int.h"
63
64 #include "sp_common.c"
65
#ifndef MAXCLI
#define MAXCLI 256		/* max simultaneous connections, incl. listen socket */
#endif
#ifndef MAXWORKER
#define MAXWORKER 128		/* hard cap on syscall worker threads */
#endif
#ifndef IDLEWORKER
#define IDLEWORKER 16		/* workers kept around waiting for work */
#endif
int rumpsp_maxworker = MAXWORKER;
int rumpsp_idleworker = IDLEWORKER;

/* pfdlist[i] and spclist[i] describe the same connection; slot 0 is the listener */
static struct pollfd pfdlist[MAXCLI];
static struct spclient spclist[MAXCLI];
static unsigned int disco;	/* count of disconnects pending g/c in mainloop */
static volatile int spfini;	/* set by rumpuser_sp_fini to stop the server */

static struct rumpuser_sp_ops spops;

static char banner[MAXBANNER];

#define PROTOMAJOR 0
#define PROTOMINOR 3
89
/*
 * State for a fork() in progress: the pre-created process plus the
 * authentication cookie the forked child must present when it connects.
 */
struct prefork {
	uint32_t pf_auth[AUTHLEN];	/* random auth cookie */
	struct lwp *pf_lwp;		/* main lwp of the pre-forked process */

	LIST_ENTRY(prefork) pf_entries;		/* global list */
	LIST_ENTRY(prefork) pf_spcentries;	/* linked from forking spc */
};
static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
static pthread_mutex_t pfmtx;	/* protects preforks + spc_pflist */
99
100 /*
101 * This version is for the server. It's optimized for multiple threads
102 * and is *NOT* reentrant wrt to signals.
103 */
104 static int
105 waitresp(struct spclient *spc, struct respwait *rw)
106 {
107 int spcstate;
108 int rv = 0;
109
110 pthread_mutex_lock(&spc->spc_mtx);
111 sendunlockl(spc);
112 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
113 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
114 }
115 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
116 spcstate = spc->spc_state;
117 pthread_mutex_unlock(&spc->spc_mtx);
118
119 pthread_cond_destroy(&rw->rw_cv);
120
121 if (rv)
122 return rv;
123 if (spcstate == SPCSTATE_DYING)
124 return ENOTCONN;
125 return rw->rw_error;
126 }
127
128 /*
129 * Manual wrappers, since librump does not have access to the
130 * user namespace wrapped interfaces.
131 */
132
/* Switch current host thread to lwp l (NULL = none), under rump scheduling. */
static void
lwproc_switch(struct lwp *l)
{

	spops.spop_schedule();
	spops.spop_lwproc_switch(l);
	spops.spop_unschedule();
}
141
/* Release the current lwp back to the rump kernel. */
static void
lwproc_release(void)
{

	spops.spop_schedule();
	spops.spop_lwproc_release();
	spops.spop_unschedule();
}
150
151 static int
152 lwproc_rfork(struct spclient *spc, int flags, const char *comm)
153 {
154 int rv;
155
156 spops.spop_schedule();
157 rv = spops.spop_lwproc_rfork(spc, flags, comm);
158 spops.spop_unschedule();
159
160 return rv;
161 }
162
163 static int
164 lwproc_newlwp(pid_t pid)
165 {
166 int rv;
167
168 spops.spop_schedule();
169 rv = spops.spop_lwproc_newlwp(pid);
170 spops.spop_unschedule();
171
172 return rv;
173 }
174
175 static struct lwp *
176 lwproc_curlwp(void)
177 {
178 struct lwp *l;
179
180 spops.spop_schedule();
181 l = spops.spop_lwproc_curlwp();
182 spops.spop_unschedule();
183
184 return l;
185 }
186
187 static pid_t
188 lwproc_getpid(void)
189 {
190 pid_t p;
191
192 spops.spop_schedule();
193 p = spops.spop_getpid();
194 spops.spop_unschedule();
195
196 return p;
197 }
198
/* Notify the rump kernel that the current process exec'd as comm. */
static void
lwproc_execnotify(const char *comm)
{

	spops.spop_schedule();
	spops.spop_execnotify(comm);
	spops.spop_unschedule();
}
207
/* Ask the rump kernel to start draining lwps of the current process. */
static void
lwproc_lwpexit(void)
{

	spops.spop_schedule();
	spops.spop_lwpexit();
	spops.spop_unschedule();
}
216
217 static int
218 rumpsyscall(int sysnum, void *data, register_t *retval)
219 {
220 int rv;
221
222 spops.spop_schedule();
223 rv = spops.spop_syscall(sysnum, data, retval);
224 spops.spop_unschedule();
225
226 return rv;
227 }
228
229 static uint64_t
230 nextreq(struct spclient *spc)
231 {
232 uint64_t nw;
233
234 pthread_mutex_lock(&spc->spc_mtx);
235 nw = spc->spc_nextreq++;
236 pthread_mutex_unlock(&spc->spc_mtx);
237
238 return nw;
239 }
240
241 /*
242 * XXX: we send responses with "blocking" I/O. This is not
243 * ok for the main thread. XXXFIXME
244 */
245
246 static void
247 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
248 {
249 struct rsp_hdr rhdr;
250 struct iovec iov[1];
251
252 rhdr.rsp_len = sizeof(rhdr);
253 rhdr.rsp_reqno = reqno;
254 rhdr.rsp_class = RUMPSP_ERROR;
255 rhdr.rsp_type = 0;
256 rhdr.rsp_error = error;
257
258 IOVPUT(iov[0], rhdr);
259
260 sendlock(spc);
261 (void)SENDIOV(spc, iov);
262 sendunlock(spc);
263 }
264
265 static int
266 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
267 {
268 struct rsp_hdr rhdr;
269 struct iovec iov[2];
270 int rv;
271
272 rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
273 rhdr.rsp_reqno = reqno;
274 rhdr.rsp_class = RUMPSP_RESP;
275 rhdr.rsp_type = RUMPSP_HANDSHAKE;
276 rhdr.rsp_error = 0;
277
278 IOVPUT(iov[0], rhdr);
279 IOVPUT(iov[1], error);
280
281 sendlock(spc);
282 rv = SENDIOV(spc, iov);
283 sendunlock(spc);
284
285 return rv;
286 }
287
/*
 * Send a syscall response: error status plus the two register_t
 * return values.  Returns 0 on success, non-zero on transmit failure.
 */
static int
send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
	register_t *retval)
{
	struct rsp_hdr rhdr;
	struct rsp_sysresp sysresp;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = 0;

	sysresp.rsys_error = error;
	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], sysresp);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}
315
/*
 * Send the prefork auth cookie (AUTHLEN words) back to the client.
 * Returns 0 on success, non-zero on transmit failure.
 */
static int
send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
{
	struct rsp_hdr rhdr;
	struct iovec iov[2];
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_PREFORK;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT_WITHSIZE(iov[1], auth, AUTHLEN*sizeof(*auth));

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}
338
/*
 * Request *dlen bytes (or a string, if wantstr) from client memory at
 * remaddr.  Blocks until the client responds.  On success *resp points
 * to malloc'd data owned by the caller; for strings *dlen is updated
 * to the actual length received.  rsp_reqno is filled in by putwait().
 */
static int
copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
	int wantstr, void **resp)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
	rhdr.rsp_class = RUMPSP_REQ;
	if (wantstr)
		rhdr.rsp_type = RUMPSP_COPYINSTR;
	else
		rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = *dlen;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], copydata);

	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		/* never sent, so nobody will ever wake us: dequeue */
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	DPRINTF(("copyin: response %d\n", rv));

	*resp = rw.rw_data;
	if (wantstr)
		*dlen = rw.rw_dlen;

	return rv;

}
383
/*
 * Asynchronously push dlen bytes to client memory at remaddr.
 * No response is awaited; returns 0 if the request was transmitted.
 */
static int
send_copyout_req(struct spclient *spc, const void *remaddr,
	const void *data, size_t dlen)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct iovec iov[3];
	int rv;

	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
	rhdr.rsp_reqno = nextreq(spc);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_COPYOUT;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = dlen;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], copydata);
	IOVPUT_WITHSIZE(iov[2], __UNCONST(data), dlen);

	sendlock(spc);
	rv = SENDIOV(spc, iov);
	sendunlock(spc);

	return rv;
}
414
/*
 * Ask the client to anonymously mmap howmuch bytes in its address
 * space.  Blocks for the response.  On success *resp points to
 * malloc'd response data whose first word is the mapped address
 * (NULL if the client's mmap failed).  Caller frees *resp.
 */
static int
anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	struct iovec iov[2];
	int rv;

	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	IOVPUT(iov[0], rhdr);
	IOVPUT(iov[1], howmuch);

	putwait(spc, &rw, &rhdr);
	rv = SENDIOV(spc, iov);
	if (rv) {
		/* request never left; remove ourselves from the wait queue */
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	*resp = rw.rw_data;

	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));

	return rv;
}
448
449 static int
450 send_raise_req(struct spclient *spc, int signo)
451 {
452 struct rsp_hdr rhdr;
453 struct iovec iov[1];
454 int rv;
455
456 rhdr.rsp_len = sizeof(rhdr);
457 rhdr.rsp_class = RUMPSP_REQ;
458 rhdr.rsp_type = RUMPSP_RAISE;
459 rhdr.rsp_signo = signo;
460
461 IOVPUT(iov[0], rhdr);
462
463 sendlock(spc);
464 rv = SENDIOV(spc, iov);
465 sendunlock(spc);
466
467 return rv;
468 }
469
470 static void
471 spcref(struct spclient *spc)
472 {
473
474 pthread_mutex_lock(&spc->spc_mtx);
475 spc->spc_refcnt++;
476 pthread_mutex_unlock(&spc->spc_mtx);
477 }
478
/*
 * Drop a reference on spc.  When the last reference goes away, release
 * the client's main lwp, close the socket and recycle the slot.  An
 * exec waiter (serv_handleexec) is woken once only its own references
 * remain.  The "disco" counter tells the mainloop to g/c pollfd state.
 */
static void
spcrelease(struct spclient *spc)
{
	int ref;

	pthread_mutex_lock(&spc->spc_mtx);
	ref = --spc->spc_refcnt;
	if (__predict_false(spc->spc_inexec && ref <= 2))
		pthread_cond_broadcast(&spc->spc_cv);
	pthread_mutex_unlock(&spc->spc_mtx);

	if (ref > 0)
		return;

	DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
	_DIAGASSERT(spc->spc_buf == NULL);

	if (spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_release();
	}
	spc->spc_mainlwp = NULL;

	close(spc->spc_fd);
	spc->spc_fd = -1;
	spc->spc_state = SPCSTATE_NEW;	/* slot may now be reused */

	atomic_inc_uint(&disco);
}
510
/*
 * Handle a client disconnect for slot idx: mark the connection dying,
 * wake all response waiters, exit the client's lwps (unless an exec is
 * in flight, which already nuked them), zero the transient part of the
 * spclient and drop the connection's reference.
 */
static void
serv_handledisco(unsigned int idx)
{
	struct spclient *spc = &spclist[idx];
	int dolwpexit;

	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));

	pfdlist[idx].fd = -1;
	pfdlist[idx].revents = 0;
	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_state = SPCSTATE_DYING;
	kickall(spc);	/* wake everyone blocked in waitresp */
	sendunlockl(spc);
	/* exec uses mainlwp in another thread, but also nuked all lwps */
	dolwpexit = !spc->spc_inexec;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (dolwpexit && spc->spc_mainlwp) {
		lwproc_switch(spc->spc_mainlwp);
		lwproc_lwpexit();
		lwproc_switch(NULL);
	}

	/*
	 * Nobody's going to attempt to send/receive anymore,
	 * so reinit info relevant to that.
	 */
	/*LINTED:pointer casts may be ok*/
	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);

	spcrelease(spc);
}
544
/*
 * Shut down every live client connection.  Called from the mainloop
 * when spfini is raised.
 */
static void
serv_shutdown(void)
{
	struct spclient *spc;
	unsigned int i;

	for (i = 1; i < MAXCLI; i++) {	/* slot 0 is the listen socket */
		spc = &spclist[i];
		if (spc->spc_fd == -1)
			continue;

		shutdown(spc->spc_fd, SHUT_RDWR);
		serv_handledisco(i);

		/*
		 * NOTE(review): serv_handledisco() already drops a reference
		 * via spcrelease(); verify this extra release does not
		 * underflow spc_refcnt for a connection with refcnt 1.
		 */
		spcrelease(spc);
	}
}
562
/*
 * Accept a new connection on the listen socket: make it non-blocking,
 * run the transport-specific connection hook, send the protocol banner
 * and claim a free spclient slot.  Returns the slot index, or 0 on any
 * failure (slot 0 is the listener, so 0 doubles as "no slot").  If
 * busy is set (table full), the connection is accepted and immediately
 * closed so the client sees a disconnect rather than a hang.
 */
static unsigned
serv_handleconn(int fd, connecthook_fn connhook, int busy)
{
	struct sockaddr_storage ss;
	socklen_t sl = sizeof(ss);
	int newfd, flags;
	unsigned i;

	/*LINTED: cast ok */
	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
	if (newfd == -1)
		return 0;

	if (busy) {
		close(newfd); /* EBUSY */
		return 0;
	}

	flags = fcntl(newfd, F_GETFL, 0);
	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
		close(newfd);
		return 0;
	}

	if (connhook(newfd) != 0) {
		close(newfd);
		return 0;
	}

	/* write out a banner for the client */
	if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
	    != (ssize_t)strlen(banner)) {
		close(newfd);
		return 0;
	}

	/* find empty slot the simple way */
	for (i = 0; i < MAXCLI; i++) {
		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
			break;
	}

	/* caller guaranteed a free slot via the busy check above */
	assert(i < MAXCLI);

	pfdlist[i].fd = newfd;
	spclist[i].spc_fd = newfd;
	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
	spclist[i].spc_refcnt = 1;

	TAILQ_INIT(&spclist[i].spc_respwait);

	DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));

	return i;
}
618
/*
 * Execute one syscall request in a worker thread: attach a temporary
 * lwp to the client's process, run the syscall and send the response.
 * spc_syscallreq records the in-flight request so rumpuser_sp_fini()
 * can fake a response at exit time.
 */
static void
serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
{
	register_t retval[2] = {0, 0};
	int rv, sysnum;

	sysnum = (int)rhdr->rsp_sysnum;
	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
	    sysnum, spc->spc_pid));

	if (__predict_false((rv = lwproc_newlwp(spc->spc_pid)) != 0)) {
		retval[0] = -1;
		send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
		return;
	}
	spc->spc_syscallreq = rhdr->rsp_reqno;
	rv = rumpsyscall(sysnum, data, retval);
	spc->spc_syscallreq = 0;
	lwproc_release();

	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
	    rv, retval[0], retval[1]));

	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
}
644
/*
 * Finish an exec handshake in a worker thread: wait until only the
 * connection's and our own references remain (i.e. all other worker
 * threads for this client are gone), then notify the rump kernel of
 * the new command name and clear the in-exec flag.
 */
static void
serv_handleexec(struct spclient *spc, struct rsp_hdr *rhdr, char *comm)
{
	size_t commlen = rhdr->rsp_len - HDRSZ;

	pthread_mutex_lock(&spc->spc_mtx);
	/* one for the connection and one for us */
	while (spc->spc_refcnt > 2)
		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
	pthread_mutex_unlock(&spc->spc_mtx);

	/*
	 * ok, all the threads are dead (or one is still alive and
	 * the connection is dead, in which case this doesn't matter
	 * very much).  proceed with exec.
	 */

	/* ensure comm is 0-terminated */
	/* TODO: make sure it contains sensible chars? */
	comm[commlen] = '\0';

	lwproc_switch(spc->spc_mainlwp);
	lwproc_execnotify(comm);
	lwproc_switch(NULL);

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_inexec = 0;
	pthread_mutex_unlock(&spc->spc_mtx);
	send_handshake_resp(spc, rhdr->rsp_reqno, 0);
}
675
/* Work item types handed to worker threads. */
enum sbatype { SBA_SYSCALL, SBA_EXEC };

/* One queued unit of work for the worker pool. */
struct servbouncearg {
	struct spclient *sba_spc;	/* originating client (referenced) */
	struct rsp_hdr sba_hdr;		/* copy of the request header */
	enum sbatype sba_type;		/* syscall or exec */
	uint8_t *sba_data;		/* request payload, freed by worker */

	TAILQ_ENTRY(servbouncearg) sba_entries;
};
/* worker pool state, all protected by sbamtx */
static pthread_mutex_t sbamtx;
static pthread_cond_t sbacv;
static int nworker, idleworker, nwork;
static TAILQ_HEAD(, servbouncearg) wrklist = TAILQ_HEAD_INITIALIZER(wrklist);
690
/*
 * Worker thread body: pull work items off wrklist and dispatch them to
 * serv_handlesyscall()/serv_handleexec().  A worker retires when the
 * idle pool already exceeds rumpsp_idleworker relative to pending work.
 */
/*ARGSUSED*/
static void *
serv_workbouncer(void *arg)
{
	struct servbouncearg *sba;

	for (;;) {
		pthread_mutex_lock(&sbamtx);
		/* too many idle threads for the amount of work?  retire. */
		if (__predict_false(idleworker - nwork >= rumpsp_idleworker)) {
			nworker--;
			pthread_mutex_unlock(&sbamtx);
			break;
		}
		idleworker++;
		while (TAILQ_EMPTY(&wrklist)) {
			_DIAGASSERT(nwork == 0);
			pthread_cond_wait(&sbacv, &sbamtx);
		}
		idleworker--;

		sba = TAILQ_FIRST(&wrklist);
		TAILQ_REMOVE(&wrklist, sba, sba_entries);
		nwork--;
		pthread_mutex_unlock(&sbamtx);

		if (__predict_true(sba->sba_type == SBA_SYSCALL)) {
			serv_handlesyscall(sba->sba_spc,
			    &sba->sba_hdr, sba->sba_data);
		} else {
			_DIAGASSERT(sba->sba_type == SBA_EXEC);
			serv_handleexec(sba->sba_spc, &sba->sba_hdr,
			    (char *)sba->sba_data);
		}
		/* drop the reference schedulework() took for us */
		spcrelease(sba->sba_spc);
		free(sba->sba_data);
		free(sba);
	}

	return NULL;
}
731
/*
 * Common copyin path: fetch *len bytes (or a string) from client
 * memory at raddr into local buffer laddr.  Drops the rump kernel
 * lock around the blocking RPC.  Any failure maps to EFAULT, matching
 * copyin() semantics.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
754
/* Hypercall: copy len bytes from client address raddr to laddr. */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{

	return sp_copyin(arg, raddr, laddr, &len, 0);
}
761
/* Hypercall: copy a NUL-terminated string from the client; *len is updated. */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{

	return sp_copyin(arg, raddr, laddr, len, 1);
}
768
/*
 * Common copyout path: asynchronously push dlen bytes from laddr to
 * client memory at raddr, dropping the rump kernel lock around the
 * send.  Failure maps to EFAULT, matching copyout() semantics.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return EFAULT;
	return 0;
}
783
/* Hypercall: copy dlen bytes from laddr to client address raddr. */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{

	return sp_copyout(arg, laddr, raddr, dlen);
}
790
/* Hypercall: copy a string of *dlen bytes to the client. */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{

	return sp_copyout(arg, laddr, raddr, *dlen);
}
797
/*
 * Hypercall: ask the client to anonymously mmap howmuch bytes and
 * return the mapped address in *addr.  RPC failure yields EFAULT;
 * a NULL mapping from the client yields ENOMEM.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *resp, *rdata;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv) {
		rv = EFAULT;
		goto out;
	}

	/* response payload is the address the client mapped */
	resp = *(void **)rdata;
	free(rdata);

	if (resp == NULL) {
		rv = ENOMEM;
	}

	*addr = resp;

 out:
	rumpuser__klock(nlocks, NULL);

	if (rv)
		return rv;
	return 0;
}
829
/* Hypercall: deliver signal signo to the client process. */
int
rumpuser_sp_raise(void *arg, int signo)
{
	struct spclient *spc = arg;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);
	rv = send_raise_req(spc, signo);
	rumpuser__klock(nlocks, NULL);

	return rv;
}
842
843 static pthread_attr_t pattr_detached;
844 static void
845 schedulework(struct spclient *spc, enum sbatype sba_type)
846 {
847 struct servbouncearg *sba;
848 pthread_t pt;
849 uint64_t reqno;
850 int retries = 0;
851
852 reqno = spc->spc_hdr.rsp_reqno;
853 while ((sba = malloc(sizeof(*sba))) == NULL) {
854 if (nworker == 0 || retries > 10) {
855 send_error_resp(spc, reqno, EAGAIN);
856 spcfreebuf(spc);
857 return;
858 }
859 /* slim chance of more memory? */
860 usleep(10000);
861 }
862
863 sba->sba_spc = spc;
864 sba->sba_type = sba_type;
865 sba->sba_hdr = spc->spc_hdr;
866 sba->sba_data = spc->spc_buf;
867 spcresetbuf(spc);
868
869 spcref(spc);
870
871 pthread_mutex_lock(&sbamtx);
872 TAILQ_INSERT_TAIL(&wrklist, sba, sba_entries);
873 nwork++;
874 if (nwork <= idleworker) {
875 /* do we have a daemon's tool (i.e. idle threads)? */
876 pthread_cond_signal(&sbacv);
877 } else if (nworker < rumpsp_maxworker) {
878 /*
879 * Else, need to create one
880 * (if we can, otherwise just expect another
881 * worker to pick up the syscall)
882 */
883 if (pthread_create(&pt, &pattr_detached,
884 serv_workbouncer, NULL) == 0) {
885 nworker++;
886 }
887 }
888 pthread_mutex_unlock(&sbamtx);
889 }
890
891 /*
892 *
893 * Startup routines and mainloop for server.
894 *
895 */
896
/* Startup arguments handed to the spserver() thread. */
struct spservarg {
	int sps_sock;			/* bound+listening server socket */
	connecthook_fn sps_connhook;	/* transport-specific accept hook */
};
901
902 static void
903 handlereq(struct spclient *spc)
904 {
905 uint64_t reqno;
906 int error, i;
907
908 reqno = spc->spc_hdr.rsp_reqno;
909 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
910 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
911 send_error_resp(spc, reqno, EAUTH);
912 shutdown(spc->spc_fd, SHUT_RDWR);
913 spcfreebuf(spc);
914 return;
915 }
916
917 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) {
918 char *comm = (char *)spc->spc_buf;
919 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;
920
921 /* ensure it's 0-terminated */
922 /* XXX make sure it contains sensible chars? */
923 comm[commlen] = '\0';
924
925 if ((error = lwproc_rfork(spc,
926 RUMP_RFCFDG, comm)) != 0) {
927 shutdown(spc->spc_fd, SHUT_RDWR);
928 }
929
930 spcfreebuf(spc);
931 if (error)
932 return;
933
934 spc->spc_mainlwp = lwproc_curlwp();
935
936 send_handshake_resp(spc, reqno, 0);
937 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) {
938 struct lwp *tmpmain;
939 struct prefork *pf;
940 struct handshake_fork *rfp;
941 int cancel;
942
943 if (spc->spc_off-HDRSZ != sizeof(*rfp)) {
944 send_error_resp(spc, reqno, EINVAL);
945 shutdown(spc->spc_fd, SHUT_RDWR);
946 spcfreebuf(spc);
947 return;
948 }
949
950 /*LINTED*/
951 rfp = (void *)spc->spc_buf;
952 cancel = rfp->rf_cancel;
953
954 pthread_mutex_lock(&pfmtx);
955 LIST_FOREACH(pf, &preforks, pf_entries) {
956 if (memcmp(rfp->rf_auth, pf->pf_auth,
957 sizeof(rfp->rf_auth)) == 0) {
958 LIST_REMOVE(pf, pf_entries);
959 LIST_REMOVE(pf, pf_spcentries);
960 break;
961 }
962 }
963 pthread_mutex_lock(&pfmtx);
964 spcfreebuf(spc);
965
966 if (!pf) {
967 send_error_resp(spc, reqno, ESRCH);
968 shutdown(spc->spc_fd, SHUT_RDWR);
969 return;
970 }
971
972 tmpmain = pf->pf_lwp;
973 free(pf);
974 lwproc_switch(tmpmain);
975 if (cancel) {
976 lwproc_release();
977 shutdown(spc->spc_fd, SHUT_RDWR);
978 return;
979 }
980
981 /*
982 * So, we forked already during "prefork" to save
983 * the file descriptors from a parent exit
984 * race condition. But now we need to fork
985 * a second time since the initial fork has
986 * the wrong spc pointer. (yea, optimize
987 * interfaces some day if anyone cares)
988 */
989 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
990 send_error_resp(spc, reqno, error);
991 shutdown(spc->spc_fd, SHUT_RDWR);
992 lwproc_release();
993 return;
994 }
995 spc->spc_mainlwp = lwproc_curlwp();
996 lwproc_switch(tmpmain);
997 lwproc_release();
998 lwproc_switch(spc->spc_mainlwp);
999
1000 send_handshake_resp(spc, reqno, 0);
1001 } else {
1002 send_error_resp(spc, reqno, EAUTH);
1003 shutdown(spc->spc_fd, SHUT_RDWR);
1004 spcfreebuf(spc);
1005 return;
1006 }
1007
1008 spc->spc_pid = lwproc_getpid();
1009
1010 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
1011 spc, spc->spc_pid));
1012
1013 lwproc_switch(NULL);
1014 spc->spc_state = SPCSTATE_RUNNING;
1015 return;
1016 }
1017
1018 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
1019 struct prefork *pf;
1020 uint32_t auth[AUTHLEN];
1021 int inexec;
1022
1023 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
1024 spcfreebuf(spc);
1025
1026 pthread_mutex_lock(&spc->spc_mtx);
1027 inexec = spc->spc_inexec;
1028 pthread_mutex_unlock(&spc->spc_mtx);
1029 if (inexec) {
1030 send_error_resp(spc, reqno, EBUSY);
1031 shutdown(spc->spc_fd, SHUT_RDWR);
1032 return;
1033 }
1034
1035 pf = malloc(sizeof(*pf));
1036 if (pf == NULL) {
1037 send_error_resp(spc, reqno, ENOMEM);
1038 return;
1039 }
1040
1041 /*
1042 * Use client main lwp to fork. this is never used by
1043 * worker threads (except in exec, but we checked for that
1044 * above) so we can safely use it here.
1045 */
1046 lwproc_switch(spc->spc_mainlwp);
1047 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
1048 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
1049 send_error_resp(spc, reqno, error);
1050 lwproc_switch(NULL);
1051 free(pf);
1052 return;
1053 }
1054
1055 /* Ok, we have a new process context and a new curlwp */
1056 for (i = 0; i < AUTHLEN; i++) {
1057 pf->pf_auth[i] = auth[i] = arc4random();
1058 }
1059 pf->pf_lwp = lwproc_curlwp();
1060 lwproc_switch(NULL);
1061
1062 pthread_mutex_lock(&pfmtx);
1063 LIST_INSERT_HEAD(&preforks, pf, pf_entries);
1064 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
1065 pthread_mutex_unlock(&pfmtx);
1066
1067 DPRINTF(("rump_sp: prefork handler success %p\n", spc));
1068
1069 send_prefork_resp(spc, reqno, auth);
1070 return;
1071 }
1072
1073 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_HANDSHAKE)) {
1074 int inexec;
1075
1076 if (spc->spc_hdr.rsp_handshake != HANDSHAKE_EXEC) {
1077 send_error_resp(spc, reqno, EINVAL);
1078 shutdown(spc->spc_fd, SHUT_RDWR);
1079 spcfreebuf(spc);
1080 return;
1081 }
1082
1083 pthread_mutex_lock(&spc->spc_mtx);
1084 inexec = spc->spc_inexec;
1085 pthread_mutex_unlock(&spc->spc_mtx);
1086 if (inexec) {
1087 send_error_resp(spc, reqno, EBUSY);
1088 shutdown(spc->spc_fd, SHUT_RDWR);
1089 spcfreebuf(spc);
1090 return;
1091 }
1092
1093 pthread_mutex_lock(&spc->spc_mtx);
1094 spc->spc_inexec = 1;
1095 pthread_mutex_unlock(&spc->spc_mtx);
1096
1097 /*
1098 * start to drain lwps. we will wait for it to finish
1099 * in another thread
1100 */
1101 lwproc_switch(spc->spc_mainlwp);
1102 lwproc_lwpexit();
1103 lwproc_switch(NULL);
1104
1105 /*
1106 * exec has to wait for lwps to drain, so finish it off
1107 * in another thread
1108 */
1109 schedulework(spc, SBA_EXEC);
1110 return;
1111 }
1112
1113 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
1114 send_error_resp(spc, reqno, EINVAL);
1115 spcfreebuf(spc);
1116 return;
1117 }
1118
1119 schedulework(spc, SBA_SYSCALL);
1120 }
1121
/*
 * Server mainloop thread.  Initializes per-slot state and the worker
 * pool, then polls: slot 0 accepts new connections, other slots read
 * frames and dispatch them (responses wake waiters, requests go to
 * handlereq).  maxidx tracks the highest live slot so poll() scans the
 * minimum range; the disco counter garbage-collects it after closes.
 */
static void *
spserver(void *arg)
{
	struct spservarg *sarg = arg;
	struct spclient *spc;
	unsigned idx;
	int seen;
	int rv;
	unsigned int nfds, maxidx;

	for (idx = 0; idx < MAXCLI; idx++) {
		pfdlist[idx].fd = -1;
		pfdlist[idx].events = POLLIN;

		spc = &spclist[idx];
		pthread_mutex_init(&spc->spc_mtx, NULL);
		pthread_cond_init(&spc->spc_cv, NULL);
		spc->spc_fd = -1;
	}
	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
	pfdlist[0].events = POLLIN;
	nfds = 1;
	maxidx = 0;

	pthread_attr_init(&pattr_detached);
	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
	/* XXX: doesn't stacksize currently work on NetBSD */
	pthread_attr_setstacksize(&pattr_detached, 32*1024);

	pthread_mutex_init(&sbamtx, NULL);
	pthread_cond_init(&sbacv, NULL);

	DPRINTF(("rump_sp: server mainloop\n"));

	for (;;) {
		int discoed;

		/* g/c hangarounds (eventually) */
		discoed = atomic_swap_uint(&disco, 0);
		while (discoed--) {
			nfds--;
			/* shrink maxidx to the highest still-open slot */
			idx = maxidx;
			while (idx) {
				if (pfdlist[idx].fd != -1) {
					maxidx = idx;
					break;
				}
				idx--;
			}
			DPRINTF(("rump_sp: set maxidx to [%u]\n",
			    maxidx));
		}

		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
		seen = 0;
		rv = poll(pfdlist, maxidx+1, INFTIM);
		assert(maxidx+1 <= MAXCLI);
		assert(rv != 0);	/* no timeout with INFTIM */
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "rump_spserver: poll returned %d\n",
			    errno);
			break;
		}

		/* stop scanning once all rv ready descriptors are handled */
		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
			if ((pfdlist[idx].revents & POLLIN) == 0)
				continue;

			seen++;
			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
			    idx, seen, rv));
			if (idx > 0) {
				spc = &spclist[idx];
				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
				switch (readframe(spc)) {
				case 0:
					/* partial frame, wait for more */
					break;
				case -1:
					serv_handledisco(idx);
					break;
				default:
					switch (spc->spc_hdr.rsp_class) {
					case RUMPSP_RESP:
						kickwaiter(spc);
						break;
					case RUMPSP_REQ:
						handlereq(spc);
						break;
					default:
						send_error_resp(spc,
						    spc->spc_hdr.rsp_reqno,
						    ENOENT);
						spcfreebuf(spc);
						break;
					}
					break;
				}

			} else {
				DPRINTF(("rump_sp: mainloop new connection\n"));

				/* fini requested?  drain and exit. */
				if (__predict_false(spfini)) {
					close(spclist[0].spc_fd);
					serv_shutdown();
					goto out;
				}

				idx = serv_handleconn(pfdlist[0].fd,
				    sarg->sps_connhook, nfds == MAXCLI);
				if (idx)
					nfds++;
				if (idx > maxidx)
					maxidx = idx;
				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
			}
		}
	}

 out:
	return NULL;
}
1245
1246 static unsigned cleanupidx;
1247 static struct sockaddr *cleanupsa;
1248 int
1249 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
1250 const char *ostype, const char *osrelease, const char *machine)
1251 {
1252 pthread_t pt;
1253 struct spservarg *sarg;
1254 struct sockaddr *sap;
1255 char *p;
1256 unsigned idx;
1257 int error, s;
1258
1259 p = strdup(url);
1260 if (p == NULL)
1261 return ENOMEM;
1262 error = parseurl(p, &sap, &idx, 1);
1263 free(p);
1264 if (error)
1265 return error;
1266
1267 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
1268 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
1269
1270 s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
1271 if (s == -1)
1272 return errno;
1273
1274 spops = *spopsp;
1275 sarg = malloc(sizeof(*sarg));
1276 if (sarg == NULL) {
1277 close(s);
1278 return ENOMEM;
1279 }
1280
1281 sarg->sps_sock = s;
1282 sarg->sps_connhook = parsetab[idx].connhook;
1283
1284 cleanupidx = idx;
1285 cleanupsa = sap;
1286
1287 /* sloppy error recovery */
1288
1289 /*LINTED*/
1290 if (bind(s, sap, sap->sa_len) == -1) {
1291 fprintf(stderr, "rump_sp: server bind failed\n");
1292 return errno;
1293 }
1294
1295 if (listen(s, MAXCLI) == -1) {
1296 fprintf(stderr, "rump_sp: server listen failed\n");
1297 return errno;
1298 }
1299
1300 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
1301 fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
1302 return errno;
1303 }
1304 pthread_detach(pt);
1305
1306 return 0;
1307 }
1308
/*
 * Tear down the sysproxy server at process exit: remove the transport
 * rendezvous point, fake a response for any syscall this thread was
 * servicing (the process is about to exit anyway), and signal the
 * mainloop to shut down via spfini.
 *
 * NOTE(review): the spc_fd checks test non-zero rather than != -1;
 * this relies on static zero-init meaning "never initialized" —
 * confirm fd 0 can never be the listen socket here.
 */
void
rumpuser_sp_fini(void *arg)
{
	struct spclient *spc = arg;
	register_t retval[2] = {0, 0};

	if (spclist[0].spc_fd) {
		parsetab[cleanupidx].cleanup(cleanupsa);
	}

	/*
	 * stuff response into the socket, since this process is just
	 * about to exit
	 */
	if (spc && spc->spc_syscallreq)
		send_syscall_resp(spc, spc->spc_syscallreq, 0, retval);

	if (spclist[0].spc_fd) {
		shutdown(spclist[0].spc_fd, SHUT_RDWR);
		spfini = 1;
	}
}
1331