/*	$NetBSD: rumpuser_pth.c,v 1.43 2014/11/04 19:05:17 pooka Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.43 2014/11/04 19:05:17 pooka Exp $");
#endif /* !lint */

#include <sys/queue.h>

#if defined(HAVE_SYS_ATOMIC_H)
#include <sys/atomic.h>
#endif

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>	/* nanosleep(), clock_gettime(), struct timespec */
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

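/*
 * Thread creation.  The host may be transiently short on resources,
 * so pthread_create() is retried a bounded number of times on EAGAIN
 * before the error is reported back to the rump kernel.
 */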
int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

#if defined(HAVE_PTHREAD_SETNAME_3)
	if (rv == 0 && thrname) {
		pthread_setname_np(*ptidp, thrname, NULL);
	}
#elif defined(HAVE_PTHREAD_SETNAME_2)
	if (rv == 0 && thrname) {
		pthread_setname_np(*ptidp, thrname);
	}
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}

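/*
 * Mutexes.  For locks created with RUMPUSER_MTX_KMUTEX, the rump
 * kernel lwp currently holding the lock is tracked in mtx->owner so
 * that rumpuser_mutex_owner() can report it back to the kernel.
 */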
struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtxp, int flags)
{
	struct rumpuser_mtx *mtx;
	pthread_mutexattr_t att;
	size_t allocsz;

	allocsz = (sizeof(*mtx)+RUMPUSER_LOCKALIGN) & ~(RUMPUSER_LOCKALIGN-1);
	NOFAIL(mtx = aligned_alloc(RUMPUSER_LOCKALIGN, allocsz));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&mtx->pthmtx, &att));
	pthread_mutexattr_destroy(&att);

	mtx->owner = NULL;
	assert(flags != 0);
	mtx->flags = flags;

	*mtxp = mtx;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

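/*
 * Lock entry first attempts a non-blocking trylock so that the common
 * uncontended case does not give up the rump kernel CPU; only if that
 * fails do we block inside KLOCK_WRAP(), i.e. with the rump kernel
 * context temporarily released.
 */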
void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks. these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock. to accommodate this,
 * we need to check *after* acquiring a lock in case someone was
 * downgrading it. if so, we couldn't actually have it and maybe
 * need to retry later.
 */

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
#if !defined(__APPLE__) && !defined(__ANDROID__)
	char pad[64 - sizeof(pthread_rwlock_t)];
	pthread_spinlock_t spin;
#endif
	unsigned int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};
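
/*
 * The readers field doubles as the lock state: (unsigned)-1 means
 * "held exclusively by rw->writer", any other value is the current
 * number of shared holders.
 */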

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static inline void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__) || defined(__ANDROID__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static inline void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__) || defined(__ANDROID__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rwp)
{
	struct rumpuser_rw *rw;
	size_t allocsz;

	allocsz = (sizeof(*rw)+RUMPUSER_LOCKALIGN) & ~(RUMPUSER_LOCKALIGN-1);

	NOFAIL(rw = aligned_alloc(RUMPUSER_LOCKALIGN, allocsz));
	NOFAIL_ERRNO(pthread_rwlock_init(&rw->pthrw, NULL));
#if !defined(__APPLE__) && !defined(__ANDROID__)
	NOFAIL_ERRNO(pthread_spin_init(&rw->spin, PTHREAD_PROCESS_PRIVATE));
#endif
	rw->readers = 0;
	rw->writer = NULL;
	rw->downgrade = 0;

	*rwp = rw;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between. actually, might allow
 * someone to obtain the lock, we just don't allow that thread to
 * return from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
#if !defined(__APPLE__) && !defined(__ANDROID__)
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
#endif
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};
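
/*
 * The interlock protocol mirrors pthread condition variables: the
 * caller is expected to hold "mtx" around the wait.  A minimal
 * caller-side sketch (names illustrative):
 *
 *	rumpuser_mutex_enter(mtx);
 *	while (!condition_is_true)
 *		rumpuser_cv_wait(cv, mtx);
 *	rumpuser_mutex_exit(mtx);
 */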

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}
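
/*
 * cv_unschedule()/cv_reschedule() bracket the blocking
 * pthread_cond_wait(): they release and reacquire the rump kernel CPU
 * context and keep the mtx->owner bookkeeping consistent while the
 * pthread mutex changes hands inside the condition variable.
 */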

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock. Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * A more elegant solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get clock already here, just in case we will be put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}