/*	$NetBSD: rumpuser_pth.c,v 1.36 2014/03/10 22:37:51 justin Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.36 2014/03/10 22:37:51 justin Exp $");
#endif /* !lint */

#include <sys/queue.h>
#if defined(__NetBSD__)
#include <sys/param.h>
#include <sys/atomic.h>
#endif
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>	/* struct timespec, nanosleep(), clock_gettime() */
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"
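
/*
 * A quick key to the helpers from rumpuser_int.h used throughout this
 * file (paraphrased from their definitions, not normative): NOFAIL()
 * aborts the process if the wrapped expression fails, NOFAIL_ERRNO()
 * aborts on a non-zero errno-style return, KLOCK_WRAP() releases the
 * rump kernel CPU context around a potentially blocking host call and
 * reacquires it afterwards, and ET() returns its argument translated
 * from the host errno namespace to the rump kernel one.
 */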
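/*
 * On NetBSD, allocate lock structures rounded up and aligned to
 * COHERENCY_UNIT to avoid false sharing between adjacent locks;
 * elsewhere plain malloc() is used.  Note this one-argument helper is
 * local to this file and differs from the two-argument C11
 * aligned_alloc(alignment, size).
 */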
#if defined(__NetBSD__)
static void *
aligned_alloc(size_t size)
{
	void *ptr;

	size = roundup2(size, COHERENCY_UNIT);
	return posix_memalign(&ptr, COHERENCY_UNIT, size) ? NULL : ptr;
}
#else
#define aligned_alloc(sz) malloc(sz)
#endif

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	/* retry a few times if thread creation fails transiently */
	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};	/* 10 ms */

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

#if defined(__NetBSD__)
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() signature varies from one Linux
	 * distro to another, so leave the call disabled pending
	 * autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}
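
/*
 * Usage sketch (hypothetical caller, not part of this file).  Create a
 * joinable thread and reap it later through the cookie; note that this
 * implementation accepts but ignores the priority and cpuidx hints.
 *
 *	static void *mythr(void *arg) { return NULL; }
 *
 *	void *cookie;
 *	if (rumpuser_thread_create(mythr, NULL, "mythr", 1, -1, -1,
 *	    &cookie) == 0)
 *		rumpuser_thread_join(cookie);
 */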

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}

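/*
 * mutexes.  flags semantics, as used below: a RUMPUSER_MTX_SPIN mutex
 * is acquired without releasing the rump kernel CPU context (no
 * KLOCK_WRAP), while other mutexes unschedule before blocking.  for
 * RUMPUSER_MTX_KMUTEX mutexes we additionally track the owning lwp so
 * that rumpuser_mutex_owner() can answer.
 */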
struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = aligned_alloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock.  to accommodate this, we need
 * to check *after* acquiring a lock in case someone was downgrading it.
 * if so, we can't actually have taken it and may need to retry later.
 */
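
/*
 * To spell out the race the above handles (an illustrative walkthrough,
 * not extra semantics): thread A holds the write lock and downgrades,
 * i.e. unlocks pthrw and relocks it shared.  Thread B may win the
 * pthread race and grab the exclusive pthrw lock between those two
 * steps.  Since rw->downgrade is still set, rw_setwriter() makes B drop
 * the lock and retry, so from the kernel's point of view the downgrade
 * is atomic.
 */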

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
#if !defined(__APPLE__)
	char pad[64 - sizeof(pthread_rwlock_t)];
	pthread_spinlock_t spin;
#endif
	unsigned int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

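/*
 * The reader count is kept outside the pthread rwlock because pthreads
 * does not export one; readers == (unsigned)-1 doubles as the "write
 * locked" sentinel.  Updates use atomic ops where available (natively
 * on NetBSD, presumably via compat definitions from rumpuser_port.h on
 * Apple) and a spinlock elsewhere; Mac OS X lacks pthread spinlocks,
 * hence the #ifdefs.
 */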
static inline void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static inline void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__) || defined(__APPLE__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = aligned_alloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
#if !defined(__APPLE__)
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin), PTHREAD_PROCESS_PRIVATE));
#endif
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from an exclusive to a shared lock without allowing anyone
 * else to obtain an exclusive lock in between.  actually, we might
 * allow someone to obtain the lock; we just don't allow that thread
 * to return from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so we need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}
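
/*
 * Downgrade usage sketch (hypothetical caller): modify a structure
 * under the write lock, then keep reading it without letting another
 * writer slip in between.
 *
 *	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
 *	... modify ...
 *	rumpuser_rw_downgrade(rw);
 *	... read; other readers may now enter, writers may not ...
 *	rumpuser_rw_exit(rw);
 */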

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
#if !defined(__APPLE__)
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
#endif
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * A more elegant solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}
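
/*
 * The deadlock the above avoids, spelled out: if we held the spin
 * mutex while waiting in rumpkern_sched() for a rump CPU, the thread
 * currently on that CPU could itself be spinning to acquire our mutex,
 * i.e. hold-and-wait in both directions.  Releasing the mutex first
 * keeps the global acquisition order "CPU context, then spin mutex".
 */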

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}
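
/*
 * Typical caller-side pattern (sketch; "cond" stands for whatever
 * predicate the caller waits on): as with any condvar, the wait must
 * sit in a loop that re-tests the condition.
 *
 *	rumpuser_mutex_enter(mtx);
 *	while (!cond)
 *		rumpuser_cv_wait(cv, mtx);
 *	rumpuser_mutex_exit(mtx);
 */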

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

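/*
 * Wait with a relative timeout of sec/nsec.  Returns 0 on wakeup and
 * an error (notably ETIMEDOUT, via the ET() translation) otherwise.
 * The deadline handed to pthread_cond_timedwait() is an absolute
 * CLOCK_REALTIME time, so a clock step can shorten or lengthen the
 * wait; see the comment below about CLOCK_MONOTONIC.
 */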
int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Fetch the time already here, in case we are put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}
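
/*
 * Note that nwaiters is modified by the wait functions only while the
 * interlock is held, so a caller of the above presumably holds the
 * same interlock if it wants a stable answer.
 */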

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}