/*	$NetBSD: rumpuser_pth.c,v 1.33 2013/09/26 00:41:51 rmind Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.33 2013/09/26 00:41:51 rmind Exp $");
#endif /* !lint */
#include <sys/queue.h>
#if defined(__NetBSD__)
#include <sys/param.h>
#include <sys/atomic.h>
#endif

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>	/* nanosleep(), clock_gettime(), struct timespec */
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

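/*
 * Allocate lock objects aligned to the cache line size (COHERENCY_UNIT
 * on NetBSD) so that independent locks do not end up sharing a cache
 * line; on other platforms plain malloc has to do.
 */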
#if defined(__NetBSD__)
static void *
aligned_alloc(size_t size)
{
	void *ptr;

	size = roundup2(size, COHERENCY_UNIT);
	return posix_memalign(&ptr, COHERENCY_UNIT, size) ? NULL : ptr;
}
#else
#define aligned_alloc(sz) malloc(sz)
#endif

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
	int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

	/* use *ptidp: the local ptid is written only in the detached case */
#if defined(__NetBSD__)
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() call varies from one Linux distro to
	 * another.  Comment out the call pending autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif
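
	/*
	 * A possible shape for the autoconf-guarded call hinted at above
	 * (sketch only; HAVE_PTHREAD_SETNAME_NP_2ARG is a hypothetical
	 * configure-provided macro, not something defined in this tree):
	 *
	 *	#if defined(HAVE_PTHREAD_SETNAME_NP_2ARG)
	 *	if (rv == 0 && thrname)
	 *		pthread_setname_np(*ptidp, thrname);
	 *	#endif
	 */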

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}

struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = aligned_alloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}
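
/*
 * Caller-side sketch (illustrative only, not part of this file's
 * implementation): a kernel mutex is created and used through this
 * interface roughly as
 *
 *	struct rumpuser_mtx *mtx;
 *
 *	rumpuser_mutex_init(&mtx, RUMPUSER_MTX_KMUTEX);
 *	rumpuser_mutex_enter(mtx);
 *	... critical section ...
 *	rumpuser_mutex_exit(mtx);
 *	rumpuser_mutex_destroy(mtx);
 */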

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support downgrading, i.e. swapping an exclusive lock for a shared
 * one without letting another writer in.  to accommodate this, we
 * need to check *after* acquiring the pthread lock whether someone
 * is in the middle of downgrading it.  if so, we don't really own
 * the lock and may need to retry later.
 */
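
/*
 * Caller-side sketch of the write-to-read downgrade this supports
 * (illustrative only; "rw" is a lock obtained from rumpuser_rw_init()):
 *
 *	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
 *	... modify the protected data ...
 *	rumpuser_rw_downgrade(rw);	now held as a reader
 *	... read the protected data ...
 *	rumpuser_rw_exit(rw);
 */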

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
	char pad[64 - sizeof(pthread_rwlock_t)];
	pthread_spinlock_t spin;
	unsigned int readers;	/* (unsigned)-1 means held by a writer */
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static inline void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static inline void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = aligned_alloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from exclusive to shared lock without letting another
 * writer in between.  strictly speaking, another thread might still
 * obtain the pthread lock, but we don't allow that thread to return
 * from the hypercall holding it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so we need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * A better solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going to go poking there without some numbers on how often
	 * this path is taken for spin mutexes.
	 */
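	/*
	 * Concretely, the order preserved here is: first the rump
	 * kernel CPU context (rumpkern_sched()), then the spin mutex
	 * (pthread_mutex_lock()).  pthread_cond_wait() hands the mutex
	 * back in the opposite order, so for spin mutexes we drop it
	 * and retake both in the proper order.
	 */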
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}
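
/*
 * Caller-side sketch (illustrative only): waiting on a condition via
 * this interface follows the usual monitor pattern, with the rumpuser
 * mutex as the interlock; "wanted" stands for whatever predicate the
 * caller is waiting on:
 *
 *	rumpuser_mutex_enter(mtx);
 *	while (!wanted)
 *		rumpuser_cv_wait(cv, mtx);
 *	... consume the state ...
 *	rumpuser_mutex_exit(mtx);
 */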

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
	int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get the clock already here, just in case we will be put to
	 * sleep after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
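
	/*
	 * Where pthread_condattr_setclock() is available, the
	 * CLOCK_MONOTONIC variant alluded to above could be wired up
	 * roughly like this at cv init time (sketch only, not what
	 * this hypervisor currently does):
	 *
	 *	pthread_condattr_t ca;
	 *
	 *	pthread_condattr_init(&ca);
	 *	pthread_condattr_setclock(&ca, CLOCK_MONOTONIC);
	 *	pthread_cond_init(&cv->pthcv, &ca);
	 *	pthread_condattr_destroy(&ca);
	 *
	 * with clock_gettime(CLOCK_MONOTONIC, &ts) used below as the
	 * timeout base.
	 */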
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}
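
/*
 * Caller-side sketch (illustrative only): the rump kernel binds an lwp
 * to the host thread for the duration of its execution and unbinds it
 * afterwards, roughly as
 *
 *	rumpuser_curlwpop(RUMPUSER_LWP_SET, l);
 *	... rumpuser_curlwp() now returns l in this thread ...
 *	rumpuser_curlwpop(RUMPUSER_LWP_CLEAR, l);
 */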

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}
