/*	$NetBSD: rumpuser_pth.c,v 1.27 2013/05/03 00:23:49 pooka Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.27 2013/05/03 00:23:49 pooka Exp $");
#endif /* !lint */

#include <sys/queue.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

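/*
 * Helper macros from rumpuser_int.h (see there for the exact
 * definitions); roughly:
 *	NOFAIL(expr)		abort if expr evaluates to 0/NULL
 *	NOFAIL_ERRNO(expr)	abort if expr returns a non-zero error
 *	KLOCK_WRAP(expr)	release the rump kernel CPU around a
 *				potentially blocking host call
 *	ET(rv)			translate the host error to the rump
 *				kernel namespace and return it
 */

/*
 * Create a host thread.  The priority and cpuidx hints are ignored by
 * this pthread-based implementation.  For joinable threads the pthread_t
 * is heap-allocated and handed back via ptcookie so that
 * rumpuser_thread_join() can join and free it later.
 */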
int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
    int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	rv = pthread_create(ptidp, &pattr, f, arg);
#if defined(__NetBSD__)
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() call varies from one Linux distro to
	 * another.  Comment out the call pending autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}

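/*
 * Mutexes.  RUMPUSER_MTX_KMUTEX roughly means the lock backs a kernel
 * mutex, so its owner is tracked for rumpuser_mutex_owner().
 * RUMPUSER_MTX_SPIN roughly means the kernel side treats the lock as a
 * spin mutex, so the rump kernel CPU is not released while waiting for
 * it (see rumpuser_mutex_enter() below).
 */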
struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = malloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

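/*
 * Acquire mtx.  Fast path: try to take the lock without releasing the
 * rump kernel CPU.  If that fails, block in pthread_mutex_lock() with
 * the CPU released (KLOCK_WRAP).  Spin mutexes never release the CPU.
 */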
void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared lock.  to accommodate this,
 * we need to check *after* acquiring a lock in case someone was
 * downgrading it.  if so, we couldn't actually have it and maybe
 * need to retry later.
 */

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
	pthread_spinlock_t spin;
	int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};
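
/*
 * rw->readers encodes the lock state: -1 means write locked, 0 means
 * unlocked and a positive value is the number of read holders.  The
 * count is updated by rw_readup()/rw_readdown() under rw->spin; the
 * writer field is only touched by the thread holding the lock
 * exclusively.
 */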

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == -1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{

	return rw->readers > 0 ? rw->readers : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = -1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static void
rw_readup(struct rumpuser_rw *rw)
{

	pthread_spin_lock(&rw->spin);
	assert(rw->readers >= 0);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
}

static void
rw_readdown(struct rumpuser_rw *rw)
{

	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = malloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

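/*
 * Acquire rw.  As with mutexes, try the non-blocking variant first and
 * release the rump kernel CPU only if we have to block.  A writer must
 * additionally retry if it raced with a downgrade (see rw_setwriter()).
 */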
void
rumpuser_rw_enter(struct rumpuser_rw *rw, const enum rumprwlock lk)
{

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(struct rumpuser_rw *rw, const enum rumprwlock lk)
{
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}

/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between.  actually, might allow
 * someone to obtain the lock, we just don't allow that thread to
 * return from the hypercall with it.
 */
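/*
 * Illustrative interleaving (a sketch, not part of the original source):
 * the downgrading thread sets rw->downgrade and drops the exclusive
 * lock; a competing writer may now win pthread_rwlock_wrlock(), but
 * rw_setwriter() sees the downgrade flag, releases the lock again and
 * retries.  The downgrader then takes the lock shared and clears the
 * flag, after which writers can proceed normally.
 */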
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
	free(rw);
}

void
rumpuser_rw_held(struct rumpuser_rw *rw, const enum rumprwlock lk, int *rv)
{

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};
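
/*
 * nwaiters is advisory: the wait routines update it while the caller
 * still holds the interlock, and rumpuser_cv_has_waiters() simply
 * reports the current value.
 */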

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * The more optimal solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

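/*
 * Wait on cv with mtx as the interlock.  The rump kernel CPU is
 * released for the duration of the sleep and reacquired, in
 * deadlock-safe order (see cv_reschedule()), before returning.
 */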
void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
    int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Read the clock already here, in case we are put to sleep after
	 * releasing the rump kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL || l == NULL);

		if (l) {
			pthread_mutex_lock(&lwplock);
			LIST_FOREACH(rl, &lwps, l_entries) {
				if (rl->l == l)
					break;
			}
			if (!rl) {
				fprintf(stderr,
				    "LWP_SET: %p does not exist\n", l);
				abort();
			}
			pthread_mutex_unlock(&lwplock);
		} else {
			rl = NULL;
		}

		pthread_setspecific(curlwpkey, rl);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL || l == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


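/*
 * Called during hypervisor init to create the thread-specific data key
 * used for curlwp tracking.
 */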
void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}