/*	$NetBSD: rumpuser_pth.c,v 1.32 2013/09/24 23:45:16 rmind Exp $	*/

/*
 * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rumpuser_port.h"

#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.32 2013/09/24 23:45:16 rmind Exp $");
#endif /* !lint */

#include <sys/queue.h>
#if defined(__NetBSD__)
#include <sys/atomic.h>
#endif

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

#include <rump/rumpuser.h>

#include "rumpuser_int.h"

int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
    int joinable, int priority, int cpuidx, void **ptcookie)
{
	pthread_t ptid;
	pthread_t *ptidp;
	pthread_attr_t pattr;
	int rv, i;

	if ((rv = pthread_attr_init(&pattr)) != 0)
		return rv;

	if (joinable) {
		NOFAIL(ptidp = malloc(sizeof(*ptidp)));
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
	} else {
		ptidp = &ptid;
		pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
	}

	/*
	 * pthread_create() may fail with EAGAIN if the host is briefly
	 * out of resources, so retry a few times with a short (10 ms)
	 * sleep in between before giving up.
	 */
	for (i = 0; i < 10; i++) {
		const struct timespec ts = {0, 10*1000*1000};

		rv = pthread_create(ptidp, &pattr, f, arg);
		if (rv != EAGAIN)
			break;
		nanosleep(&ts, NULL);
	}

#if defined(__NetBSD__)
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname, NULL);
#elif defined(__linux__)
	/*
	 * The pthread_setname_np() call varies from one Linux distro to
	 * another.  Comment out the call pending autoconf support.
	 */
#if 0
	if (rv == 0 && thrname)
		pthread_setname_np(*ptidp, thrname);
#endif
#endif

	if (joinable) {
		assert(ptcookie);
		*ptcookie = ptidp;
	}

	pthread_attr_destroy(&pattr);

	ET(rv);
}

__dead void
rumpuser_thread_exit(void)
{

	pthread_exit(NULL);
}

int
rumpuser_thread_join(void *ptcookie)
{
	pthread_t *pt = ptcookie;
	int rv;

	KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
	if (rv == 0)
		free(pt);

	ET(rv);
}
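
#if 0
/*
 * Illustrative sketch only (never compiled): create a joinable thread
 * and join it later via the cookie handed back through ptcookie.
 * "example_workfn" and "example_thread_usage" are made-up names for
 * this sketch; the priority and cpuidx arguments are ignored by this
 * pthread-based implementation, so -1 is passed for both.
 */
static void *
example_workfn(void *arg)
{

	return arg;
}

static void
example_thread_usage(void)
{
	void *joincookie;

	if (rumpuser_thread_create(example_workfn, NULL, "example",
	    1 /* joinable */, -1, -1, &joincookie) == 0)
		rumpuser_thread_join(joincookie);
}
#endif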

struct rumpuser_mtx {
	pthread_mutex_t pthmtx;
	struct lwp *owner;
	int flags;
};

void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
	pthread_mutexattr_t att;

	NOFAIL(*mtx = malloc(sizeof(struct rumpuser_mtx)));

	pthread_mutexattr_init(&att);
	pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
	NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
	pthread_mutexattr_destroy(&att);

	(*mtx)->owner = NULL;
	assert(flags != 0);
	(*mtx)->flags = flags;
}

static void
mtxenter(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner == NULL);
	mtx->owner = rumpuser_curlwp();
}

static void
mtxexit(struct rumpuser_mtx *mtx)
{

	if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
		return;

	assert(mtx->owner != NULL);
	mtx->owner = NULL;
}

void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{

	if (mtx->flags & RUMPUSER_MTX_SPIN) {
		rumpuser_mutex_enter_nowrap(mtx);
		return;
	}

	assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
	if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
		KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
	mtxenter(mtx);
}

void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{

	assert(mtx->flags & RUMPUSER_MTX_SPIN);
	NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
	mtxenter(mtx);
}

int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
	int rv;

	rv = pthread_mutex_trylock(&mtx->pthmtx);
	if (rv == 0) {
		mtxenter(mtx);
	}

	ET(rv);
}

void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{

	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}

void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{

	NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
	free(mtx);
}

void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{

	if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
		printf("panic: rumpuser_mutex_owner unsupported on non-kmtx\n");
		abort();
	}

	*lp = mtx->owner;
}

/*
 * rwlocks.  these are mostly simple, except that NetBSD wants to
 * support something called downgrade, which means we need to swap
 * our exclusive lock for a shared one.  to accommodate this,
 * we need to check *after* acquiring a lock in case someone is
 * downgrading it.  if so, we didn't really get the lock and may
 * need to retry later.
 */
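
#if 0
/*
 * Illustrative sketch only (never compiled): the downgrade sequence
 * described above, as seen from a hypothetical caller.
 * "example_rw_downgrade_usage" is a made-up name for this sketch.
 */
static void
example_rw_downgrade_usage(struct rumpuser_rw *rw)
{

	rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);	/* exclusive hold */
	/* ... modify the protected data ... */
	rumpuser_rw_downgrade(rw);		/* exchange for a shared hold */
	/* ... keep reading the protected data ... */
	rumpuser_rw_exit(rw);			/* release the shared hold */
}
#endif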

struct rumpuser_rw {
	pthread_rwlock_t pthrw;
	pthread_spinlock_t spin;
	unsigned int readers;
	struct lwp *writer;
	int downgrade; /* someone is downgrading (hopefully lock holder ;) */
};

static int
rw_amwriter(struct rumpuser_rw *rw)
{

	return rw->writer == rumpuser_curlwp() && rw->readers == (unsigned)-1;
}

static int
rw_nreaders(struct rumpuser_rw *rw)
{
	unsigned nreaders = rw->readers;

	return nreaders != (unsigned)-1 ? nreaders : 0;
}

static int
rw_setwriter(struct rumpuser_rw *rw, int retry)
{

	/*
	 * Don't need the spinlock here, we already have an
	 * exclusive lock and "downgrade" is stable until complete.
	 */
	if (rw->downgrade) {
		pthread_rwlock_unlock(&rw->pthrw);
		if (retry) {
			struct timespec ts;

			/* portable yield, essentially */
			ts.tv_sec = 0;
			ts.tv_nsec = 1;
			KLOCK_WRAP(nanosleep(&ts, NULL));
		}
		return EBUSY;
	}
	assert(rw->readers == 0);
	rw->writer = rumpuser_curlwp();
	rw->readers = (unsigned)-1;
	return 0;
}

static void
rw_clearwriter(struct rumpuser_rw *rw)
{

	assert(rw_amwriter(rw));
	rw->readers = 0;
	rw->writer = NULL;
}

static void
rw_readup(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__)
	atomic_inc_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	++rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

static void
rw_readdown(struct rumpuser_rw *rw)
{

#if defined(__NetBSD__)
	atomic_dec_uint(&rw->readers);
#else
	pthread_spin_lock(&rw->spin);
	assert(rw->readers > 0);
	--rw->readers;
	pthread_spin_unlock(&rw->spin);
#endif
}

void
rumpuser_rw_init(struct rumpuser_rw **rw)
{

	NOFAIL(*rw = malloc(sizeof(struct rumpuser_rw)));
	NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),PTHREAD_PROCESS_PRIVATE));
	(*rw)->readers = 0;
	(*rw)->writer = NULL;
	(*rw)->downgrade = 0;
}

void
rumpuser_rw_enter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		do {
			if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
				KLOCK_WRAP(NOFAIL_ERRNO(
				    pthread_rwlock_wrlock(&rw->pthrw)));
		} while (rw_setwriter(rw, 1) != 0);
		break;
	case RUMPUSER_RW_READER:
		if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
			KLOCK_WRAP(NOFAIL_ERRNO(
			    pthread_rwlock_rdlock(&rw->pthrw)));
		rw_readup(rw);
		break;
	}
}

int
rumpuser_rw_tryenter(int enum_rumprwlock, struct rumpuser_rw *rw)
{
	enum rumprwlock lk = enum_rumprwlock;
	int rv;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		rv = pthread_rwlock_trywrlock(&rw->pthrw);
		if (rv == 0)
			rv = rw_setwriter(rw, 0);
		break;
	case RUMPUSER_RW_READER:
		rv = pthread_rwlock_tryrdlock(&rw->pthrw);
		if (rv == 0)
			rw_readup(rw);
		break;
	default:
		rv = EINVAL;
		break;
	}

	ET(rv);
}

int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	/*
	 * Not supported by pthreads.  Since the caller needs to
	 * back off anyway to avoid deadlock, always failing
	 * is correct.
	 */
	ET(EBUSY);
}
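
#if 0
/*
 * Illustrative sketch only (never compiled): since tryupgrade always
 * reports EBUSY here, a caller wanting to go from a shared to an
 * exclusive hold backs off by dropping the shared hold and reacquiring
 * as a writer.  "example_upgrade_fallback" is a made-up name for this
 * sketch.
 */
static void
example_upgrade_fallback(struct rumpuser_rw *rw)
{

	if (rumpuser_rw_tryupgrade(rw) != 0) {
		rumpuser_rw_exit(rw);
		rumpuser_rw_enter(RUMPUSER_RW_WRITER, rw);
		/* the protected data may have changed in between */
	}
}
#endif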

/*
 * convert from exclusive to shared lock without allowing anyone to
 * obtain an exclusive lock in between.  actually, might allow
 * someone to obtain the lock, we just don't allow that thread to
 * return from the hypercall with it.
 */
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{

	assert(rw->downgrade == 0);
	rw->downgrade = 1;
	rumpuser_rw_exit(rw);
	/*
	 * though the competition can't get out of the hypervisor, it
	 * might have rescheduled itself after we released the lock.
	 * so need a wrap here.
	 */
	KLOCK_WRAP(NOFAIL_ERRNO(pthread_rwlock_rdlock(&rw->pthrw)));
	rw->downgrade = 0;
	rw_readup(rw);
}

void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{

	if (rw_nreaders(rw))
		rw_readdown(rw);
	else
		rw_clearwriter(rw);
	NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}

void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{

	NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
	NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
	free(rw);
}

void
rumpuser_rw_held(int enum_rumprwlock, struct rumpuser_rw *rw, int *rv)
{
	enum rumprwlock lk = enum_rumprwlock;

	switch (lk) {
	case RUMPUSER_RW_WRITER:
		*rv = rw_amwriter(rw);
		break;
	case RUMPUSER_RW_READER:
		*rv = rw_nreaders(rw);
		break;
	}
}

/*
 * condvar
 */

struct rumpuser_cv {
	pthread_cond_t pthcv;
	int nwaiters;
};

void
rumpuser_cv_init(struct rumpuser_cv **cv)
{

	NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
	NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
	(*cv)->nwaiters = 0;
}

void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
	free(cv);
}

static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{

	rumpkern_unsched(nlocks, mtx);
	mtxexit(mtx);
}

static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{

	/*
	 * If the cv interlock is a spin mutex, we must first release
	 * the mutex that was reacquired by pthread_cond_wait(),
	 * acquire the CPU context and only then relock the mutex.
	 * This is to preserve resource allocation order so that
	 * we don't deadlock.  Non-spinning mutexes don't have this
	 * problem since they don't use a hold-and-wait approach
	 * to acquiring the mutex wrt the rump kernel CPU context.
	 *
	 * The more optimal solution would be to rework rumpkern_sched()
	 * so that it's possible to tell the scheduler
	 * "if you need to block, drop this lock first", but I'm not
	 * going poking there without some numbers on how often this
	 * path is taken for spin mutexes.
	 */
	if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
	    (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
		NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
		rumpkern_sched(nlocks, mtx);
		rumpuser_mutex_enter_nowrap(mtx);
	} else {
		mtxenter(mtx);
		rumpkern_sched(nlocks, mtx);
	}
}

void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
	int nlocks;

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;
}

void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{

	cv->nwaiters++;
	mtxexit(mtx);
	NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
	mtxenter(mtx);
	cv->nwaiters--;
}

int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
    int64_t sec, int64_t nsec)
{
	struct timespec ts;
	int rv, nlocks;

	/*
	 * Get clock already here, just in case we will be put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but since
	 * that's not available everywhere, leave it for another day.
	 */
	clock_gettime(CLOCK_REALTIME, &ts);

	cv->nwaiters++;
	cv_unschedule(mtx, &nlocks);

	ts.tv_sec += sec;
	ts.tv_nsec += nsec;
	if (ts.tv_nsec >= 1000*1000*1000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000*1000*1000;
	}
	rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);

	cv_reschedule(mtx, nlocks);
	cv->nwaiters--;

	ET(rv);
}
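
#if 0
/*
 * Sketch only (never compiled), assuming the host provides
 * pthread_condattr_setclock() and CLOCK_MONOTONIC: how the condvar
 * could be bound to the monotonic clock as suggested in the comment
 * in rumpuser_cv_timedwait() above.  The timedwait would then base
 * its timespec on clock_gettime(CLOCK_MONOTONIC, ...) instead of
 * CLOCK_REALTIME.  "example_cv_init_monotonic" is a made-up name.
 */
static void
example_cv_init_monotonic(struct rumpuser_cv *cv)
{
	pthread_condattr_t ca;

	NOFAIL_ERRNO(pthread_condattr_init(&ca));
	NOFAIL_ERRNO(pthread_condattr_setclock(&ca, CLOCK_MONOTONIC));
	NOFAIL_ERRNO(pthread_cond_init(&cv->pthcv, &ca));
	pthread_condattr_destroy(&ca);
}
#endif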

void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}

void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{

	NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}

void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{

	*nwaiters = cv->nwaiters;
}

/*
 * curlwp
 */

static pthread_key_t curlwpkey;

/*
 * the if0'd curlwp implementation is not used by this hypervisor,
 * but serves as test code to check that the intended usage works.
 */
#if 0
struct rumpuser_lwp {
	struct lwp *l;
	LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;

void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
	struct rumpuser_lwp *rl, *rliter;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		rl = malloc(sizeof(*rl));
		rl->l = l;
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rliter, &lwps, l_entries) {
			if (rliter->l == l) {
				fprintf(stderr, "LWP_CREATE: %p exists\n", l);
				abort();
			}
		}
		LIST_INSERT_HEAD(&lwps, rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		break;
	case RUMPUSER_LWP_DESTROY:
		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
			abort();
		}
		LIST_REMOVE(rl, l_entries);
		pthread_mutex_unlock(&lwplock);
		free(rl);
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL && l != NULL);

		pthread_mutex_lock(&lwplock);
		LIST_FOREACH(rl, &lwps, l_entries) {
			if (rl->l == l)
				break;
		}
		if (!rl) {
			fprintf(stderr,
			    "LWP_SET: %p does not exist\n", l);
			abort();
		}
		pthread_mutex_unlock(&lwplock);

		pthread_setspecific(curlwpkey, rl);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(((struct rumpuser_lwp *)
		    pthread_getspecific(curlwpkey))->l == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{
	struct rumpuser_lwp *rl;

	rl = pthread_getspecific(curlwpkey);
	return rl ? rl->l : NULL;
}

#else

void
rumpuser_curlwpop(int enum_rumplwpop, struct lwp *l)
{
	enum rumplwpop op = enum_rumplwpop;

	switch (op) {
	case RUMPUSER_LWP_CREATE:
		break;
	case RUMPUSER_LWP_DESTROY:
		break;
	case RUMPUSER_LWP_SET:
		assert(pthread_getspecific(curlwpkey) == NULL);
		pthread_setspecific(curlwpkey, l);
		break;
	case RUMPUSER_LWP_CLEAR:
		assert(pthread_getspecific(curlwpkey) == l);
		pthread_setspecific(curlwpkey, NULL);
		break;
	}
}

struct lwp *
rumpuser_curlwp(void)
{

	return pthread_getspecific(curlwpkey);
}
#endif


void
rumpuser__thrinit(void)
{
	pthread_key_create(&curlwpkey, NULL);
}