/*	$NetBSD: vm.c,v 1.41 2008/10/15 13:04:26 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 * + UBC
 * + anon objects & pager
 * + vnode objects & pager
 * + misc support routines
 * + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

/* dumdidumdum */
#define	len2npages(off, len)						\
	(((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT)		\
	    + ((((off) & PAGE_MASK) + ((len) & PAGE_MASK)) > PAGE_SIZE))
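
/*
 * Example: with PAGE_SIZE 4096, off = 4000 and len = 200, the length
 * alone rounds up to a single page, but the transfer straddles a page
 * boundary ((4000 & PAGE_MASK) + (200 & PAGE_MASK) > 4096), so the
 * second term adds one more: len2npages(4000, 200) == 2.
 */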

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;
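
/*
 * Note: kernel_map is a token only.  The uvm_km_* routines at the
 * end of this file allocate directly from the host and never examine
 * the map they are handed.
 */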

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

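/*
 * VA bookkeeping: a rump kernel does not manage page tables, so
 * pager-established virtual addresses are simply recorded in a list.
 * uvm_pageratop() later translates a va back to its vm_page by
 * walking that list.
 */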
struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;
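
/*
 * UBC is emulated with "windows": ubc_alloc() hands out a plain
 * anonymous buffer instead of a kernel mapping of the object's pages.
 * When that buffer is later used for i/o, rump_ubc_magic_uiomove()
 * locates the owning window by address and moves the data through
 * the object's pager (pgo_get plus uiomove on each page).
 */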

int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			/* not a window; let the caller do the copy */
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);
	/* unlink while still holding the list lock */
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		/* a window can cover the entire request in one go */
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}


/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);

	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
	    bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));

	return;
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	return false;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * Wake up either the pagedaemon or the LWPs waiting for
	 * free memory, depending on how tight things are.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

/* XXX: following two are unfinished because lwps are not refcounted yet */
void
uvm_lwp_hold(struct lwp *l)
{

	atomic_inc_uint(&l->l_holdcnt);
}

void
uvm_lwp_rele(struct lwp *l)
{

	atomic_dec_uint(&l->l_holdcnt);
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/*
 * Kmem
 */

#ifndef RUMP_USE_REAL_KMEM
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
#endif /* RUMP_USE_REAL_KMEM */

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	/* XXX: a token non-NULL value; the map is never dereferenced */
	return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

	rumpuser_free((void *)addr);
}