/*	$NetBSD: vm.c,v 1.39 2008/10/10 20:45:21 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

/* dumdidumdum */
#define len2npages(off, len)						\
	(((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT)		\
	    + (((off & PAGE_MASK) + (len & PAGE_MASK)) > PAGE_SIZE))
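/*
 * len2npages() estimates how many pages a transfer of len bytes
 * starting at offset off touches: the byte count rounded up to full
 * pages, plus one more if the head and tail fragments together
 * straddle an extra page boundary.  E.g. with 4k pages,
 * len2npages(4000, 200) = (4096 >> 12) + (4000 + 200 > 4096)
 * = 1 + 1 = 2, since bytes 4000..4199 hit both page 0 and page 1.
 */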

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

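/*
 * Only pgo_get and pgo_put are provided; the remaining pager ops
 * stay NULL and are never exercised by the rump kernel.
 */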
const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* Called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;
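/*
 * The rumpva list records va -> vm_page translations: rumpvm_enterva()
 * registers one, uvm_pageratop() maps a va back to its page, and
 * rumpvm_flushva() discards the lot.  Lookups are linear scans of the
 * list under rvamtx.
 */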

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */
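/*
 * The vnode pager does no work of its own: get and put simply punt
 * to the vnode's VOP_GETPAGES() and VOP_PUTPAGES() methods.
 */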

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */
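/*
 * There is no swap here, so anon memory is "paged" purely in RAM:
 * ao_get hands back a page already on the object's page queue
 * (sleeping if it is busy), and otherwise conjures up a fresh
 * zero-filled one.  ao_put supports only the PGO_FREE|PGO_ALLPAGES
 * case, i.e. freeing every page of the object.
 */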

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

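/*
 * These anon objects are not reference counted: detach frees all
 * pages and the object itself right away.
 */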
void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;
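/*
 * UBC emulation: since we do not handle page faults here, ubc_alloc()
 * hands out a plain malloc'd "window" instead of a kernel mapping of
 * the object's pages.  A copy to or from such a window is diverted to
 * rump_ubc_magic_uiomove(), which locates the window the va belongs
 * to, fetches the underlying pages through the object's pager and
 * moves the data directly to/from the page storage.  It returns 1 if
 * the va belonged to a window (with the pager's error code stored via
 * rvp if rvp is non-NULL) and 0 if the va was not UBC memory.
 */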

int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);

	/* unhook while still holding uwinmtx to avoid racing lookups */
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}
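/*
 * A minimal sketch of how a file system I/O path would drive this
 * interface (the names vp, uio and error are illustrative, not from
 * this file):
 *
 *	while (uio->uio_resid > 0) {
 *		vsize_t bytes = uio->uio_resid;
 *		void *win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
 *		    &bytes, UVM_ADV_NORMAL, UBC_WRITE);
 *		error = uiomove(win, bytes, uio);
 *		ubc_release(win, 0);
 *		if (error)
 *			break;
 *	}
 *
 * In this emulation the uiomove() on window memory ends up in
 * rump_ubc_magic_uiomove() above, which moves the data through the
 * object's pager instead of through a real mapping.
 */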


/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);

	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
	    bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

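/*
 * Zero out [off, off+len) of a vnode by pulling its pages in through
 * the pager, up to 32 at a time, and clearing the affected chunk of
 * each page's backing storage.  The pages are dirtied (PG_CLEAN is
 * cleared) so the zeroes eventually reach the file.
 */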
void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * Wake up either the pagedaemon or the LWPs waiting for it
	 * to free up memory.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

/*
 * Kmem
 */
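/*
 * Unless the real kernel kmem allocator has been compiled in, kmem
 * passes straight through to the host allocator via rumpuser:
 * KM_SLEEP maps to a host allocation that is expected not to fail,
 * KM_NOSLEEP to one that may return NULL.
 */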

#ifndef RUMP_USE_REAL_KMEM
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
#endif /* RUMP_USE_REAL_KMEM */

/*
 * UVM km
 */
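/*
 * uvm_km_alloc() likewise reduces to a host allocation: the map and
 * alignment arguments are ignored, UVM_KMF_ZERO is honoured with a
 * memset, and UVM_KMF_CANFAIL/UVM_KMF_NOWAIT allow the allocation to
 * fail and return 0.
 */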

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

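/*
 * Submaps are not emulated; hand back an arbitrary non-NULL cookie
 * which callers must treat as opaque and never dereference.
 */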
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	return (struct vm_map *)417416;
}