/*	vm.c,v 1.41 2008/10/15 13:04:26 pooka Exp	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

/*
 * Number of pages needed to back a transfer of len bytes starting at
 * offset off, e.g. off = 0x1f00, len = 0x300 with 4k pages yields 2.
 */
#define len2npages(off, len)						\
	(((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT)		\
	    + ((((off) & PAGE_MASK) + ((len) & PAGE_MASK)) > PAGE_SIZE))

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

struct uvm_pggroup uvm_pggroup_store;

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

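/*
 * Record a va -> vm_page translation so that uvm_pageratop() can later
 * map a pager-provided virtual address back to its page.  The list is
 * torn down wholesale with rumpvm_flushva().
 */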
void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva()
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */

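/*
 * The vnode pager simply punts to the vnode operations; the file
 * system backing the vnode does the actual work.
 */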
static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

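/*
 * Get pages from an anonymous object.  Since the object has no backing
 * store, a page which is not already present is simply created as a
 * fresh zero-filled page; pages found busy are waited for and retried.
 */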
static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

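/*
 * Perform a uiomove() against a UBC window.  If the caller did not
 * supply the window, the address is first looked up from the list of
 * active windows; a return value of 0 means the address is not part
 * of any window and the caller should fall back to a plain copy.
 * Otherwise the backing pages are fetched from the window's object
 * and data is copied directly to/from their storage.
 */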
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

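/*
 * ubc_alloc() does not map object pages anywhere.  Instead, it hands
 * out an anonymous buffer and records the window so that a later
 * uiomove() against the buffer can be redirected to the object's
 * pages via rump_ubc_magic_uiomove().
 */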
void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	mutex_exit(&uwinmtx);
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);

	LIST_REMOVE(uwinp, uwin_entries);
	uwin_free(uwinp);
}

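/*
 * ubc_uiomove() can skip the window list entirely: the window is
 * created, used for the transfer and torn down within this routine.
 */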
int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}


/*
 * Misc routines
 */

void
rumpvm_init()
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&rvamtx, MUTEX_DEFAULT, 0);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, 0);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);

	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

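/*
 * Translate a pager virtual address back to its vm_page using the
 * list built by rumpvm_enterva().
 */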
struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

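/*
 * Drop PG_BUSY on a set of pages, waking up anyone waiting on them and
 * freeing pages which were released while busy.
 */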
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(const struct uvm_pggroup *pg,
    u_int *active, u_int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
	    bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

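/*
 * Zero a byte range in a vnode by fetching the backing pages and
 * clearing the relevant parts of their storage; the pages are marked
 * dirty (~PG_CLEAN) in the process.
 */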
void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(pgs));

	return;
}

struct uvm_ractx *
uvm_ra_allocctx()
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	return false;
}

void
uvm_pageout_start(struct uvm_pggroup *grp, u_int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(struct vm_page *pg, bool freed)
{

	uvmexp.paging -= 1;

	/*
	 * wake up either of pagedaemon or LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

/* XXX: following two are unfinished because lwp's are not refcounted yet */
void
uvm_lwp_hold(struct lwp *l)
{

	atomic_inc_uint(&l->l_holdcnt);
}

void
uvm_lwp_rele(struct lwp *l)
{

	atomic_dec_uint(&l->l_holdcnt);
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/*
 * Kmem
 */

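/*
 * Unless the real kmem allocator is compiled in, these are thin
 * wrappers around host memory allocation via rumpuser.
 */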
#ifndef RUMP_USE_REAL_KMEM
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
#endif /* RUMP_USE_REAL_KMEM */

/*
 * UVM km
 */

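/*
 * The map argument is ignored: uvm_km_alloc() and friends hand out
 * host memory obtained through rumpuser, and uvm_km_suballoc()
 * returns a dummy map.
 */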
vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

	rumpuser_free((void *)addr);
}

struct uvm_pggroup *
uvm_page_to_pggroup(struct vm_page *pg)
{

	return &uvm_pggroup_store;
}